示例#1
0
class SimpleSkipGram:
    """Skip-gram model that scores two context positions from one target.

    One input ``MatMul`` and one output ``MatMul`` are followed by two
    ``SoftmaxWithLoss`` heads, one per context position.  ``MatMul`` and
    ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create the weights, the layers and the flat parameter lists.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        # W_in is drawn before W_out so the global RNG is consumed in the
        # same order as always; the cast to float32 happens before scaling.
        embed_w = np.random.randn(vocab_size, hidden_size).astype('f') * 0.01
        project_w = np.random.randn(hidden_size, vocab_size).astype('f') * 0.01

        self.in_layer = MatMul(embed_w)
        self.out_layer = MatMul(project_w)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # Only the two MatMul layers contribute parameters and gradients.
        trainable = (self.in_layer, self.out_layer)
        self.params = [p for layer in trainable for p in layer.params]
        self.grads = [g for layer in trainable for g in layer.grads]

        # Expose the input-side weights as the word vectors.
        self.word_vecs = embed_w

    def forward(self, contexts, target):
        """Return the sum of the two context losses.

        Args:
            contexts: Indexed as ``contexts[:, 0]`` and ``contexts[:, 1]``
                (presumably a batch-first array of two labels per sample;
                confirm against the caller).
            target: Input handed to the input layer.
        """
        hidden = self.in_layer.forward(target)
        scores = self.out_layer.forward(hidden)
        first_loss = self.loss_layer1.forward(scores, contexts[:, 0])
        second_loss = self.loss_layer2.forward(scores, contexts[:, 1])
        return first_loss + second_loss

    def backward(self, dout=1):
        """Back-propagate ``dout`` through both heads and both MatMul layers.

        The gradients of the two loss heads are added because both heads
        consumed the same score tensor.  Always returns ``None``.
        """
        grad_scores = (self.loss_layer1.backward(dout)
                       + self.loss_layer2.backward(dout))
        self.in_layer.backward(self.out_layer.backward(grad_scores))
        return None
示例#2
0
class SimpleCBOW:
    """CBOW model that averages two context projections to score a target.

    Both input layers are built from the same ``W_in`` array object.
    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Build weights, layers and the flat ``params`` / ``grads`` lists.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        # Draw order (W_in, then W_out) is kept so seeded runs are unchanged.
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        self.params = []
        self.grads = []
        for layer in (self.in_layer0, self.in_layer1, self.out_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # The input-side weights double as the word vectors.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices.

        ``contexts[:, 0]`` feeds the first input layer and ``contexts[:, 1]``
        the second; their outputs are averaged before the output layer.
        """
        left = self.in_layer0.forward(contexts[:, 0])
        right = self.in_layer1.forward(contexts[:, 1])
        averaged = (left + right) * 0.5
        return self.loss_layer.forward(self.out_layer.forward(averaged), target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        grad_hidden = self.out_layer.backward(self.loss_layer.backward(dout))
        # The forward pass scaled the sum by 0.5, so each branch receives
        # half of the hidden gradient (scaled in place).
        grad_hidden *= 0.5
        for branch in (self.in_layer1, self.in_layer0):
            branch.backward(grad_hidden)
        return None
class SimpleCBOW:
    """
    Simple continuous bag-of-words (CBOW) model with two context inputs.

    ``MatMul`` and ``SoftmaxWithLoss`` are provided elsewhere in the project.
    """
    def __init__(self, vocabulary_size, hidden_size):
        """
        :param vocabulary_size: rows of ``W_in`` and columns of ``W_out``
        :param hidden_size: columns of ``W_in`` and rows of ``W_out``
        """
        in_shape = (vocabulary_size, hidden_size)
        out_shape = (hidden_size, vocabulary_size)

        # small random float32 weights; W_in is drawn first, then W_out
        W_in = 0.01 * np.random.randn(*in_shape).astype('f')
        W_out = 0.01 * np.random.randn(*out_shape).astype('f')

        # both input layers are constructed from the same W_in object
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # flatten the weights and gradients of the MatMul layers
        weighted = (self.in_layer0, self.in_layer1, self.out_layer)
        self.params = [p for layer in weighted for p in layer.params]
        self.grads = [g for layer in weighted for g in layer.grads]

        # the input-side weights serve as the word vectors
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """
        :param contexts: sliced as ``contexts[:, 0]`` and ``contexts[:, 1]``
            (the original note describes a 3-dimensional numpy array)
        :param target: labels for the loss layer (the original note describes
            a 2-dimensional numpy array)
        :return: value returned by the loss layer
        """
        projections = [
            layer.forward(contexts[:, position])
            for position, layer in enumerate((self.in_layer0, self.in_layer1))
        ]
        h = (projections[0] + projections[1]) * 0.5
        score = self.out_layer.forward(h)
        return self.loss_layer.forward(score, target)

    def backward(self, dout=1):
        """
        Backward pass, in call order:

        1. loss layer     : dout -> ds
        2. output MatMul  : ds   -> da
        3. averaging node : da is scaled by 0.5 in place
        4. input MatMuls  : in_layer1, then in_layer0, both receive 0.5*da

        :param dout: upstream gradient handed to the loss layer
        :return: always None
        """
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        for input_layer in (self.in_layer1, self.in_layer0):
            input_layer.backward(da)
        return None
示例#4
0
class SimpleCBOW:
    """CBOW model with two weight-sharing input layers and one loss head.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        def small_random(rows, cols):
            # Cast to float32 first, then scale by 0.01.
            return 0.01 * np.random.randn(rows, cols).astype('f')

        # Weight initialisation (W_in is drawn before W_out).
        W_in = small_random(vocab_size, hidden_size)
        W_out = small_random(hidden_size, vocab_size)

        # One input layer per context word; both are given the same W_in.
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect weights and gradients; the loss layer is included too, so
        # it must expose ``params`` and ``grads`` as well.
        self.params = []
        self.grads = []
        for layer in (self.in_layer0, self.in_layer1,
                      self.out_layer, self.loss_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # Keep the distributed word representations on the instance.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices.

        Each context slice is multiplied by ``W_in``.  The original author's
        note: with one-hot inputs the product selects the matching row of
        ``W_in``, i.e. that word's distributed representation (example
        shapes from the note: (batch, 7) x (7, hidden)).
        """
        first = self.in_layer0.forward(contexts[:, 0])
        second = self.in_layer1.forward(contexts[:, 1])
        # Average of the two context representations.
        hidden = (first + second) * 0.5
        # (batch, hidden) x (hidden, vocab_size) per the original note.
        score = self.out_layer.forward(hidden)
        return self.loss_layer.forward(score, target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        da = self.out_layer.backward(self.loss_layer.backward(dout))
        # Undo the 0.5 averaging factor (in place) before the input layers.
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
示例#5
0
class SimpleSkipGram:
    """Skip-gram model: one input layer, one output layer, two loss heads.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        # Weight initialisation: W_in first, then W_out.
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # A single input layer and a single output layer.
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        # One loss layer per context position.
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # Gather every weight and gradient of the MatMul layers.
        self.params = []
        self.grads = []
        for layer in (self.in_layer, self.out_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # Keep the distributed word representations on the instance.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the summed loss over the two context positions."""
        scores = self.out_layer.forward(self.in_layer.forward(target))
        losses = [
            head.forward(scores, contexts[:, position])
            for position, head in enumerate(
                (self.loss_layer1, self.loss_layer2))
        ]
        return losses[0] + losses[1]

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        head_grads = [
            head.backward(dout)
            for head in (self.loss_layer1, self.loss_layer2)
        ]
        # Both heads read the same scores, so their gradients are added.
        ds = head_grads[0] + head_grads[1]
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
示例#6
0
class SimpleCBOW:
    """CBOW model with two context inputs and an optional preset ``W_in``.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    @staticmethod
    def _initial_w_in(vocab_size, hidden_size):
        """Return the starting input weights, shape (vocab_size, hidden_size).

        A random matrix is always drawn first so the global NumPy RNG is
        consumed exactly as before.  The hard-coded 7x5 matrix is used only
        when it matches the requested shape; previously it replaced the
        random weights unconditionally, which left the layers with
        mismatched shapes for every other ``(vocab_size, hidden_size)``.
        """
        random_w = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        preset_w = np.array(
            [[-1.0655735, 1.3231287, -1.1051644, -1.1049938, -1.0685176],
             [1.1559865, 0.08719956, 1.1672966, 1.1607609, 1.1567391],
             [-0.7532327, 0.6444376, -0.76896185, -0.71775854, -0.7918966],
             [0.9111972, 1.9940354, 0.6837302, 0.89859486, 0.87255],
             [-0.78328615, 0.6444221, -0.7729693, -0.7400077, -0.80646306],
             [-1.058986, 1.3268483, -1.1123687, -1.1059289, -1.0616288],
             [1.1203294, -1.6394324, 1.2104743, 1.1509397,
              1.1612827]]).astype('f')
        if random_w.shape == preset_w.shape:
            return preset_w
        return random_w

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        V, H = vocab_size, hidden_size

        # Weight initialisation (W_in is resolved before W_out is drawn).
        W_in = self._initial_w_in(V, H)
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Layer creation; both input layers are given the same W_in object.
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Gather all weights and gradients into flat lists.
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Expose the distributed word representations as a member.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices.

        ``contexts[:, 0]`` and ``contexts[:, 1]`` are projected by the two
        input layers and averaged before the output layer.
        """
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        # The forward average multiplied by 0.5, so the gradient does too.
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
示例#7
0
class SimpleSkipGram:
    """Skip-gram model with one loss layer per context position.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        # Initial weights: W_in is drawn first, then W_out.
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype("f")
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype("f")

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        # One loss layer is needed for each context word to be predicted.
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()

        # Gather weights and gradients of every layer; the loss layers are
        # included, so they must expose ``params`` and ``grads`` as well.
        every_layer = (
            self.in_layer,
            self.out_layer,
            self.loss_layer0,
            self.loss_layer1,
        )
        self.params = [p for layer in every_layer for p in layer.params]
        self.grads = [g for layer in every_layer for g in layer.grads]

        # Expose the distributed word representations as a member.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the summed loss over the two context positions."""
        score = self.out_layer.forward(self.in_layer.forward(target))
        loss_first = self.loss_layer0.forward(score, contexts[:, 0])
        loss_second = self.loss_layer1.forward(score, contexts[:, 1])
        return loss_first + loss_second

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        # Both heads consumed the same scores, so their gradients are added.
        ds = self.loss_layer0.backward(dout) + self.loss_layer1.backward(dout)
        self.in_layer.backward(self.out_layer.backward(ds))
        return None
示例#8
0
class SimpleCBOW:
    """CBOW model with one input layer per context word.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        # Initial weights: W_in is drawn first, then W_out.
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype("f")
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype("f")

        # One input layer is needed for each context word that is used;
        # both are constructed from the same W_in object.
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Gather weights and gradients of every layer, loss layer included.
        self.params = []
        self.grads = []
        for layer in (self.in_layer0, self.in_layer1,
                      self.out_layer, self.loss_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # Expose the distributed word representations as a member.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices."""
        first = self.in_layer0.forward(contexts[:, 0])
        second = self.in_layer1.forward(contexts[:, 1])
        mean_hidden = (first + second) * 0.5
        score = self.out_layer.forward(mean_hidden)
        return self.loss_layer.forward(score, target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        da = self.out_layer.backward(self.loss_layer.backward(dout))
        # The averaging step multiplied by 0.5, so the gradient does too.
        da *= 0.5
        for branch in (self.in_layer0, self.in_layer1):
            branch.backward(da)
        return None
示例#9
0
class SimpleCBOW:
    """CBOW model: two input layers, one output layer, one loss head.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        in_shape = (vocab_size, hidden_size)
        out_shape = (hidden_size, vocab_size)

        # Weight initialisation: W_in is drawn first, then W_out.
        W_in = 0.01 * np.random.randn(*in_shape).astype('f')
        W_out = 0.01 * np.random.randn(*out_shape).astype('f')

        # As many input layers as there are context words in the window.
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients, loss layer included.
        every_layer = (self.in_layer0, self.in_layer1,
                       self.out_layer, self.loss_layer)
        self.params = [p for layer in every_layer for p in layer.params]
        self.grads = [g for layer in every_layer for g in layer.grads]

        # Keep the distributed word representations on the instance.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices."""
        projected = [
            layer.forward(contexts[:, position])
            for position, layer in enumerate((self.in_layer0, self.in_layer1))
        ]
        h = (projected[0] + projected[1]) * 0.5
        return self.loss_layer.forward(self.out_layer.forward(h), target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        # Halve in place to mirror the 0.5 factor of the forward average.
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
示例#10
0
class SimpleSkipGram:
    """Skip-gram model whose weights are TensorFlow variables.

    ``tf``, ``MatMul`` and ``SoftmaxWithLoss`` are provided elsewhere in the
    project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        def gaussian_variable(shape):
            # Normal(0, 0.01) samples wrapped in a variable.
            return tf.Variable(
                tf.random.normal(shape, mean=0.0, stddev=0.01, dtype='float'))

        # W_in is created before W_out.
        W_in = gaussian_variable((vocab_size, hidden_size))
        W_out = gaussian_variable((hidden_size, vocab_size))

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # The loss layers are part of the collection, so they must expose
        # ``params`` and ``grads`` as well.
        every_layer = (self.in_layer, self.out_layer,
                       self.loss_layer1, self.loss_layer2)
        self.params = [p for layer in every_layer for p in layer.params]
        self.grads = [g for layer in every_layer for g in layer.grads]

        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the summed loss over the two context positions."""
        hidden = self.in_layer.forward(target)
        scores = self.out_layer.forward(hidden)
        first_loss = self.loss_layer1.forward(scores, contexts[:, 0])
        second_loss = self.loss_layer2.forward(scores, contexts[:, 1])
        return first_loss + second_loss

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        # Both heads read the same scores, so their gradients are added.
        grad_scores = (self.loss_layer1.backward(dout)
                       + self.loss_layer2.backward(dout))
        grad_hidden = self.out_layer.backward(grad_scores)
        self.in_layer.backward(grad_hidden)
        return None
示例#11
0
class SimpleCBOW:
    """CBOW model built from the vocabulary size and hidden-layer width.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Number of words in the vocabulary.
            hidden_size: Number of neurons in the hidden layer.
        """
        def small_random(rows, cols):
            # 32-bit floats, scaled by 0.01 after the cast.
            return 0.01 * np.random.randn(rows, cols).astype('f')

        # Weight initialisation: W_in is drawn first, then W_out.
        W_in = small_random(vocab_size, hidden_size)
        W_out = small_random(hidden_size, vocab_size)

        # One input layer per context word (window_size * 2 in total
        # according to the original note); both are given the same W_in.
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Gather all weights and gradients of the MatMul layers.
        self.params = []
        self.grads = []
        for layer in (self.in_layer0, self.in_layer1, self.out_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # Keep the distributed word representations on the instance.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Take contexts and target and return the loss.

        Example shapes from the original note: ``contexts.shape == (6, 2, 7)``
        and ``target.shape == (6, 7)``.
        """
        left = self.in_layer0.forward(contexts[:, 0])
        right = self.in_layer1.forward(contexts[:, 1])
        averaged = (left + right) * 0.5
        score = self.out_layer.forward(averaged)
        return self.loss_layer.forward(score, target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        da = self.out_layer.backward(self.loss_layer.backward(dout))
        # Halve in place to mirror the 0.5 factor of the forward average.
        da *= 0.5
        for branch in (self.in_layer1, self.in_layer0):
            branch.backward(da)
        return None
class SimpleSkipGram:
    """Skip-gram model with two loss heads sharing one score tensor.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        in_shape = (vocab_size, hidden_size)
        out_shape = (hidden_size, vocab_size)

        # initialize weights (W_in first, then W_out)
        W_in = 0.01 * np.random.randn(*in_shape).astype('f')
        W_out = 0.01 * np.random.randn(*out_shape).astype('f')

        # create layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # flatten the weights and grads of the MatMul layers
        weighted = (self.in_layer, self.out_layer)
        self.params = [p for layer in weighted for p in layer.params]
        self.grads = [g for layer in weighted for g in layer.grads]

        # keep the word vectors as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the summed loss over the two context positions."""
        s = self.out_layer.forward(self.in_layer.forward(target))
        per_position = [
            head.forward(s, contexts[:, position])
            for position, head in enumerate(
                (self.loss_layer1, self.loss_layer2))
        ]
        return per_position[0] + per_position[1]

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        # gradients of both heads are added before the output layer
        self.in_layer.backward(self.out_layer.backward(dl1 + dl2))
        return None
class SimpleCBOW:
    """CBOW model whose input layers are ``in_layer_0`` and ``in_layer_1``.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        # initialize weights (W_in first, then W_out)
        W_in = np.random.randn(vocab_size, hidden_size).astype('f') * 0.01
        W_out = np.random.randn(hidden_size, vocab_size).astype('f') * 0.01

        # create layers; both input layers are given the same W_in object
        self.in_layer_0 = MatMul(W_in)
        self.in_layer_1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # flatten the weights and grads of the MatMul layers
        self.params = []
        self.grads = []
        for layer in (self.in_layer_0, self.in_layer_1, self.out_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # keep the word vectors as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices."""
        first = self.in_layer_0.forward(contexts[:, 0])
        second = self.in_layer_1.forward(contexts[:, 1])
        hidden = (first + second) * 0.5
        return self.loss_layer.forward(self.out_layer.forward(hidden), target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        grad_hidden = self.out_layer.backward(self.loss_layer.backward(dout))
        # halve in place to mirror the 0.5 factor of the forward average
        grad_hidden *= 0.5
        for branch in (self.in_layer_1, self.in_layer_0):
            branch.backward(grad_hidden)
        return None
示例#14
0
class SimpleCBOW:
    """CBOW model that averages two context projections.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        def small_random(rows, cols):
            # float32 cast first, then the 0.01 scale
            return 0.01 * np.random.randn(rows, cols).astype("f")

        # initialise the weights (W_in first, then W_out)
        W_in = small_random(vocab_size, hidden_size)
        W_out = small_random(hidden_size, vocab_size)

        # build the layers; both input layers are given the same W_in
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # gather all weights and grads of the MatMul layers
        weighted = (self.in_layer0, self.in_layer1, self.out_layer)
        self.params = [p for layer in weighted for p in layer.params]
        self.grads = [g for layer in weighted for g in layer.grads]

        # expose the word representations as a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices."""
        first = self.in_layer0.forward(contexts[:, 0])
        second = self.in_layer1.forward(contexts[:, 1])
        hidden = 0.5 * (first + second)
        return self.loss_layer.forward(self.out_layer.forward(hidden), target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        da = self.out_layer.backward(self.loss_layer.backward(dout))
        # halve in place to mirror the 0.5 factor of the forward average
        da *= 0.5
        for branch in (self.in_layer0, self.in_layer1):
            branch.backward(da)
        return None
示例#15
0
class SimpleSkipGram:
    """Skip-gram model: one target input, two context loss heads.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        # initialise the weights (W_in first, then W_out)
        W_in = np.random.randn(vocab_size, hidden_size).astype('f') * 0.01
        W_out = np.random.randn(hidden_size, vocab_size).astype('f') * 0.01

        # build the layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # put all weights and gradients of the MatMul layers in flat lists
        self.params = []
        self.grads = []
        for layer in (self.in_layer, self.out_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # keep the distributed word representations on the instance
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the summed loss over the two context positions."""
        hidden = self.in_layer.forward(target)
        scores = self.out_layer.forward(hidden)
        first_loss = self.loss_layer1.forward(scores, contexts[:, 0])
        second_loss = self.loss_layer2.forward(scores, contexts[:, 1])
        return first_loss + second_loss

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        head_grads = [
            head.backward(dout)
            for head in (self.loss_layer1, self.loss_layer2)
        ]
        # both heads read the same scores, so their gradients are added
        grad_scores = head_grads[0] + head_grads[1]
        self.in_layer.backward(self.out_layer.backward(grad_scores))
        return None
示例#16
0
class SimpleCBOW:
    """CBOW model: two context inputs averaged into one hidden vector.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        in_shape = (vocab_size, hidden_size)
        out_shape = (hidden_size, vocab_size)

        # initialise the weights (W_in first, then W_out)
        W_in = 0.01 * np.random.randn(*in_shape).astype('f')
        W_out = 0.01 * np.random.randn(*out_shape).astype('f')

        # build the layers; both input layers are given the same W_in
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # put all weights and gradients of the MatMul layers in flat lists
        weighted = (self.in_layer0, self.in_layer1, self.out_layer)
        self.params = [p for layer in weighted for p in layer.params]
        self.grads = [g for layer in weighted for g in layer.grads]

        # keep the distributed word representations on the instance
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices."""
        projected = [
            layer.forward(contexts[:, position])
            for position, layer in enumerate((self.in_layer0, self.in_layer1))
        ]
        hidden = (projected[0] + projected[1]) * 0.5
        score = self.out_layer.forward(hidden)
        return self.loss_layer.forward(score, target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        grad_hidden = self.out_layer.backward(self.loss_layer.backward(dout))
        # halve in place to mirror the 0.5 factor of the forward average
        grad_hidden *= 0.5
        self.in_layer1.backward(grad_hidden)
        self.in_layer0.backward(grad_hidden)
        return None
class SimpleCBoW:
    """CBOW model with one input layer on each side of the target word.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocab_size: Row count of ``W_in`` / column count of ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        def small_random(rows, cols):
            # float32 cast first, then the 0.01 scale
            return 0.01 * np.random.randn(rows, cols).astype('f')

        # Weight initialisation: W_in is drawn first, then W_out.
        W_in = small_random(vocab_size, hidden_size)
        W_out = small_random(hidden_size, vocab_size)

        # The number of input layers depends on the window size (1 here,
        # i.e. one layer per side); both are given the same W_in object.
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Gather all weights and gradients of the MatMul layers.
        self.params = []
        self.grads = []
        for layer in (self.in_layer0, self.in_layer1, self.out_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # Expose the distributed word representations as a member.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the loss for ``target`` given the two context slices."""
        left = self.in_layer0.forward(contexts[:, 0])
        right = self.in_layer1.forward(contexts[:, 1])
        mean_hidden = (left + right) / 2
        return self.loss_layer.forward(
            self.out_layer.forward(mean_hidden), target)

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        da = self.out_layer.backward(self.loss_layer.backward(dout))
        # The forward pass divided the sum by two; halve the gradient too.
        da *= 0.5
        for branch in (self.in_layer0, self.in_layer1):
            branch.backward(da)
        return None
class SimpleSkipGram:
    """Skip-gram model taking ``vocabulary_size`` and ``hidden_size``.

    ``MatMul`` and ``SoftmaxWithLoss`` are defined elsewhere in the project.
    """

    def __init__(self, vocabulary_size, hidden_size):
        """Create weights and layers and gather parameters and gradients.

        Args:
            vocabulary_size: Row count of ``W_in`` / column count of
                ``W_out``.
            hidden_size: Column count of ``W_in`` / row count of ``W_out``.
        """
        def small_random(rows, cols):
            # float32 cast first, then the 0.01 scale
            return 0.01 * np.random.randn(rows, cols).astype('f')

        # initialize weights (W_in first, then W_out)
        W_in = small_random(vocabulary_size, hidden_size)
        W_out = small_random(hidden_size, vocabulary_size)

        # generate layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # list all weights and gradients of the MatMul layers
        weighted = (self.in_layer, self.out_layer)
        self.params = [p for layer in weighted for p in layer.params]
        self.grads = [g for layer in weighted for g in layer.grads]

        # set distributed representation of words to variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """Return the summed loss over the two context positions."""
        s = self.out_layer.forward(self.in_layer.forward(target))
        first_loss = self.loss_layer1.forward(s, contexts[:, 0])
        second_loss = self.loss_layer2.forward(s, contexts[:, 1])
        return first_loss + second_loss

    def backward(self, dout=1):
        """Back-propagate ``dout``; always returns ``None``."""
        # both heads read the same scores, so their gradients are added
        ds = self.loss_layer1.backward(dout) + self.loss_layer2.backward(dout)
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
示例#19
0
class Transformer(BaseModel):
    """Encoder-decoder Transformer assembled from this project's layers.

    The model owns one ``PositionalEmbedding`` (used for both inputs), a
    stack of ``TransformerEncoder`` layers, a stack of ``TransformerDecoder``
    layers, a ``MatMul`` projection back to the vocabulary and a
    ``TimeSoftmaxWithLoss`` head.  ``self.params`` / ``self.grads`` collect,
    in order, the embedding, every encoder, every decoder and the projection.
    """

    def __init__(self,
                 vocab_size,
                 wordvec_size,
                 head_size,
                 num_heads,
                 num_encoders=3,
                 num_decoders=3):
        """Build all layers and gather their parameters and gradients.

        Args:
            vocab_size: Rows of the embedding matrix and columns of the
                output projection.
            wordvec_size: Embedding width ``D``.
            head_size: Passed to every encoder/decoder as ``head_size``.
            num_heads: Passed to every encoder/decoder as ``num_heads``.
            num_encoders: Number of encoder layers.
            num_decoders: Number of decoder layers.
        """
        S, D, H = vocab_size, wordvec_size, head_size
        rn = np.random.randn

        self.num_encoders = num_encoders
        self.num_decoders = num_decoders
        self.params, self.grads = [], []

        # One embedding layer serves both the encoder and decoder inputs.
        embed_W1 = (rn(S, D) / 100).astype('f')
        self.e_embed = PositionalEmbedding(embed_W1)
        self.params += self.e_embed.params
        self.grads += self.e_embed.grads

        self.encoders, self.decoders = [], []
        for _ in range(num_encoders):
            te = TransformerEncoder(wordvec_size=D,
                                    head_size=H,
                                    num_heads=num_heads)
            self.encoders.append(te)
            self.params += te.params
            self.grads += te.grads

        for _ in range(num_decoders):
            td = TransformerDecoder(wordvec_size=D,
                                    head_size=H,
                                    num_heads=num_heads)
            self.decoders.append(td)
            self.params += td.params
            self.grads += td.grads

        # The projection weight is kept in its own ``linear`` layer for
        # convenience.
        self.linear = MatMul((rn(D, S) / np.sqrt(D)).astype('f'))
        self.params += self.linear.params
        self.grads += self.linear.grads

        # TimeSoftmaxWithLoss also has params and grads, but they are unused
        # and therefore not collected.
        self.softmax = TimeSoftmaxWithLoss(ignore_label=-1)

    def forward(self, xs, ts):
        """Run encoder and decoder stacks and return the loss.

        Args:
            xs: Encoder-side token ids, (N, T) per the original notes; also
                used as the labels of the loss layer.
            ts: Decoder-side token ids; embedded with the same layer.

        Returns:
            Whatever ``self.softmax.forward`` returns.
        """
        # After embedding, eout and dout are (N, T, D).
        eout = self.e_embed.forward(xs)
        dout = self.e_embed.forward(ts)
        N, T, D = eout.shape

        for encoder in self.encoders:
            eout = encoder.forward(eout)
        # Chain the decoders: each one consumes the previous decoder's output
        # together with the encoder output.  Previously every decoder was fed
        # the raw embedding and only the last result was kept, which did not
        # match the chained propagation performed in ``backward``.
        for decoder in self.decoders:
            dout = decoder.forward(dout, eout)

        hidden = dout.reshape(N * T, D)
        # score -> (N*T, S)
        score = self.linear.forward(hidden)
        _, S = score.shape
        # Argument order matters: score is the projected output reshaped to
        # (N, T, S); xs are the raw (un-embedded) token ids.
        # NOTE(review): the labels are ``xs`` rather than ``ts`` - confirm
        # this is the intended training target.
        score = score.reshape(N, T, S)
        loss = self.softmax.forward(score, xs)
        return loss

    def backward(self, dout=1):
        """Back-propagate through the loss, projection, decoders and encoders.

        Assumes at least one decoder layer, as before.  Returns ``None``.
        """
        # dout -> (N, T, S)
        dout = self.softmax.backward(dout)
        N, T, S = dout.shape
        dout = dout.reshape(N * T, S)
        # (N*T, S) through the (D, S) projection gives (N*T, D).
        dout = self.linear.backward(dout)
        _, D = dout.shape
        dout = dout.reshape(N, T, D)

        # Walk the decoder stack from last to first.  The first value each
        # decoder returns is used as its gradient w.r.t. the encoder output,
        # the second is passed down the stack.  Every decoder consumed the
        # encoder output, so the encoder-side gradients are summed; previously
        # all but the first decoder's contribution were discarded.
        ddout = None
        for decoder in reversed(self.decoders):
            enc_grad, dout = decoder.backward(dout)
            ddout = enc_grad if ddout is None else ddout + enc_grad

        for encoder in reversed(self.encoders):
            ddout = encoder.backward(ddout)

        # NOTE(review): the embedding layer ran forward twice (xs, then ts)
        # but only the encoder-side gradient is sent back here; the
        # decoder-side gradient left in ``dout`` is dropped.  Confirm against
        # PositionalEmbedding before changing.
        self.e_embed.backward(ddout)

    def _greedy_tokens(self, out):
        """Project 3-D activations with ``self.linear`` and take the argmax."""
        N, T, D = out.shape
        score = self.linear.forward(out.reshape(N * T, D))
        return np.argmax(score, axis=-1).flatten()

    def generate(self, xs, type='GPT'):
        """Greedily pick one token id per position.

        Args:
            xs: Token ids to embed.
            type: ``'GPT'`` runs only the decoder stack and needs the
                embedding output to be 3-D already (no batch axis is added).
                ``'BERT'`` runs only the encoder stack after adding a leading
                batch axis.

        Returns:
            A flat array of argmax token ids, or an empty list for an
            unknown ``type`` (a message is printed in that case).
        """
        # 'GPT' uses only the decoder half of the transformer.
        if type == 'GPT':
            out = self.e_embed.forward(xs)
            for decoder in self.decoders:
                out = decoder.generate(out)
            return self._greedy_tokens(out)

        # 'BERT' uses only the encoder half.  Masking is not implemented and
        # a segment embedding would be needed besides the positional one, so
        # this mode is not meaningful yet; use 'GPT' instead.
        if type == 'BERT':
            out = self.e_embed.forward(xs)
            # Add a leading batch axis.
            out = out[np.newaxis, :]
            for encoder in self.encoders:
                out = encoder.generate(out)
            # The decoder-side projection is reused as is.
            return self._greedy_tokens(out)

        print('invalid generate type')
        return []