import numpy as np


class EmbeddingDot:
    def __init__(self, W):
        self.embed = Embedding(W)        # Embedding layer
        self.params = self.embed.params  # the layer's parameters
        self.grads = self.embed.grads    # the layer's gradients
        self.cache = None                # holds forward results for use in backward

    def forward(self, h, idx):
        # h: hidden-layer vectors; idx: NumPy array of word IDs (mini-batch)
        target_W = self.embed.forward(idx)  # (N, H) = (number of word IDs, weight dimension)
        # W is not transposed here, so the dot product is written as an
        # elementwise product followed by a row-wise sum:
        out = np.sum(target_W * h, axis=1)  # (N, H) * (N, H) -> (N, H) -> (N,)
        self.cache = (h, target_W)
        return out

    def backward(self, dout):
        h, target_W = self.cache
        dout = dout.reshape(dout.shape[0], 1)  # restore the axis summed out in forward

        # gradient for target_W
        dtarget_W = dout * h
        self.embed.backward(dtarget_W)
        # gradient to propagate downstream
        dh = dout * target_W
        return dh
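# Below is a minimal sketch of the Embedding layer that EmbeddingDot builds on,
# plus a toy forward/backward run. This Embedding follows the usual
# row-extraction implementation (np.add.at accumulates gradients for repeated
# IDs); the sizes and seed are illustrative assumptions, not part of the code above.
class Embedding:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        W, = self.params
        self.idx = idx
        return W[idx]                   # pick out the rows for the given word IDs

    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0
        np.add.at(dW, self.idx, dout)   # accumulate so repeated IDs add up
        return None


# Toy run: batch of 2, hidden size 3, vocabulary of 7 words.
np.random.seed(0)
W = np.random.randn(7, 3).astype('f')
layer = EmbeddingDot(W)
h = np.random.randn(2, 3).astype('f')       # hidden vectors from the input side
idx = np.array([1, 4])                      # target word IDs
out = layer.forward(h, idx)                 # per-sample dot products, shape (2,)
dh = layer.backward(np.ones(2, dtype='f'))  # gradient w.r.t. h, shape (2, 3)
print(out.shape, dh.shape)                  # -> (2,) (2, 3)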
class TimeEmbedding:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.layers = None
        self.W = W

    def forward(self, xs):
        # xs has no D axis: each row is just a sequence of word IDs,
        # e.g. [[1, 3, 2], [45, 2, 3], ...]
        N, T = xs.shape      # batch size, number of time steps
        V, D = self.W.shape  # vocab_size, wordvec_size
        out = np.empty((N, T, D), dtype='f')
        self.layers = []

        for t in range(T):
            layer = Embedding(self.W)  # one Embedding layer per time step, all sharing W
            out[:, t, :] = layer.forward(xs[:, t])
            self.layers.append(layer)

        return out
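# Quick shape check of TimeEmbedding.forward, reusing the Embedding sketch
# above (sizes here are illustrative assumptions):
V, D = 10, 4
W_t = np.random.randn(V, D).astype('f')
te = TimeEmbedding(W_t)
xs = np.array([[1, 3, 2], [4, 0, 9]])  # (N=2, T=3) rows of word IDs, no D axis
print(te.forward(xs).shape)            # -> (2, 3, 4) == (N, T, D)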
class CBOW:
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        V, H = vocab_size, hidden_size

        # initialize the weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(V, H).astype('f')

        # create the layers: 2 * window_size Embedding layers that all share W_in
        # (e.g. window_size=5 gives 10 layers through weight sharing)
        self.in_layers = []
        for i in range(2 * window_size):
            layer = Embedding(W_in)
            self.in_layers.append(layer)
        # configured to sample 5 negative examples per target
        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        # collect all weights and gradients into lists; the shared W_in appears
        # once per in-layer, followed by the loss layer's parameters
        layers = self.in_layers + [self.ns_loss]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # store the distributed representations of words in an instance variable
        self.word_vecs = W_in
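    # For context, a sketch of the forward/backward passes that complete this
    # class. It mirrors the standard CBOW-with-negative-sampling layout and
    # assumes NegativeSamplingLoss exposes forward(h, target) / backward(dout);
    # treat it as a sketch, not a definitive implementation.
    def forward(self, contexts, target):
        # average the context-word vectors gathered by the Embedding layers
        h = 0
        for i, layer in enumerate(self.in_layers):
            h += layer.forward(contexts[:, i])
        h *= 1 / len(self.in_layers)
        loss = self.ns_loss.forward(h, target)
        return loss

    def backward(self, dout=1):
        dout = self.ns_loss.backward(dout)
        dout *= 1 / len(self.in_layers)  # undo the averaging
        for layer in self.in_layers:
            layer.backward(dout)
        return None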
# Variant: the same CBOW.__init__ with the weights held as TensorFlow variables
# instead of NumPy arrays.
import tensorflow as tf

def __init__(self, vocab_size, hidden_size, window_size, corpus):
    V, H = vocab_size, hidden_size

    # initialize the weights
    W_in = tf.Variable(
        tf.random.normal((V, H), mean=0.0, stddev=0.01, dtype='float'))
    W_out = tf.Variable(
        tf.random.normal((V, H), mean=0.0, stddev=0.01, dtype='float'))

    # create the layers
    self.in_layers = []
    for i in range(2 * window_size):
        layer = Embedding(W_in)  # uses the Embedding layer
        self.in_layers.append(layer)
    self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

    # collect all weights and gradients into a list
    layers = self.in_layers + [self.ns_loss]
    self.params, self.grads = [], []
    for layer in layers:
        self.params += layer.params
        self.grads += layer.grads

    # store the distributed representations of words in an instance variable
    self.word_vecs = W_in
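# A quick standalone check of this initializer (assumes TensorFlow 2.x; the
# dtype is given explicitly as tf.float32 here to match the NumPy variant's
# .astype('f'); the (5, 3) shape is illustrative):
W_demo = tf.Variable(tf.random.normal((5, 3), mean=0.0, stddev=0.01, dtype=tf.float32))
print(W_demo.shape, W_demo.dtype)  # -> (5, 3) <dtype: 'float32'>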