        return tf.reduce_mean(
            tf.nn.nce_loss(
                weights=self.nce_w, biases=self.nce_b,
                labels=tf.expand_dims(y, axis=1),
                inputs=embedded, num_sampled=5, num_classes=self.v_dim))

    def step(self, x, y):
        # record the forward pass so gradients can flow back to the
        # embeddings and the NCE weights/biases
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
        grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()


def train(model, data):
    for t in range(2500):
        bx, by = data.sample(8)    # mini-batch of 8 (x, y) training pairs
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))


if __name__ == "__main__":
    d = process_w2v_data(corpus, skip_window=2, method="cbow")
    m = CBOW(d.num_word, 2)
    train(m, d)

    # plotting
    show_w2v_word_embedding(m, d, "./visual/results/cbow.png")
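# For comparison, here is a hypothetical full-softmax version of the loss
# above (an illustrative sketch, not part of this code base). tf.nn.nce_loss
# requires weights of shape [num_classes, dim], so nce_w is [v_dim, emb_dim];
# scoring the whole vocabulary like this is exactly the cost that negative
# sampling with num_sampled=5 avoids at every step. The skip-gram variant
# below reuses the same NCE loss and training step.
import tensorflow as tf

def full_softmax_loss(model, x, y, training=None):
    embedded = model.call(x, training)  # [batch, emb_dim]
    # project onto every word in the vocabulary: [batch, v_dim] logits
    logits = tf.matmul(embedded, model.nce_w, transpose_b=True) + model.nce_b
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))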
    # in order to reduce the computation of full softmax
    def loss(self, x, y, training=None):
        embedded = self.call(x, training)
        return tf.reduce_mean(
            tf.nn.nce_loss(
                weights=self.nce_w, biases=self.nce_b,
                labels=tf.expand_dims(y, axis=1),
                inputs=embedded, num_sampled=5, num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            _loss: tf.Tensor = self.loss(x, y, True)
        grads = tape.gradient(_loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return _loss.numpy()


data = process_w2v_data(corpus, skip_window=2, method="skip_gram")
model = SkipGram(data.num_word, 2)

# training
for t in range(2500):
    bx, by = data.sample(8)
    loss = model.step(bx, by)
    if t % 200 == 0:
        print("step: {} | loss: {}".format(t, loss))

# plotting
show_w2v_word_embedding(model, data, "visual_helper/skip_gram.png")
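# What data.sample(8) feeds the two models differs only in direction: CBOW
# maps a context window to its center word, skip-gram maps a center word to
# each word in its window. A rough sketch of skip-gram pair generation,
# assuming a plain list of integer token ids (illustrative only; the repo's
# process_w2v_data handles this internally):
import numpy as np

def make_skip_gram_pairs(token_ids, skip_window=2):
    # for each center word, emit one (center, context) pair per neighbour
    # within +/- skip_window positions
    pairs = []
    for i, center in enumerate(token_ids):
        lo, hi = max(0, i - skip_window), min(len(token_ids), i + skip_window + 1)
        pairs.extend((center, token_ids[j]) for j in range(lo, hi) if j != i)
    return np.array(pairs)  # [n_pairs, 2]: column 0 is x, column 1 is y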