Example #1 (CBOW)
            labels=tf.expand_dims(y, axis=1),
            inputs=context_embedding,
            num_sampled=5,  # draw 5 negative samples
            num_classes=self.v_dim
        ))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
            grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()


def train(model, data):
    for t in range(2500):
        bx, by = data.sample(8)
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))


if __name__ == "__main__":
    data = process_w2v_data(corpus, skip_window=2, method="cbow")
    # the vocabulary holds data.num_word words; each word vector has length 2
    model = CBOW(data.num_word, 2)
    train(model, data)

    # plot
    show_w2v_word_embedding(model, data, "./cbow.png")
Example #2 (CBOW)
            tf.nn.nce_loss(weights=self.nce_w,
                           biases=self.nce_b,
                           labels=tf.expand_dims(y, axis=1),
                           inputs=embedded,
                           num_sampled=5,
                           num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
            grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()


def train(model, data):
    for t in range(2500):
        bx, by = data.sample(8)
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))


if __name__ == "__main__":
    d = process_w2v_data(corpus, skip_window=2, method="cbow")
    m = CBOW(d.num_word, 2)
    train(m, d)

    # plotting
    show_w2v_word_embedding(m, d, "./visual/results/cbow.png")
Example #3 (skip-gram)
            tf.nn.nce_loss(weights=self.nce_w,
                           biases=self.nce_b,
                           labels=tf.expand_dims(y, axis=1),
                           inputs=embedded,
                           num_sampled=5,
                           num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
            grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()


def train(model, data):
    for t in range(2500):
        bx, by = data.sample(8)
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))


if __name__ == "__main__":
    d = process_w2v_data(corpus, skip_window=2, method="skip_gram")
    m = SkipGram(d.num_word, 2)
    train(m, d)

    # plotting
    show_w2v_word_embedding(m, d, "./visual/results/skipgram.png")
Example #4 (skip-gram)
            tf.nn.nce_loss(
                weights=self.nec_w,
                biases=self.nec_b,
                labels=tf.expand_dims(y, axis=1),
                inputs=embedded,
                num_sampled=5,
                num_classes=self.v_dim
            )
        )

    def step(self, x, y):
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
            grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()

def train(model, data):
    for t in range(5000):
        bx, by = data.sample(8)
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))

if __name__ == '__main__':
    d = process_w2v_data(corpus, skip_window=2, method="skip_gram")
    m = SkipGram(d.num_word, 2)
    train(m, d)

    show_w2v_word_embedding(m, d, "/Users/troy/work/ai/trader_one/com/troy/data/result/skip-gram.png")
Example #5 (skip-gram)
    def loss(self, x, y, training=None):
        embedded = self.call(x, training)
        return tf.reduce_mean(
            tf.nn.nce_loss(weights=self.nce_w,
                           biases=self.nce_b,
                           labels=tf.expand_dims(y, axis=1),
                           inputs=embedded,
                           num_sampled=5,
                           num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            _loss = self.loss(x, y, True)
            grads = tape.gradient(_loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return _loss.numpy()


data = process_w2v_data(corpus, skip_window=2, method="skip_gram")
model = SkipGram(data.num_word, 2)

# training
for t in range(2500):
    bx, by = data.sample(8)
    loss = model.step(bx, by)
    if t % 200 == 0:
        print("step: {} | loss: {}".format(t, loss))

# plotting
show_w2v_word_embedding(model, data)
Example #6 (skip-gram)
    def loss(self, x, y, training=None):
        embedded = self.call(x, training)
        return tf.reduce_mean(
            tf.nn.nce_loss(weights=self.nce_w,
                           biases=self.nce_b,
                           labels=tf.expand_dims(y, axis=1),
                           inputs=embedded,
                           num_sampled=5,
                           num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            _loss: tf.Tensor = self.loss(x, y, True)
            grads = tape.gradient(_loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return _loss.numpy()


data = process_w2v_data(corpus, skip_window=2, method="skip_gram")
model = SkipGram(data.num_word, 2)

# training
for t in range(2500):
    bx, by = data.sample(8)
    loss = model.step(bx, by)
    if t % 200 == 0:
        print("step: {} | loss: {}".format(t, loss))

# plotting
show_w2v_word_embedding(model, data, "./visual/results/skip_gram.png")
Example #7 (skip-gram)
                           num_sampled=5,
                           num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
            grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()


def train(model, data):
    for t in range(2500):
        bx, by = data.sample(8)
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))


if __name__ == "__main__":
    d = process_w2v_data(corpus, skip_window=2, method="skip_gram")
    m = SkipGram(d.num_word, 2)
    train(m, d)

    # plot
    show_w2v_word_embedding(m, d, "./skipgram.png")

# Note: word2vec cannot handle polysemy (one word, multiple senses).
# For example, in "我是阳光男孩" ("I am a sunny boy") and "今天阳光明媚" ("the sunshine
# is bright today"), word2vec treats "阳光" as carrying exactly the same meaning.
# Fix: if the surrounding sentence context is taken into account, the word vector can
# express a word's different meanings in different sentences; see the sketch below.
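# A minimal sketch of that limitation, reusing the trained `m` and `d` from above.
# It assumes the data helper exposes a word-to-id mapping `d.v2i` (an illustrative
# name; adapt it to the real helper):
import numpy as np

wid = np.array([d.v2i["阳光"]])      # integer id of the word "阳光"
vec = m.call(wid, training=False)    # the word's single, fixed embedding
# Every sentence containing "阳光" is forced to reuse this same vector; the
# surrounding words never change it, so the two senses above are indistinguishable.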
Example #8 (skip-gram)
            tf.nn.nce_loss(weights=self.nce_w,
                           biases=self.nce_b,
                           labels=tf.expand_dims(y, axis=1),
                           inputs=embedded,
                           num_sampled=5,
                           num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
            grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()


def train(model, data):
    for t in range(2500):
        bx, by = data.sample(8)
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))


if __name__ == "__main__":
    d = process_w2v_data(corpus, skip_window=2, method="skip_gram")
    m = SkipGram(d.num_word, 2)
    train(m, d)

    # plotting
    show_w2v_word_embedding(m, d, "./word2vector/skipgram_result.png")
Example #9 (CBOW)
            tf.nn.nce_loss(weights=self.nce_w,
                           biases=self.nce_b,
                           labels=tf.expand_dims(y, axis=1),
                           inputs=embedded,
                           num_sampled=5,
                           num_classes=self.v_dim))

    def step(self, x, y):
        with tf.GradientTape() as tape:
            loss = self.loss(x, y, True)
            grads = tape.gradient(loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return loss.numpy()


def train(model, data):
    for t in range(2500):
        bx, by = data.sample(8)
        loss = model.step(bx, by)
        if t % 200 == 0:
            print("step: {} | loss: {}".format(t, loss))


if __name__ == "__main__":
    d = process_w2v_data(corpus, skip_window=2, method="cbow")
    m = CBOW(d.num_word, 2)
    train(m, d)

    # plotting
    show_w2v_word_embedding(m, d, "./word2vector/cbow_result.png")