Example #1
def main():
    # get and process data
    data = utils.DateData(2000)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3],
          "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print(
        "x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
        "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]),
                                            data.y[0]))

    model = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_HEAD, data.num_word,
                        DROP_RATE)
    # training
    t0 = time.time()
    for t in range(1000):
        bx, by, seq_len = data.sample(64)
        bx, by = utils.pad_zero(bx, max_len=MAX_LEN), utils.pad_zero(
            by, max_len=MAX_LEN + 1)
        loss = model.step(bx, by)
        if t % 50 == 0:
            logits = model(bx[:1], by[:1, :-1], training=False)[0].numpy()
            t1 = time.time()
            print(
                "step: ",
                t,
                "| time: %.2f" % (t1 - t0),
                "| loss: %.4f" % loss.numpy(),
                "| target: ",
                "".join([
                    data.i2v[i] for i in by[0, 1:] if i != data.v2i["<PAD>"]
                ]),
                "| inference: ",
                "".join([
                    data.i2v[i] for i in np.argmax(logits, axis=1)
                    if i != data.v2i["<PAD>"]
                ]),
            )
            t0 = t1

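    # save the trained weights and the vocab mappings for the visualization helper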
    os.makedirs("./visual_helper/transformer", exist_ok=True)
    model.save_weights("./visual_helper/transformer/model.ckpt")
    with open("./visual_helper/transformer_v2i_i2v.pkl", "wb") as f:
        pickle.dump({"v2i": data.v2i, "i2v": data.i2v}, f)

    # prediction
    src_seq = "02-11-30"
    print("src: ", src_seq, "\nprediction: ",
          model.translate(src_seq, data.v2i, data.i2v))

    # save attention matrix for visualization
    _ = model(bx[:1], by[:1, :-1], training=False)

    attn_data = {
        "src": [data.i2v[i] for i in data.x[0]],
        "tgt": [data.i2v[i] for i in data.y[0]],
        "attentions": model.attentions
    }
    with open("./visual_helper/transformer_attention_matrix.pkl", "wb") as f:
        pickle.dump(attn_data, f)
Example #2
def train():
    # get and process data
    data = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
          "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Seq2Seq(
        data.num_word, data.num_word, emb_dim=16, units=32,
        max_pred_len=11, start_token=data.start_token, end_token=data.end_token)

    # training
    for t in range(1500):
        bx, by, decoder_len = data.sample(32)
        loss = model.step(bx, by, decoder_len)
        if t % 70 == 0:
            target = data.idx2str(by[0, 1:-1])
            pred = model.inference(bx[0:1])
            res = data.idx2str(pred[0])
            src = data.idx2str(bx[0])
            print(
                "t: ", t,
                "| loss: %.3f" % loss,
                "| input: ", src,
                "| target: ", target,
                "| inference: ", res,
            )
Example #3
def train():
    # get and process data
    data = utils.DateData(DataSize)
    train_x, train_y, train_l = data.sample(DataSize)

    print("Chinese time order: yy/mm/dd ", data.date_cn[:3],
          "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print(
        "x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
        "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]),
                                            data.y[0]))

    model = Seq2Seq(data.num_word,
                    data.num_word,
                    emb_dim=16,
                    units=32,
                    max_pred_len=11,
                    start_token=data.start_token,
                    end_token=data.end_token)
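    # compile with Adam and sparse categorical cross-entropy, then fit with the myTensorboard logging callback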
    model.compile(optimizer=keras.optimizers.Adam(Learn_rate),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=[keras.metrics.sparse_categorical_accuracy])
    model.fit((train_x, train_y),
              train_y,
              callbacks=[myTensorboard(data)],
              batch_size=Batch_size,
              epochs=Epochs)
Example #4
def train():
    # get and process data
    data = utils.DateData(2000)
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
          "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

    model = Seq2Seq(
        data.num_word, data.num_word, emb_dim=12, units=14, attention_layer_size=16,
        max_pred_len=11, start_token=data.start_token, end_token=data.end_token)

    # training
    for t in range(1000):
        bx, by, decoder_len = data.sample(64)
        loss = model.step(bx, by, decoder_len)
        if t % 70 == 0:
            target = data.idx2str(by[0, 1:-1])
            pred = model.inference(bx[0:1])
            res = data.idx2str(pred[0])
            src = data.idx2str(bx[0])
            print(
                "t: ", t,
                "| loss: %.5f" % loss,
                "| input: ", src,
                "| target: ", target,
                "| inference: ", res,
            )

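    # dump a few source/target index sequences and their attention alignments for visualization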
    pkl_data = {"i2v": data.i2v, "x": data.x[:6], "y": data.y[:6], "align": model.inference(data.x[:6], return_align=True)}

    with open("./visual/tmp/attention_align.pkl", "wb") as f:
        pickle.dump(pkl_data, f)
Example #5
def train():
    dataset = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ",dataset.date_cn[:3],"\nEnglish time order: dd/M/yyyy", dataset.date_en[:3])
    print("Vocabularies: ", dataset.vocab)
    print(f"x index sample:  \n{dataset.idx2str(dataset.x[0])}\n{dataset.x[0]}",
    f"\ny index sample:  \n{dataset.idx2str(dataset.y[0])}\n{dataset.y[0]}")
    loader = DataLoader(dataset, batch_size=32, shuffle=True)
    model = Seq2Seq(dataset.num_word, dataset.num_word, emb_dim=16, units=32, max_pred_len=11,
                    start_token=dataset.start_token, end_token=dataset.end_token)
    for i in range(100):
        for batch_idx, batch in enumerate(loader):
            bx, by, decoder_len = batch
            loss = model.step(bx, by)
            if batch_idx % 70 == 0:
                target = dataset.idx2str(by[0, 1:-1].data.numpy())
                pred = model.inference(bx[0:1])
                res = dataset.idx2str(pred[0].data.numpy())
                src = dataset.idx2str(bx[0].data.numpy())
                print(
                    "Epoch: ",i,
                    "| t: ", batch_idx,
                    "| loss: %.3f" % loss,
                    "| input: ", src,
                    "| target: ", target,
                    "| inference: ", res,
                )
Example #6
def train(emb_dim=32, n_layer=3, n_head=4):
    dataset = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ",dataset.date_cn[:3],"\nEnglish time order: dd/M/yyyy", dataset.date_en[:3])
    print("Vocabularies: ", dataset.vocab)
    print(f"x index sample:  \n{dataset.idx2str(dataset.x[0])}\n{dataset.x[0]}",
    f"\ny index sample:  \n{dataset.idx2str(dataset.y[0])}\n{dataset.y[0]}")
    loader = DataLoader(dataset,batch_size=32,shuffle=True)
    model = Transformer(n_vocab=dataset.num_word, max_len=MAX_LEN, n_layer = n_layer, emb_dim=emb_dim, n_head = n_head, drop_rate=0.1, padding_idx=0)
    if torch.cuda.is_available():
        print("GPU training available")
        device = torch.device("cuda")
        model = model.cuda()
    else:
        device = torch.device("cpu")
        model = model.cpu()
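    # training: pad each batch to a fixed length, move it to the device, and take one optimization step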
    for i in range(100):
        for batch_idx, batch in enumerate(loader):
            bx, by, decoder_len = batch
            bx = torch.from_numpy(utils.pad_zero(bx, max_len=MAX_LEN)).type(torch.LongTensor).to(device)
            by = torch.from_numpy(utils.pad_zero(by, MAX_LEN + 1)).type(torch.LongTensor).to(device)
            loss, logits = model.step(bx, by)
            if batch_idx % 50 == 0:
                target = dataset.idx2str(by[0, 1:-1].cpu().data.numpy())
                pred = model.translate(bx[0:1], dataset.v2i, dataset.i2v)
                res = dataset.idx2str(pred[0].cpu().data.numpy())
                src = dataset.idx2str(bx[0].cpu().data.numpy())
                print(
                    "Epoch: ",i,
                    "| t: ", batch_idx,
                    "| loss: %.3f" % loss,
                    "| input: ", src,
                    "| target: ", target,
                    "| inference: ", res,
                )
Example #7
def train():
    # get and process data
    data = utils.DateData(4000)  # holds the vocabulary and the Chinese/English dates in both string and index form
    print("Chinese time order: yy/mm/dd ", data.date_cn[:3],
          "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
    print("vocabularies: ", data.vocab)
    print(
        "x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
        "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]),
                                            data.y[0]))

    model = Seq2Seq(data.num_word,
                    data.num_word,
                    emb_dim=16,
                    units=32,
                    max_pred_len=11,
                    start_token=data.start_token,
                    end_token=data.end_token)

    # train
    for t in range(1500):
        bx, by, decoder_len = data.sample(32)
        loss = model.step(bx, by, decoder_len)
        if t % 70 == 0:
            target = data.idx2str(by[0, 1:-1])
            # only one sequence is translated at a time, so a single target sequence of indices is returned
            pred = model.inference(bx[0:1])
            # convert the predicted target index sequence back to a string
            res = data.idx2str(pred[0])
            # convert the source index sequence back to a string
            src = data.idx2str(bx[0])
            print(
                "step:",
                t,
                "| loss:",
                loss,
                "| input:",
                src,
                "| target:",
                target,
                "| inference:",
                res,
            )
Example #8
    model.save_weights("./visual/models/transformer/model.ckpt")
    with open("./visual/tmp/transformer_v2i_i2v.pkl", "wb") as f:
        pickle.dump({"v2i": data.v2i, "i2v": data.i2v}, f)


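# reload the saved vocab and weights, translate a sampled batch, and dump the attention matrices for plotting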
def export_attention(model, data):
    with open("./visual/tmp/transformer_v2i_i2v.pkl", "rb") as f:
        dic = pickle.load(f)
    model.load_weights("./visual/models/transformer/model.ckpt")
    bx, by, seq_len = data.sample(32)
    model.translate(bx, dic["v2i"], dic["i2v"])
    attn_data = {
        "src": [[data.i2v[i] for i in bx[j]] for j in range(len(bx))],
        "tgt": [[data.i2v[i] for i in by[j]] for j in range(len(by))],
        "attentions": model.attentions
    }
    with open("./visual/tmp/transformer_attention_matrix.pkl", "wb") as f:
        pickle.dump(attn_data, f)


if __name__ == "__main__":
    d = utils.DateData(4000)
    print("Chinese time order: yy/mm/dd ", d.date_cn[:3],
          "\nEnglish time order: dd/M/yyyy ", d.date_en[:3])
    print("vocabularies: ", d.vocab)
    print("x index sample: \n{}\n{}".format(d.idx2str(d.x[0]), d.x[0]),
          "\ny index sample: \n{}\n{}".format(d.idx2str(d.y[0]), d.y[0]))

    m = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_HEAD, d.num_word, DROP_RATE)
    train(m, d, step=600)
    export_attention(m, d)
Example #9
                *res,
                '\n',
            )
        super(myTensorboard, self).on_epoch_end(epoch, logs)


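# pad x and y to fixed lengths, then shift the target: y[:, :-1] feeds the decoder, y[:, 1:] is the label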
def load_data(data, size):
    x, y, seq_len = data.sample(size)
    x = utils.pad_zero(x, MAX_LEN)
    y = utils.pad_zero(y, MAX_LEN + 1)
    return (x, y[:, :-1]), y[:, 1:]


def train(model: Transformer, data):
    x, y = load_data(data, DATA_SIZE)
    tb = myTensorboard(data)
    model.compile(keras.optimizers.Adam(LEARN_RATE), loss=Loss())
    model.fit(x, y, batch_size=BATCH_SIZE, epochs=EPOCHS, callbacks=[tb])


if __name__ == "__main__":
    d = utils.DateData(DATA_SIZE)
    print("Chinese time order: yy/mm/dd ", d.date_cn[:3],
          "\nEnglish time order: dd/M/yyyy ", d.date_en[:3])
    print("vocabularies: ", d.vocab)
    print("x index sample: \n{}\n{}".format(d.idx2str(d.x[0]), d.x[0]),
          "\ny index sample: \n{}\n{}".format(d.idx2str(d.y[0]), d.y[0]))
    m = Transformer(MODEL_DIM, MAX_LEN, N_LAYER, N_LAYER, N_HEAD, d.num_word)
    m.build([[None, 12], [None, 12]])
    train(m, d)
Example #10
        o, _, _ = self.decoder_train(dec_emb_in, s, sequence_length=seq_len)
        logits = o.rnn_output
        return logits

    def step(self, x, y, seq_len):
        with tf.GradientTape() as tape:
            logits = self.train_logits(x, y, seq_len)
            dec_out = y[:, 1:]  # ignore <GO>
            _loss = self.cross_entropy(dec_out, logits)
            grads = tape.gradient(_loss, self.trainable_variables)
        self.opt.apply_gradients(zip(grads, self.trainable_variables))
        return _loss.numpy()


# get and process data
data = utils.DateData(2000)
print("Chinese time order: yy/mm/dd ", data.date_cn[:3], "\nEnglish time order: dd/M/yyyy ", data.date_en[:3])
print("vocabularies: ", data.vocab)
print("x index sample: \n{}\n{}".format(data.idx2str(data.x[0]), data.x[0]),
      "\ny index sample: \n{}\n{}".format(data.idx2str(data.y[0]), data.y[0]))

model = Seq2Seq(
    data.num_word, data.num_word, emb_dim=16, units=32,
    max_pred_len=11, start_token=data.start_token, end_token=data.end_token)

# training
for t in range(1500):
    bx, by, decoder_len = data.sample(32)
    loss = model.step(bx, by, decoder_len)
    if t % 30 == 0:
        target = data.idx2str(by[0, 1:-1])
Example #11
            print(
                "t: ",
                t,
                "| loss: %.3f" % loss,
                "| input: ",
                src,
                "| target: ",
                target,
                "| inference: ",
                res,
            )
    return LOSS


if __name__ == '__main__':
    data = utils.DateData(4000)  # input data
    m_time = {}
    epochs = 1001

    import time
    start = time.time()
    #  RNN: seq2seq
    model = Seq2Seq(data.num_word,
                    data.num_word,
                    emb_dim=16,
                    units=32,
                    max_pred_len=11,
                    start_token=data.start_token,
                    end_token=data.end_token)
    LOSS = train(data, model, epochs)
    end = time.time()