Exemplo n.º 1
0
from common_train import Trainer
from lm_loss import LogLoss
from lstm_dataset import S2SDataSet
from lstm_graph import BiLSTMEncodeGraph
from ndnn.sgd import Adam
from vocab_dict import get_dict

# Train a BiLSTM-encoder seq2seq model on the bobsue seq2seq data.
#
# get_dict() returns a pair; both halves are handed to every dataset, and the
# second half is also passed to the trainer.
vocab_dict, idx_dict = get_dict()

# Train / dev / test splits, all built from the same pair of dictionaries.
train_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.train.tsv"
)
dev_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.dev.tsv"
)
test_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.test.tsv"
)

# Model sizing: the graph receives the dictionary length and hidden_dim.
dict_size = len(vocab_dict)
hidden_dim = 200
# NOTE(review): batch_size is never read in the visible code; the literal 50
# passed to train() below is presumably the same setting -- confirm.
batch_size = 50

trainer = Trainer()

# The loss is created before the optimizer, matching the original
# argument-evaluation order.
loss = LogLoss()
optimizer = Adam(eta=0.001, decay=0.99)
graph = BiLSTMEncodeGraph(loss, optimizer, dict_size, hidden_dim)

# NOTE(review): 100 is presumably the epoch count and 's2s_bilstm' the run
# name -- confirm against Trainer.train.
trainer.train(
    idx_dict, 100, 's2s_bilstm', graph, train_ds, dev_ds, test_ds, 50
)
Exemplo n.º 2
0
from common_train import Trainer
from lm_loss import LogLoss
from lstm_dataset import S2SDataSet
from lstm_graph import LSTMEncodeGraph
from ndnn.sgd import Adam
from vocab_dict import get_dict

# Train an LSTM-encoder seq2seq model on the bobsue seq2seq data.
vocab_dict, idx_dict = get_dict()

# Dataset files, bound to names first so the three constructor calls read
# uniformly.
train_path = "bobsue-data/bobsue.seq2seq.train.tsv"
dev_path = "bobsue-data/bobsue.seq2seq.dev.tsv"
test_path = "bobsue-data/bobsue.seq2seq.test.tsv"

train_ds = S2SDataSet(vocab_dict, idx_dict, train_path)
dev_ds = S2SDataSet(vocab_dict, idx_dict, dev_path)
test_ds = S2SDataSet(vocab_dict, idx_dict, test_path)

dict_size = len(vocab_dict)
hidden_dim = 200
# NOTE(review): batch_size is unused in the visible code; train() is given
# the literal 50 instead -- presumably the same setting, confirm.
batch_size = 50

trainer = Trainer()
graph = LSTMEncodeGraph(
    LogLoss(),
    Adam(eta=0.001, decay=0.99),
    dict_size,
    hidden_dim,
)
trainer.train(
    idx_dict,
    100,
    's2s_lstm',
    graph,
    train_ds,
    dev_ds,
    test_ds,
    50,
)
Exemplo n.º 3
0
from common_train import Trainer
from lstm_dataset import LSTMDataSet
from lstm_graph import LogGraph
from ndnn.sgd import Adam
from vocab_dict import get_dict

# Train the log-loss language-model graph on the bobsue LM data.
vocab_dict, idx_dict = get_dict()

# One LSTMDataSet per split; each gets both dictionaries plus its file path.
train_ds = LSTMDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.lm.train.txt"
)
dev_ds = LSTMDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.lm.dev.txt"
)
test_ds = LSTMDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.lm.test.txt"
)

dict_size = len(vocab_dict)
hidden_dim = 200
# NOTE(review): batch_size is not referenced below; the trailing 50 in the
# train() call is presumably the same value -- confirm.
batch_size = 50

# LogGraph takes only an optimizer (no loss object) plus the two sizes.
optimizer = Adam(eta=0.001, decay=0.99)
graph = LogGraph(optimizer, dict_size, hidden_dim)

trainer = Trainer()
trainer.train(
    idx_dict, 100, 'lm_logloss', graph, train_ds, dev_ds, test_ds, 50
)
Exemplo n.º 4
0
from ndnn.sgd import Adam
from ndnn.store import ParamStore
from report_stat import LogFile, ErrorStat
from vocab_dict import get_dict

# Set-up for a decoder graph that is paired with a previously saved LSTM
# encoder.
# NOTE(review): S2SDataSet, LSTMGraph and LogLoss are not imported in the
# visible part of this file -- confirm they are brought into scope elsewhere.
vocab_dict, idx_dict = get_dict()

# Seq2seq splits, all sharing the same dictionaries.
train_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.train.tsv"
)
dev_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.dev.tsv"
)
test_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.test.tsv"
)

dict_size = len(vocab_dict)
hidden_dim = 200
# NOTE(review): batch_size is not read anywhere in the visible code.
batch_size = 50

# Decoder graph: built with a loss and an optimizer.
decode_graph = LSTMGraph(
    LogLoss(), Adam(eta=0.001, decay=0.99), dict_size, hidden_dim
)

# Encoder graph: built with neither loss nor optimizer (both None), then
# populated from the parameters stored in 'lstm_encoder.mdl'.
enc_lstm_graph = LSTMGraph(None, None, dict_size, hidden_dim)
enc_lstm_store = ParamStore('lstm_encoder.mdl')
enc_lstm_params = enc_lstm_store.load()
enc_lstm_graph.load(enc_lstm_params)


def lstm_encode(data):
    enc_lstm_graph.reset()

    b_size, data_len = data.shape

    enc_lstm_graph.h0.value = np.zeros([b_size, hidden_dim])
    enc_lstm_graph.c0.value = np.zeros([b_size, hidden_dim])

    h = enc_lstm_graph.h0
Exemplo n.º 5
0
from common_train import Trainer
from ndnn.rnn.lm_loss import LogLoss
from ndnn.rnn.lstm_dataset import S2SDict, S2SDataSet
from ndnn.rnn.lstm_graph import BiLSTMEncodeGraph
from ndnn.sgd import Adam

# Train a BiLSTM-encoder seq2seq model on the part/whole data.
#
# The dictionary object is built from both data files. It is named
# `s2s_dict` rather than `dict` so the builtin `dict` type is not shadowed
# for the rest of the module.
s2s_dict = S2SDict(["data/part.train", "data/whole.test"])

# Both datasets use the dictionary object's enc_dict / dec_dict attributes.
train_ds = S2SDataSet(s2s_dict.enc_dict, s2s_dict.dec_dict, "data/part.train")
test_ds = S2SDataSet(s2s_dict.enc_dict, s2s_dict.dec_dict, "data/whole.test")

hidden_dim = 200
# NOTE(review): batch_size is never read in the visible code; the literal 50
# passed to train() below is presumably the same setting -- confirm.
batch_size = 50

trainer = Trainer()

# The graph is sized by the lengths of enc_dict and dec_dict separately.
lstm_graph = BiLSTMEncodeGraph(LogLoss(), Adam(eta=0.001, decay=0.99),
                               len(s2s_dict.enc_dict), len(s2s_dict.dec_dict),
                               hidden_dim)
# NOTE(review): test_ds is passed twice; no separate dev split is loaded, so
# it presumably fills both the dev and test slots -- confirm.
trainer.train(100, 'part_whole', lstm_graph, train_ds, test_ds, test_ds, 50)
Exemplo n.º 6
0
from common_train import Trainer
from lstm_dataset import LSTMDataSet
from lstm_graph import HingeGraph
from ndnn.sgd import Adam
from vocab_dict import get_dict

# Train four hinge-loss language-model variants on the bobsue LM data.
#
# Each variant builds its own HingeGraph and is trained under its own run
# name. The graphs differ only in the last two constructor arguments.
vocab_dict, idx_dict = get_dict()

train_ds = LSTMDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.lm.train.txt")
dev_ds = LSTMDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.lm.dev.txt")
test_ds = LSTMDataSet(vocab_dict, idx_dict, "bobsue-data/bobsue.lm.test.txt")

dict_size = len(vocab_dict)
hidden_dim = 200
# NOTE(review): batch_size is never read in the visible code; the literal 50
# passed to every train() call is presumably the same setting -- confirm.
batch_size = 50

trainer = Trainer()

# Share Embedding
# NOTE(review): the meaning of the trailing (-1/100/10, False/True) arguments
# is not visible here -- see HingeGraph for their semantics.
sem_graph = HingeGraph(Adam(eta=0.001), dict_size, hidden_dim, -1, False)
trainer.train(idx_dict, 100, 'lm_hingeloss_sem', sem_graph, train_ds, dev_ds, test_ds, 50)

all_graph = HingeGraph(Adam(eta=0.001), dict_size, hidden_dim, -1, True)
trainer.train(idx_dict, 100, 'lm_hingeloss_all', all_graph, train_ds, dev_ds, test_ds, 50)

# Fix: train the graph built for this run. Previously all_graph was passed
# here, so r100_graph was never trained and all_graph kept training under
# this run's name.
r100_graph = HingeGraph(Adam(eta=0.001), dict_size, hidden_dim, 100, True)
trainer.train(idx_dict, 100, 'lm_hingeloss_r100', r100_graph, train_ds, dev_ds, test_ds, 50)

# Fix: same copy-paste error as above -- pass r10_graph, not all_graph.
r10_graph = HingeGraph(Adam(eta=0.001), dict_size, hidden_dim, 10, True)
trainer.train(idx_dict, 100, 'lm_hingeloss_r10', r10_graph, train_ds, dev_ds, test_ds, 50)
Exemplo n.º 7
0
from common_train import Trainer
from lm_loss import LogLoss
from lstm_dataset import S2SDataSet
from lstm_graph import AttentionGraph
from ndnn.sgd import Adam
from vocab_dict import get_dict

# Train the attention seq2seq graph on the bobsue seq2seq data.
vocab_dict, idx_dict = get_dict()

# Train / dev / test splits built from the same dictionaries.
train_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.train.tsv"
)
dev_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.dev.tsv"
)
test_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.test.tsv"
)

dict_size = len(vocab_dict)
hidden_dim = 200
# NOTE(review): batch_size is unused in the visible code; train() receives
# the literal 50 -- presumably the same setting, confirm.
batch_size = 50

trainer = Trainer()

# Adam is created with only eta here (no decay argument).
loss = LogLoss()
optimizer = Adam(eta=0.001)
attention_graph = AttentionGraph(loss, optimizer, dict_size, hidden_dim)

trainer.train(
    idx_dict,
    100,
    's2s_attention',
    attention_graph,
    train_ds,
    dev_ds,
    test_ds,
    50,
)
Exemplo n.º 8
0
from ndnn.dataset import Batch
from ndnn.sgd import Adam
from ndnn.store import ParamStore
from vocab_dict import get_dict, translate

# Load three previously saved seq2seq encoder graphs (LSTM, BiLSTM, BoW)
# and collect them in a list.
# NOTE(review): LSTMDataSet, S2SDataSet, LogLoss and the three graph classes
# are not imported in the visible part of this file -- confirm they are in
# scope.
vocab_dict, idx_dict = get_dict()

lmdev_ds = LSTMDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.lm.dev.txt"
)
s2strain_ds = S2SDataSet(
    vocab_dict, idx_dict, "bobsue-data/bobsue.seq2seq.train.tsv"
)

dict_size = len(vocab_dict)
hidden_dim = 200
# NOTE(review): batch_size is not read anywhere in the visible code.
batch_size = 50

# Each encoder follows the same three steps: build the graph, open its
# ParamStore, then feed the stored parameters into the graph.

# LSTM encoder
lstm_encode_graph = LSTMEncodeGraph(
    LogLoss(), Adam(eta=0.001), dict_size, hidden_dim
)
lstm_encode_store = ParamStore("model/s2s_lstm.mdl")
lstm_encode_params = lstm_encode_store.load()
lstm_encode_graph.load(lstm_encode_params)

# BiLSTM encoder
bilstm_encode_graph = BiLSTMEncodeGraph(
    LogLoss(), Adam(eta=0.001), dict_size, hidden_dim
)
bilstm_encode_store = ParamStore("model/s2s_bilstm.mdl")
bilstm_encode_params = bilstm_encode_store.load()
bilstm_encode_graph.load(bilstm_encode_params)

# Bag-of-words encoder
bow_encode_graph = BowEncodeGraph(
    LogLoss(), Adam(eta=0.001), dict_size, hidden_dim
)
bow_encode_store = ParamStore("model/s2s_bow.mdl")
bow_encode_params = bow_encode_store.load()
bow_encode_graph.load(bow_encode_params)

# Order matters to any consumer that indexes this list: LSTM, BiLSTM, BoW.
encode_graphs = [
    lstm_encode_graph,
    bilstm_encode_graph,
    bow_encode_graph,
]