Example #1

import os, sys
import dataloader as dd
from keras.optimizers import *
from keras.callbacks import *

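# Build the source/target token dictionaries from the parallel corpus and convert the
# training and validation text into token-index arrays; the dict_file / h5_file
# arguments let repeated runs reuse the prebuilt vocabulary and arrays.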
itokens, otokens = dd.MakeS2SDict('en2de.s2s.txt', dict_file='en2de_word.txt')
Xtrain, Ytrain = dd.MakeS2SData('en2de.s2s.txt',
                                itokens,
                                otokens,
                                h5_file='en2de.h5')
Xvalid, Yvalid = dd.MakeS2SData('en2de.s2s.valid.txt',
                                itokens,
                                otokens,
                                h5_file='en2de.valid.h5')

print('seq 1 words:', itokens.num())
print('seq 2 words:', otokens.num())
print('train shapes:', Xtrain.shape, Ytrain.shape)
print('valid shapes:', Xvalid.shape, Yvalid.shape)
'''
from rnn_s2s import RNNSeq2Seq
s2s = RNNSeq2Seq(itokens,otokens, 256)
s2s.compile('rmsprop')
s2s.model.fit([Xtrain, Ytrain], None, batch_size=64, epochs=30, validation_data=([Xvalid, Yvalid], None))
'''

from transformer import Transformer, LRSchedulerPerStep, LRSchedulerPerEpoch

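# A small Transformer: 2 encoder/decoder layers, 4 attention heads with d_k = d_v = 64,
# model width 256, position-wise feed-forward width 512, dropout 0.1, length limit 70.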
d_model = 256
s2s = Transformer(itokens, otokens, len_limit=70, d_model=d_model, d_inner_hid=512, \
                  n_head=4, d_k=64, d_v=64, layers=2, dropout=0.1)
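
A minimal training sketch for the model instantiated above, assuming the Transformer wrapper exposes compile() and model.fit() in the same way as the commented-out RNNSeq2Seq baseline, that LRSchedulerPerStep takes the model width plus a warm-up step count, and that the checkpoint path, warm-up value, and optimizer settings below are only illustrative.

lr_scheduler = LRSchedulerPerStep(d_model, 4000)        # assumed signature: (d_model, warmup_steps)
model_saver = ModelCheckpoint('models/en2de.model.h5',  # hypothetical checkpoint path
                              save_best_only=True, save_weights_only=True)
s2s.compile(Adam(0.001, 0.9, 0.98, epsilon=1e-9))
s2s.model.fit([Xtrain, Ytrain], None,
              batch_size=64, epochs=30,
              validation_data=([Xvalid, Yvalid], None),
              callbacks=[lr_scheduler, model_saver])

Passing None as the fit target mirrors the RNN baseline above; presumably the loss is computed inside the model graph from the two input sequences.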

Example #2

import os, sys
import dataloader as dd
from keras.optimizers import *
from keras.callbacks import *

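# Same pipeline as the first example, applied to a different corpus (data/ara.txt).
# Note that this script builds its validation arrays from the same file it trains on;
# only the h5 cache name differs.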
itokens, otokens = dd.MakeS2SDict('data/ara.txt',
                                  dict_file='data/ara_word.txt')
Xtrain, Ytrain = dd.MakeS2SData('data/ara.txt',
                                itokens,
                                otokens,
                                h5_file='data/ara.h5')
Xvalid, Yvalid = dd.MakeS2SData('data/ara.txt',
                                itokens,
                                otokens,
                                h5_file='data/ara.valid.h5')

print('seq 1 words:', itokens.num())
print('seq 2 words:', otokens.num())
print('train shapes:', Xtrain.shape, Ytrain.shape)
print('valid shapes:', Xvalid.shape, Yvalid.shape)
'''
from rnn_s2s import RNNSeq2Seq
s2s = RNNSeq2Seq(itokens,otokens, 256)
s2s.compile('rmsprop')
s2s.model.fit([Xtrain, Ytrain], None, batch_size=64, epochs=30, validation_data=([Xvalid, Yvalid], None))
'''

from transformer import Transformer, LRSchedulerPerStep, LRSchedulerPerEpoch

d_model = 256
s2s = Transformer(itokens, otokens, len_limit=70, d_model=d_model, d_inner_hid=512, \
                  n_head=4, d_k=64, d_v=64, layers=2, dropout=0.1)

Example #3

model_parameters = {
    'd_k': d_k,
    'd_v': d_v,
    'len_limit': len_limit,
    'dropout': dropout,
    'batch_size': batch_size,
    'max_len': max_len
}
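# model_parameters collects this run's hyperparameters; createHistoryFile (a helper
# defined elsewhere in this project) presumably derives a history/log file path from
# them and the command-line arguments.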
filepath = createHistoryFile(model_parameters, sys.argv)

############### Load training data ################
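# The corpus is selected via a command-line flag: 'testdata' loads a small
# English-German subset for quick runs, 'origdata' loads the full English-German corpus.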
if 'testdata' in sys.argv:
    itokens, otokens = dd.MakeS2SDict(
        'data/test_subset/en2de.s2s.txt',
        dict_file='data/test_subset/en2de_word.txt')
    Xtrain, Ytrain = dd.MakeS2SData('data/test_subset/en2de.s2s.txt',
                                    itokens,
                                    otokens,
                                    h5_file='data/test_subset/en2de.h5')
    Xvalid, Yvalid = dd.MakeS2SData('data/test_subset/en2de.s2s.valid.txt',
                                    itokens,
                                    otokens,
                                    h5_file='data/test_subset/en2de.valid.h5')

if 'origdata' in sys.argv:
    itokens, otokens = dd.MakeS2SDict('data/en2de.s2s.txt',
                                      dict_file='data/en2de_word.txt')
    Xtrain, Ytrain = dd.MakeS2SData('data/en2de.s2s.txt',
                                    itokens,
                                    otokens,
                                    h5_file='data/en2de.h5')
    Xvalid, Yvalid = dd.MakeS2SData('data/en2de.s2s.valid.txt',
                                    itokens,