def test_word_one_hot_vector_convertor():
    """Smoke-test word_one_hot_vector_convertor on SimpleChainEngine data.

    Runs each conversion the convertor is called with here (word <-> one-hot
    vector, sentence <-> one-hot matrix with and without a mask, list of
    sentences <-> one-hot tensor) and prints every input next to its
    converted or reconstructed form.  Nothing is asserted; the output is
    meant to be inspected by eye.

    Every ``print`` is written as a call with a single argument, which
    produces the same output under the Python 2 print statement and the
    Python 3 print function.
    """
    from data.simple_chain_engine import SimpleChainEngine

    engine = SimpleChainEngine('0123456789abcdef')

    # One sample pair straight from the engine.
    s, c = engine.get_data()
    print("%s -> %s" % (s, c))

    # A small dataset of five pairs; `cs` is reused by every check below.
    ss, cs = engine.get_dataset(5)
    for (s, c) in zip(ss, cs):
        print("%s -> %s" % (s, c))

    print(engine.get_dictionary())
    convertor = word_one_hot_vector_convertor(engine.get_dictionary())

    # word -> one-hot vector
    for word in engine.get_dictionary():
        vector = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, vector))

    # word -> one-hot vector -> word
    for word in engine.get_dictionary():
        vector = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, convertor.one_hot_vector2word(vector)))

    # sentence -> one-hot matrix, called without a maximum length.
    matrixs = [convertor.sentence2one_hot_matrix(c) for c in cs]
    for c, matrix in zip(cs, matrixs):
        print("%s -> " % (c))
        print(matrix.astype('int8'))
    for c, matrix in zip(cs, matrixs):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix)))

    # With an explicit maxlen the call is unpacked as a (matrix, mask) pair,
    # and the mask is handed back when decoding.
    maxlen = len(engine.get_dictionary()) + 10
    matrixs = []
    masks = []
    for c in cs:
        matrix, mask = convertor.sentence2one_hot_matrix(c, maxlen)
        matrixs.append(matrix)
        masks.append(mask)
    for c, matrix, mask in zip(cs, matrixs, masks):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix, mask)))

    # All sentences at once: list of sentences <-> (tensor, mask).
    tensor, mask = convertor.sentences2one_hot_tensor(
        cs, len(engine.get_dictionary()))
    # Uncomment to dump the raw arrays:
    # print('tensor:')
    # print(tensor)
    # print('mask:')
    # print(mask)
    recs = convertor.one_hot_tensor2sentences(tensor, mask)
    for c, rec in zip(cs, recs):
        print("%s -> %s" % (c, rec))
# NOTE(review): this fragment starts inside a function body whose `def` line is
# not visible here (presumably the sentence2str helper called below -- confirm).
# The next four lines are the tail of that body and are indented accordingly.
    s = ''
    for word in sentence:
        # Each word is followed by one space, so the result ends with a space.
        s += word + ' '
    return s


# Vocabulary: the digit strings '0'..'9' followed by chr(65)..chr(74),
# i.e. the letters 'A'..'J' -- 20 words in total.
words = [str(i) for i in range(10)] + [chr(i) for i in range(65, 75)]
print('words:', words)

# Dataset
DATA_SIZE = 1000
# HIDDEN_SIZE and BATCH_SIZE are not used within this fragment.
HIDDEN_SIZE = 128
BATCH_SIZE = 33
# Passed as the second argument to sentences2one_hot_tensor at the end.
MAXLEN = len(words)

engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)

# Preview the first six (start, sentence) pairs: i runs 0..5, then break.
for (i, start, sentence) in zip(range(DATA_SIZE), starts, sentences):
    print("%s -> %s" % (sentence2str(start), sentence2str(sentence)))
    if i >= 5:
        break

# sinputs drops the last word and soutputs drops the first, so soutputs[k]
# is the word that follows sinputs[k] in the same sentence.
sinputs = [sentence[:-1] for sentence in sentences]
soutputs = [sentence[1:] for sentence in sentences]

# Preview the first six (input, output) pairs.
for (i, sinput, soutput) in zip(range(DATA_SIZE), sinputs, soutputs):
    print("%s -> %s" % (sentence2str(sinput), sentence2str(soutput)))
    if i >= 5:
        break

# Encode the input sentences; the call is unpacked as two values.
# NOTE(review): array shapes/dtypes of D_X and D_mask are not visible here.
convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)
# Data-preparation section of a script: generate chains with
# SimpleChainEngine, encode them with CharacterDataEngine, and split the
# encoded arrays into training and validation parts.
from data.simple_chain_engine import SimpleChainEngine
from data.character_data_engine import CharacterDataEngine
from keras import backend as K
from keras.layers.core import Activation, TimeDistributedDense
from keras_layer.shift import Shift
from keras.layers.recurrent import SimpleRNN
import numpy as np
from keras.layers.containers import Graph

TRAINING_SIZE = 100
chars = "0123456789abcdef"

print("Generating data...")
engine = SimpleChainEngine(chars)
starts, chains = engine.get_dataset(TRAINING_SIZE)
print("Total number of data:", len(starts))

print("Vectorization...")
convertor = CharacterDataEngine(chars, maxlen=len(chars) - 1)
# The starts are encoded with maxlen=1; the chains use the convertor's
# own maxlen (len(chars) - 1) set just above.
initial_value = convertor.encode_dataset(starts, maxlen=1)
y = convertor.encode_dataset(chains)

# Hold out the last len(y) // 10 samples (10%, rounded down) for validation.
# Floor division keeps split_at an int: with `/` Python 3 yields a float and
# the slices below raise TypeError.  On Python 2 ints `//` equals the old `/`.
split_at = len(y) - len(y) // 10
(y_train, y_val) = (y[:split_at], y[split_at:])
(i_train, i_val) = (initial_value[:split_at], initial_value[split_at:])
# The inputs are the very same objects as the targets (no copy is made).
(X_train, X_val) = (y_train, y_val)
print(i_train.shape)
print(y_train.shape)

print("Build model...")
HIDDEN_SIZE = 128
BATCH_SIZE = 50
def test_word_one_hot_vector_convertor():
    """Smoke-test word_one_hot_vector_convertor on SimpleChainEngine data.

    Runs each conversion the convertor is called with here (word <-> one-hot
    vector, sentence <-> one-hot matrix with and without a mask, list of
    sentences <-> one-hot tensor) and prints every input next to its
    converted or reconstructed form.  Nothing is asserted; the output is
    meant to be inspected by eye.

    Every ``print`` is written as a call with a single argument, which
    produces the same output under the Python 2 print statement and the
    Python 3 print function.
    """
    from data.simple_chain_engine import SimpleChainEngine

    engine = SimpleChainEngine('0123456789abcdef')

    # One sample pair straight from the engine.
    s, c = engine.get_data()
    print("%s -> %s" % (s, c))

    # A small dataset of five pairs; `cs` is reused by every check below.
    ss, cs = engine.get_dataset(5)
    for (s, c) in zip(ss, cs):
        print("%s -> %s" % (s, c))

    print(engine.get_dictionary())
    convertor = word_one_hot_vector_convertor(engine.get_dictionary())

    # word -> one-hot vector
    for word in engine.get_dictionary():
        vector = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, vector))

    # word -> one-hot vector -> word
    for word in engine.get_dictionary():
        vector = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, convertor.one_hot_vector2word(vector)))

    # sentence -> one-hot matrix, called without a maximum length.
    matrixs = [convertor.sentence2one_hot_matrix(c) for c in cs]
    for c, matrix in zip(cs, matrixs):
        print("%s -> " % (c))
        print(matrix.astype('int8'))
    for c, matrix in zip(cs, matrixs):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix)))

    # With an explicit maxlen the call is unpacked as a (matrix, mask) pair,
    # and the mask is handed back when decoding.
    maxlen = len(engine.get_dictionary()) + 10
    matrixs = []
    masks = []
    for c in cs:
        matrix, mask = convertor.sentence2one_hot_matrix(c, maxlen)
        matrixs.append(matrix)
        masks.append(mask)
    for c, matrix, mask in zip(cs, matrixs, masks):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix, mask)))

    # All sentences at once: list of sentences <-> (tensor, mask).
    tensor, mask = convertor.sentences2one_hot_tensor(
        cs, len(engine.get_dictionary()))
    # Uncomment to dump the raw arrays:
    # print('tensor:')
    # print(tensor)
    # print('mask:')
    # print(mask)
    recs = convertor.one_hot_tensor2sentences(tensor, mask)
    for c, rec in zip(cs, recs):
        print("%s -> %s" % (c, rec))
# NOTE(review): this fragment starts inside a function body whose `def` line is
# not visible here (presumably the sentence2str helper called below -- confirm).
# The next four lines are the tail of that body and are indented accordingly.
    s = ''
    for word in sentence:
        # Each word is followed by one space, so the result ends with a space.
        s += word + ' '
    return s


# Vocabulary: the digit strings '0'..'9' followed by chr(65)..chr(74),
# i.e. the letters 'A'..'J' -- 20 words in total.
words = [str(i) for i in range(10)] + [chr(i) for i in range(65,75)]
print ('words:', words)

# Dataset
DATA_SIZE = 1000
# HIDDEN_SIZE and BATCH_SIZE are not used within this fragment.
HIDDEN_SIZE = 128
BATCH_SIZE = 33
# Passed as the second argument to sentences2one_hot_tensor at the end.
MAXLEN = len(words)

engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)

# Preview the first six (start, sentence) pairs: i runs 0..5, then break.
for (i, start, sentence) in zip(range(DATA_SIZE), starts, sentences):
    print ("%s -> %s" %(sentence2str(start), sentence2str(sentence)))
    if i>=5:
        break

# sinputs drops the last word and soutputs drops the first, so soutputs[k]
# is the word that follows sinputs[k] in the same sentence.
sinputs = [sentence[:-1] for sentence in sentences]
soutputs = [sentence[1:] for sentence in sentences]

# Preview the first six (input, output) pairs.
for (i, sinput, soutput) in zip(range(DATA_SIZE), sinputs, soutputs):
    print ("%s -> %s" %(sentence2str(sinput), sentence2str(soutput)))
    if i>=5:
        break

# Encode the input sentences; the call is unpacked as two values.
# NOTE(review): array shapes/dtypes of D_X and D_mask are not visible here.
convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)