示例#1
0
文件: convertor.py 项目: lchmo444/dlx
def test_word_one_hot_vector_convertor():
    """Exercise word_one_hot_vector_convertor end to end and print the results.

    Creates a SimpleChainEngine seeded with the string '0123456789abcdef',
    draws samples from it, and pushes the engine's dictionary and the sampled
    sentences through each conversion the convertor exposes:

    * word -> one-hot vector -> word
    * sentence -> one-hot matrix -> sentence (with and without a mask)
    * sentences -> one-hot tensor -> sentences

    Nothing is asserted; the printed output is meant to be inspected by eye.

    Every ``print`` call receives exactly one pre-formatted argument, so the
    function produces the same output on Python 2 and Python 3.
    """
    from data.simple_chain_engine import SimpleChainEngine

    engine = SimpleChainEngine('0123456789abcdef')

    # One single sample, then a small dataset of five samples.
    s, c = engine.get_data()
    print("%s -> %s" % (s, c))
    ss, cs = engine.get_dataset(5)
    for s, c in zip(ss, cs):
        print("%s -> %s" % (s, c))

    # The dictionary is used repeatedly below; fetch it once.
    dictionary = engine.get_dictionary()
    print(dictionary)

    convertor = word_one_hot_vector_convertor(dictionary)

    # Word -> one-hot vector.
    for word in dictionary:
        print("%s -> %s" % (word,
                            convertor.word2one_hot_vector(word).astype('int8')))

    # Word -> one-hot vector -> word round trip.
    for word in dictionary:
        vector = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, convertor.one_hot_vector2word(vector)))

    # Sentence -> one-hot matrix (no maximum length given, so no mask is
    # unpacked from the return value).
    matrixs = [convertor.sentence2one_hot_matrix(c) for c in cs]

    for c, matrix in zip(cs, matrixs):
        # (c,) keeps the formatting correct even if a sentence is a tuple.
        print("%s -> " % (c,))
        print(matrix.astype('int8'))

    # One-hot matrix -> sentence round trip.
    for c, matrix in zip(cs, matrixs):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix)))

    # Same round trip with an explicit maximum length; in this form the
    # convertor returns a (matrix, mask) pair.
    maxlen = len(dictionary) + 10
    matrixs = []
    masks = []
    for c in cs:
        matrix, mask = convertor.sentence2one_hot_matrix(c, maxlen)
        matrixs.append(matrix)
        masks.append(mask)

    for c, matrix, mask in zip(cs, matrixs, masks):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix, mask)))

    # Whole batch -> tensor + mask -> sentences round trip.
    # NOTE(review): the second argument is presumably the maximum sentence
    # length (here the dictionary size) -- confirm against the convertor.
    tensor, mask = convertor.sentences2one_hot_tensor(cs, len(dictionary))
    recs = convertor.one_hot_tensor2sentences(tensor, mask)
    for c, rec in zip(cs, recs):
        print("%s -> %s" % (c, rec))
示例#2
0
文件: convertor.py 项目: lxastro/dlx
def test_word_one_hot_vector_convertor():
    """Print a tour of every conversion offered by word_one_hot_vector_convertor.

    A SimpleChainEngine built from the string '0123456789abcdef' supplies a
    dictionary and sample sentences.  Those are converted to one-hot vectors,
    matrices and a tensor, converted back again, and each step is printed.
    There are no assertions: this is a visual smoke test.

    All output goes through single-argument ``print(...)`` calls, which
    behave identically under Python 2 and Python 3.
    """
    from data.simple_chain_engine import SimpleChainEngine

    engine = SimpleChainEngine('0123456789abcdef')

    # Show one sample, followed by a dataset of five samples.
    s, c = engine.get_data()
    print("%s -> %s" % (s, c))
    ss, cs = engine.get_dataset(5)
    for (s, c) in zip(ss, cs):
        print("%s -> %s" % (s, c))

    # Fetch the dictionary once and reuse it for every step below.
    vocabulary = engine.get_dictionary()
    print(vocabulary)

    convertor = word_one_hot_vector_convertor(vocabulary)

    # Each word and its one-hot encoding.
    for word in vocabulary:
        encoded = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, encoded))

    # Each word encoded and then decoded again.
    for word in vocabulary:
        encoded = convertor.word2one_hot_vector(word).astype('int8')
        print("%s -> %s" % (word, convertor.one_hot_vector2word(encoded)))

    # Sentences encoded without a maximum length (matrix only).
    matrixs = []
    for c in cs:
        matrixs.append(convertor.sentence2one_hot_matrix(c))

    for c, matrix in zip(cs, matrixs):
        # A one-element tuple avoids misformatting when c is itself a tuple.
        print("%s -> " % (c,))
        print(matrix.astype('int8'))

    for c, matrix in zip(cs, matrixs):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix)))

    # Sentences encoded with a maximum length; the call then yields a
    # (matrix, mask) pair, which is decoded using the mask.
    maxlen = len(vocabulary) + 10
    matrixs = []
    masks = []
    for c in cs:
        matrix, mask = convertor.sentence2one_hot_matrix(c, maxlen)
        matrixs.append(matrix)
        masks.append(mask)

    for c, matrix, mask in zip(cs, matrixs, masks):
        print("%s -> %s" % (c, convertor.one_hot_matrix2sentence(matrix, mask)))

    # All sentences at once: tensor + mask, then back to sentences.
    # NOTE(review): second argument looks like a maximum length -- confirm.
    tensor, mask = convertor.sentences2one_hot_tensor(cs, len(vocabulary))
    recs = convertor.one_hot_tensor2sentences(tensor, mask)
    for c, rec in zip(cs, recs):
        print("%s -> %s" % (c, rec))
示例#3
0
文件: chain_rnn.py 项目: lchmo444/dlx
# Generate DATA_SIZE (start, sentence) pairs from the chain engine.
engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)
# Preview the first few pairs; the loop stops once i reaches 5, so at most
# six lines are printed.
for (i, start, sentence) in zip(range(DATA_SIZE), starts, sentences):
    print("%s -> %s" % (sentence2str(start), sentence2str(sentence)))
    if i >= 5:
        break

# Input drops the last element and target drops the first, so the target at
# each position is the element that follows it in the input sentence.
sinputs = [sentence[:-1] for sentence in sentences]
soutputs = [sentence[1:] for sentence in sentences]
for (i, sinput, soutput) in zip(range(DATA_SIZE), sinputs, soutputs):
    print("%s -> %s" % (sentence2str(sinput), sentence2str(soutput)))
    if i >= 5:
        break

# Encode inputs and targets as one-hot tensors; only the mask returned for
# the inputs is kept.
convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)
D_Y, _ = convertor.sentences2one_hot_tensor(soutputs, MAXLEN)
print(D_X.shape, D_Y.shape, D_mask.shape)

# Shuffle (X, Y) and the mask with the same permutation so rows stay aligned.
indices = np.arange(DATA_SIZE)
np.random.shuffle(indices)
D_X = D_X[indices]
D_Y = D_Y[indices]
D_mask = D_mask[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Floor division keeps split_at an int: under Python 3, "/" would yield a
# float and the slices below would raise TypeError.
split_at = DATA_SIZE - DATA_SIZE // 10
(D_X_train, D_X_val) = (D_X[:split_at], D_X[split_at:])
(D_Y_train, D_Y_val) = (D_Y[:split_at], D_Y[split_at:])
示例#4
0
文件: chain_rnn.py 项目: lxastro/dlx
# Build the dataset: DATA_SIZE start symbols with their generated sentences.
engine = SimpleChainEngine(words)
starts, sentences = engine.get_dataset(DATA_SIZE)
# Print a short preview (the break triggers at i == 5, i.e. up to six rows).
for (i, start, sentence) in zip(range(DATA_SIZE), starts, sentences):
    print("%s -> %s" % (sentence2str(start), sentence2str(sentence)))
    if i >= 5:
        break

# Shift each sentence by one position: the input is everything but the last
# element, the expected output is everything but the first.
sinputs = [sentence[:-1] for sentence in sentences]
soutputs = [sentence[1:] for sentence in sentences]
for (i, sinput, soutput) in zip(range(DATA_SIZE), sinputs, soutputs):
    print("%s -> %s" % (sentence2str(sinput), sentence2str(soutput)))
    if i >= 5:
        break

# One-hot encode both sides; the mask from the target encoding is discarded.
convertor = word_one_hot_vector_convertor(engine.get_dictionary())
D_X, D_mask = convertor.sentences2one_hot_tensor(sinputs, MAXLEN)
D_Y, _ = convertor.sentences2one_hot_tensor(soutputs, MAXLEN)
print(D_X.shape, D_Y.shape, D_mask.shape)

# Shuffle (X, Y): one shared index permutation is applied to inputs, targets
# and mask alike.
indices = np.arange(DATA_SIZE)
np.random.shuffle(indices)
D_X = D_X[indices]
D_Y = D_Y[indices]
D_mask = D_mask[indices]

# Explicitly set apart 10% for validation data that we never train over.
# "//" (not "/") is required so the split index stays an integer on
# Python 3; a float index would make the slicing below raise TypeError.
split_at = DATA_SIZE - DATA_SIZE // 10
(D_X_train, D_X_val) = (D_X[:split_at], D_X[split_at:])
(D_Y_train, D_Y_val) = (D_Y[:split_at], D_Y[split_at:])