Example #1
import numpy as np


def compute_gradient(fflayer):
    """
    Args:
        fflayer: layer whose fields are used here.
            fflayer.yhat   -- predictions, shape (batch, seq, size)
            fflayer.target -- integer class labels, shape (batch, seq)
    """
    # Works for a regular LSTM.
    # seq is the number of time steps over which the gradient is taken.
    #print("fflayer:",fflayer)
    yhat = fflayer.yhat
    target = fflayer.target
    # checkdatadim and input_one_hot are helpers from the same codebase.
    checkdatadim(yhat, 3)
    checkdatadim(target, 2)
    batch, seq, size = yhat.shape
    # Expand the integer targets into one-hot vectors of length `size`.
    target_one_hot = np.zeros((batch, seq, size))
    for batnum in range(batch):
        for i in range(seq):
            target_one_hot[batnum][i] = input_one_hot(target[batnum][i], size)
    # Gradient of cross-entropy loss through softmax: yhat - one_hot(target).
    dy = yhat.copy()
    dy = dy - target_one_hot
    if (fflayer.layer.backpassdebug):
        print("*****************************************************************")
        print("BackPass:Debug:Name:",fflayer.layer.name)
        print("*****************************************************************")
        print("yhat:",yhat)
        print("label:",target)
        print("labeltransformed:",target_one_hot)
        print("gradient:",dy)
        print("*****************************************************************")
    # By convention, save in this field the gradient that has to be passed back to the next layer in the reverse pass.
    fflayer.grad=dy
    # Return whatever has to be applied as a weight update; this layer has nothing to apply.
    return None
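# Editorial sketch (not part of the repo): a quick numerical check of the rule used
# above, dy = yhat - one_hot(target), which is the standard gradient of cross-entropy
# composed with softmax, taken with respect to the pre-softmax logits. All helper
# names below are local to this sketch.
import numpy as np

def _softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def _xent(z, t):
    # cross-entropy of softmax(z) against the integer class index t
    return -np.log(_softmax(z)[t])

z = np.array([1.0, 3.0, 5.0, 7.0])
t = 3
analytic = _softmax(z) - np.eye(4)[t]
numeric = np.zeros_like(z)
eps = 1e-6
for i in range(4):
    zp, zm = z.copy(), z.copy()
    zp[i] += eps
    zm[i] -= eps
    numeric[i] = (_xent(zp, t) - _xent(zm, t)) / (2 * eps)
print(np.allclose(analytic, numeric, atol=1e-5))   # True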
Example #2
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    return pred


def LOSS(X, target):
    pred = RNN(X, out_weights, out_biases)
    return cross_entropy_loss(pred.reshape([1, 1, vocab_size]),
                              np.array([[target]]))


while step < training_iters:
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
    print("offset:", offset)
    symbols_in_keys = [
        input_one_hot(dictionary[str(train_data[i])], vocab_size)
        for i in range(offset, offset + n_input)
    ]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys),
                                 [-1, n_input, vocab_size])
    print("symbols_in_keys:", symbols_in_keys)
    target = dictionary[str(train_data[offset + n_input])]
    """with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell = LSTMCell(n_hidden,debug=True)

    result, state = dynamic_rnn(cell, symbols_in_keys)
    (c, h) = state.c,state.h
    print("final:", repr(result),state,h.shape)

    #last layer of Feed Forward to compare to transform result to the shape of target
    out_l = Dense(10,kernel_initializer=init_ops.Constant(out_weights),bias_initializer=init_ops.Constant(out_biases))
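# Editorial sketch with a made-up toy corpus (not the source data): what the loop
# above does to build symbols_in_keys, i.e. a sliding window of n_input symbols
# turned into a (1, n_input, vocab_size) one-hot batch. input_one_hot is restated
# here with its assumed behaviour (1.0 at the given index, zeros elsewhere).
import numpy as np

def input_one_hot(index, size):
    v = np.zeros(size)
    v[int(index)] = 1.0
    return v

train_data = ["long", "ago", "the", "mice", "had"]               # toy corpus
dictionary = {w: i for i, w in enumerate(sorted(set(train_data)))}
vocab_size = len(dictionary)
n_input, offset = 3, 0

symbols_in_keys = [input_one_hot(dictionary[str(train_data[i])], vocab_size)
                   for i in range(offset, offset + n_input)]
symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, vocab_size])
print(symbols_in_keys.shape)                                      # (1, 3, 5)
target = dictionary[str(train_data[offset + n_input])]            # index of the next symbol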
Example #3
        cell = MultiRNNCell([cell1, cell2])
        result, state = dynamic_rnn(cell, symbols_in_keys)
    # Dense in this case should be outside the WeightsInitializer scope because we are passing constants.
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights), bias_initializer=init_ops.Constant(out_biases))
    return out_l(state[-1].h)


def LOSS(X,target):
    pred=RNN(X,out_weights,out_biases)
    return cross_entropy_loss(pred.reshape([1,1,vocab_size]),np.array([[target]]))

while step < training_iters:
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
    print("offset:", offset)
    symbols_in_keys = [input_one_hot(dictionary[str(train_data[i])],vocab_size) for i in range(offset, offset + n_input)]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, vocab_size])
    print("symbols_in_keys:",symbols_in_keys)
    target=dictionary[str(train_data[offset + n_input])]
    """with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell = LSTMCell(n_hidden,debug=True)

    result, state = dynamic_rnn(cell, symbols_in_keys)
    (c, h) = state.c,state.h
    print("final:", repr(result),state,h.shape)

    # last feed-forward layer: transform the result to the shape of the target so it can be compared
    out_l = Dense(10,kernel_initializer=init_ops.Constant(out_weights),bias_initializer=init_ops.Constant(out_biases))
    pred=out_l(h)
    print("pred:",pred)"""
Example #4
with tf.Session() as session:
    session.run(init)
    step = 0
    #offset = rnd.randint(0,n_input+1)
    offset = 2
    end_offset = n_input + 1
    acc_total = 0
    loss_total = 0
    print("offset:", offset)

    while step < training_iters:
        if offset > (len(train_data) - end_offset):
            offset = rnd.randint(0, n_input + 1)
        print("offset:", offset)
        symbols_in_keys = [
            input_one_hot(dictionary[str(train_data[i])], vocab_size)
            for i in range(offset, offset + n_input)
        ]
        symbols_in_keys = np.reshape(np.array(symbols_in_keys),
                                     [-1, n_input, vocab_size])
        symbols_out_onehot = input_one_hot(
            dictionary[str(train_data[offset + n_input])], vocab_size)
        symbols_out_onehot = np.reshape(symbols_out_onehot, [1, -1])

        tfbi_output, tfbi_state, tfcc_output, tfpreds, tfgrads_and_vars_tf_style, _, acc, loss = session.run(
            [bi_output, bi_state, cc_output, preds, grads_and_vars_tf_style, train_tf_style, accuracy, cost],
            feed_dict={x: symbols_in_keys, y: symbols_out_onehot})
        print("tfbi_output:", tfbi_output)
        print("tfbi_state:", tfbi_state)
        print("cc_output:", tfcc_output)
        print("tfpreds:", tfpreds)
Example #5
sm = softmax(x)
print("softmax:", sm)
jacobian = _softmax_grad(sm[0])
print("jacobian:", jacobian)
jacobian = _softmax_grad(sm[1])
print(jacobian)
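# Editorial note: for s = softmax(z), the Jacobian is J[i, j] = s[i] * (delta_ij - s[j]),
# i.e. diag(s) - outer(s, s). A minimal NumPy version of what _softmax_grad is assumed
# to compute for a single softmaxed row:
import numpy as np

def softmax_jacobian(s):
    s = np.asarray(s).reshape(-1)
    return np.diag(s) - np.outer(s, s)

s0 = np.array([0.00214401, 0.01584220, 0.11705891, 0.86495488])   # ~ softmax([1, 3, 5, 7])
print(softmax_jacobian(s0))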
"""
Example-2 Softmax and loss
"""
x = np.array([[1, 3, 5, 7], [1, -9, 4, 8]])
y = np.array([3, 1])

sm = softmax(x)

#prints out 0.145
print(loss(sm[0], input_one_hot(y[0], 4)))
#prints out 17.01
print(loss(sm[1], input_one_hot(y[1], 4)))
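# Editorial check of the two printed values, assuming loss(p, t) above is the usual
# categorical cross-entropy -sum(t * log(p)) with a one-hot t:
import numpy as np
x0, x1 = np.array([1., 3., 5., 7.]), np.array([1., -9., 4., 8.])
p0 = np.exp(x0) / np.exp(x0).sum()
p1 = np.exp(x1) / np.exp(x1).sum()
print(-np.log(p0[3]))   # ~0.14508, target index 3 for the first row
print(-np.log(p1[1]))   # ~17.019,  target index 1 for the second row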
"""
Example-3 Softmax and crossentropyloss
"""
x = np.array([[[1, 3, 5, 7], [1, -9, 4, 8]]])
y = np.array([[3, 1]])

#prints array([[ 0.14507794, 17.01904505]])
softmaxed, loss = cross_entropy_loss(x, y)
print("loss:", loss)
"""
Example-4 Combined Gradient of Loss with respect to x
"""
batch, seq, size = x.shape
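# Editorial sketch, continuing with softmaxed, y, batch and seq from the lines above:
# the combined gradient of the loss with respect to x is the softmaxed output minus
# the one-hot target, the same yhat - target_one_hot rule used in compute_gradient.
dx = softmaxed.copy()
for b in range(batch):
    for s in range(seq):
        dx[b, s, y[b, s]] -= 1.0
print("dx:", dx)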