def create_model(ids,vocab2id,size):
	"""Build an LSTM language model over a sequence of token ids.

	ids      -- symbolic vector of token ids; position t's score predicts id t+1
	vocab2id -- vocabulary mapping, used to size the embedding matrix
	size     -- dimensionality of both the word vectors and the hidden state
	Returns (scores, cost, obv_cost, P) where P holds all model parameters.
	"""
	embedding_dim = size
	hidden_dim    = size

	P = Parameters()
	P.V = create_vocab_vectors(P,vocab2id,embedding_dim)

	# Output projection starts at zero; its shape mirrors the (transposed)
	# embedding matrix, with one bias per vocabulary entry.
	vocab_shape = P.V.get_value().shape
	P.W_predict = np.zeros(vocab_shape).T
	P.b_predict = np.zeros((vocab_shape[0],))
	X = P.V[ids]

	step = build_lstm_step(P,embedding_dim,hidden_dim)

	# Run the LSTM over the embedded sequence; keep hidden states, drop cells.
	[states,_],_ = theano.scan(
			step,
			sequences    = [X],
			outputs_info = [P.init_h,P.init_c]
		)

	logits = T.dot(states,P.W_predict) + P.b_predict
	scores = T.nnet.softmax(logits)

	# Next-word objective: drop the final score row, drop the first target id.
	log_likelihood, cross_ent = word_cost(scores[:-1],ids[1:])
	cost = log_likelihood #+ 1e-4 * sum( T.sum(abs(w)) for w in P.values() )
	obv_cost = cross_ent
	return scores, cost, obv_cost, P
Example #2
0
def create_model(ids, vocab2id, size):
    """Construct an LSTM next-word model whose embeddings and hidden state
    both have dimensionality ``size``.

    Returns ``(scores, cost, obv_cost, P)``: per-position softmax scores,
    the training cost, the observable cross-entropy, and the parameter set.
    """
    P = Parameters()
    P.V = create_vocab_vectors(P, vocab2id, size)

    # Zero-initialised output projection; shapes are derived from the
    # embedding matrix so the two always stay in sync.
    emb_shape = P.V.get_value().shape
    P.W_predict = np.zeros(emb_shape).T
    P.b_predict = np.zeros((emb_shape[0], ))

    embedded = P.V[ids]
    lstm_step = build_lstm_step(P, size, size)

    # Scan the LSTM over the sequence; only hidden states are needed
    # downstream, the cell states are discarded.
    [hidden_states, _], _ = theano.scan(lstm_step,
                                        sequences=[embedded],
                                        outputs_info=[P.init_h, P.init_c])

    scores = T.nnet.softmax(T.dot(hidden_states, P.W_predict) + P.b_predict)

    # The score at position t is matched against the token at position t+1.
    log_likelihood, cross_ent = word_cost(scores[:-1], ids[1:])
    cost = log_likelihood  #+ 1e-4 * sum( T.sum(abs(w)) for w in P.values() )
    return scores, cost, cross_ent, P