def create_model(ids, vocab2id, size):
    """Build the symbolic graph for an LSTM language model.

    Parameters
    ----------
    ids : symbolic int vector of token ids for one training sequence.
    vocab2id : mapping from token to integer id (passed through to
        ``create_vocab_vectors``).
    size : int; used as both the word-vector width and the hidden-state
        width.

    Returns
    -------
    tuple ``(scores, cost, obv_cost, P)`` — per-step softmax
    distributions over the vocabulary, the training objective, the
    reported cross-entropy, and the ``Parameters`` container.
    """
    word_vector_size = size
    hidden_state_size = size

    P = Parameters()
    P.V = create_vocab_vectors(P, vocab2id, word_vector_size)
    # Output projection matches the embedding matrix in shape (transposed),
    # zero-initialised; presumably trained elsewhere — confirm with caller.
    P.W_predict = np.zeros(P.V.get_value().shape).T
    P.b_predict = np.zeros((P.V.get_value().shape[0],))

    X = P.V[ids]
    step = build_lstm_step(P, word_vector_size, hidden_state_size)
    [states, _], _ = theano.scan(
        step,
        sequences=[X],
        outputs_info=[P.init_h, P.init_c],
    )
    # State after token t predicts token t+1, hence the [:-1] / [1:] shift.
    scores = T.nnet.softmax(T.dot(states, P.W_predict) + P.b_predict)
    log_likelihood, cross_ent = word_cost(scores[:-1], ids[1:])
    cost = log_likelihood  # + 1e-4 * sum( T.sum(abs(w)) for w in P.values() )
    obv_cost = cross_ent
    return scores, cost, obv_cost, P
# NOTE(review): this is a duplicate definition of create_model; at import
# time it silently shadows the one above — confirm which copy is intended.
def create_model(ids, vocab2id, size):
    """Construct an LSTM next-word model graph.

    ``ids`` is a symbolic int vector of token ids, ``vocab2id`` maps
    tokens to ids, and ``size`` fixes both the embedding and the hidden
    dimension.  Returns ``(scores, cost, obv_cost, params)``: the softmax
    scores per time step, the training objective, the observed
    cross-entropy, and the parameter container.
    """
    embed_dim = size
    state_dim = size

    params = Parameters()
    params.V = create_vocab_vectors(params, vocab2id, embed_dim)
    vocab_shape = params.V.get_value().shape
    # Zero-initialised output projection/bias, sized from the embeddings.
    params.W_predict = np.zeros(vocab_shape).T
    params.b_predict = np.zeros((vocab_shape[0],))

    lstm_step = build_lstm_step(params, embed_dim, state_dim)
    [hidden_states, _], _ = theano.scan(
        lstm_step,
        sequences=[params.V[ids]],
        outputs_info=[params.init_h, params.init_c],
    )
    logits = T.dot(hidden_states, params.W_predict) + params.b_predict
    probs = T.nnet.softmax(logits)
    # Score token t+1 from the state after token t.
    log_likelihood, cross_ent = word_cost(probs[:-1], ids[1:])
    cost = log_likelihood  # + 1e-4 * sum( T.sum(abs(w)) for w in params.values() )
    obv_cost = cross_ent
    return probs, cost, obv_cost, params