def LSTM_sequence_classifer_net(feature, num_output_classes, embedding_dim,
                                LSTM_dim, cell_dim):
    embedding_function = embedding(feature, embedding_dim)
    LSTM_function = LSTMP_component_with_self_stabilization(
        embedding_function.output, LSTM_dim, cell_dim)[0]
    thought_vector = sequence.last(LSTM_function)

    return linear_layer(thought_vector, num_output_classes)
def LSTM_sequence_classifer_net(input, num_output_classes, embedding_dim,
                                LSTM_dim, cell_dim):
    embedding_function = embedding(input, embedding_dim)
    LSTM_function = LSTMP_component_with_self_stabilization(
        embedding_function.output, LSTM_dim, cell_dim)[0]
    thought_vector = sequence.last(LSTM_function)

    return linear_layer(thought_vector, num_output_classes)
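For comparison, the same network can be written against CNTK's higher-level
Layers API. This is a minimal sketch assuming CNTK 2.x, where Embedding,
Recurrence/LSTM, sequence.last, and Dense stand in for the example helpers
(embedding, LSTMP_component_with_self_stabilization, linear_layer) used above;
the function name below is illustrative, not part of the original code.

import cntk as C

def lstm_sequence_classifier(num_output_classes, embedding_dim, LSTM_dim,
                             cell_dim):
    # Same layering as above: embed -> LSTM over the sequence -> last
    # hidden state -> linear projection to class scores.
    return C.layers.Sequential([
        C.layers.Embedding(embedding_dim),
        C.layers.Recurrence(C.layers.LSTM(LSTM_dim, cell_shape=cell_dim)),
        C.sequence.last,
        C.layers.Dense(num_output_classes)
    ])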
# Module-level imports needed by this method; get_config, variable_scope,
# embedding, encoder and decoder are helpers from the surrounding NMT
# codebase (the enclosing class definition is omitted in this snippet).
import numpy
import theano
import theano.sandbox.rng_mrg
import theano.tensor


def __init__(self, config=get_config(), **option):
    scope = config.scope
    sedim, tedim = option["embdim"]
    shdim, thdim, ahdim = option["hidden"]
    maxdim = option["maxhid"]
    deephid = option["deephid"]
    k = option["maxpart"]
    svocab, tvocab = option["vocabulary"]
    sw2id, sid2w = svocab
    tw2id, tid2w = tvocab
    svsize = len(sid2w)
    tvsize = len(tid2w)

    with variable_scope(scope):
        source_embedding = embedding(svsize, sedim, config.source_embedding)
        target_embedding = embedding(tvsize, tedim, config.target_embedding)
        rnn_encoder = encoder(sedim, shdim, config.encoder)
        rnn_decoder = decoder(tedim, shdim, thdim, ahdim, maxdim, k,
                              deephid, tvsize, config.decoder)

    params = []
    params.extend(source_embedding.parameter)
    params.extend(target_embedding.parameter)
    params.extend(rnn_encoder.parameter)
    params.extend(rnn_decoder.parameter)

    # masked cross-entropy cost, averaged over the batch
    def training_graph():
        xseq = theano.tensor.imatrix()
        xmask = theano.tensor.matrix()
        yseq = theano.tensor.imatrix()
        ymask = theano.tensor.matrix()

        xemb = source_embedding(xseq)
        yemb = target_embedding(yseq)
        initstate = theano.tensor.zeros((xemb.shape[1], shdim))

        annotation = rnn_encoder(xemb, xmask, initstate)
        probs = rnn_decoder(yemb, xmask, ymask, annotation)

        idx = theano.tensor.arange(yseq.flatten().shape[0])
        cost = -theano.tensor.log(probs[idx, yseq.flatten()])
        cost = cost.reshape(yseq.shape)
        cost = theano.tensor.sum(cost * ymask, 0)
        cost = theano.tensor.mean(cost)

        return [xseq, xmask, yseq, ymask], [cost]

    # attention weights for a given source/target pair
    def attention_graph():
        xseq = theano.tensor.imatrix()
        xmask = theano.tensor.matrix()
        yseq = theano.tensor.imatrix()
        ymask = theano.tensor.matrix()

        xemb = source_embedding(xseq)
        yemb = target_embedding(yseq)
        initstate = theano.tensor.zeros((xemb.shape[1], shdim))

        annotation = rnn_encoder(xemb, xmask, initstate)
        alpha = rnn_decoder.compute_attention_score(yemb, xmask, ymask,
                                                    annotation)

        return [xseq, xmask, yseq, ymask], alpha

    # draw translations by sampling from the decoder's output distribution
    def sampling_graph():
        seed = option["seed"]
        seed_rng = numpy.random.RandomState(numpy.random.randint(seed))
        tseed = seed_rng.randint(numpy.iinfo(numpy.int32).max)
        stream = theano.sandbox.rng_mrg.MRG_RandomStreams(tseed)

        xseq = theano.tensor.imatrix()
        xmask = theano.tensor.matrix()
        maxlen = theano.tensor.iscalar()
        batch = xseq.shape[1]

        xemb = source_embedding(xseq)
        initstate = theano.tensor.zeros([batch, shdim])
        annot = rnn_encoder(xemb, xmask, initstate)

        ymask = theano.tensor.ones([batch])
        istate, mannot = rnn_decoder.compute_initstate(annot)

        def sample_step(pemb, state, xmask, ymask, annot, mannot):
            alpha, context = rnn_decoder.compute_context(state, xmask,
                                                         annot, mannot)
            probs = rnn_decoder.compute_probability(pemb, state, context)
            next_words = stream.multinomial(pvals=probs).argmax(axis=1)
            yemb = target_embedding(next_words)
            next_state = rnn_decoder.compute_state(yemb, ymask, state,
                                                   context)
            return [next_words, yemb, next_state]

        iemb = theano.tensor.zeros([batch, tedim])

        seqs = []
        outputs_info = [None, iemb, istate]
        nonseqs = [xmask, ymask, annot, mannot]

        outputs, u = theano.scan(sample_step, seqs, outputs_info, nonseqs,
                                 n_steps=maxlen)

        return [xseq, xmask, maxlen], outputs[0], u

    # for beamsearch
    def encoding_graph():
        xseq = theano.tensor.imatrix()
        xmask = theano.tensor.matrix()

        xemb = source_embedding(xseq)
        initstate = theano.tensor.zeros((xseq.shape[1], shdim))
        annotation = rnn_encoder(xemb, xmask, initstate)

        return [xseq, xmask], annotation

    def initial_state_graph():
        annotation = theano.tensor.tensor3()

        # initstate, mapped_annotation
        outputs = rnn_decoder.compute_initstate(annotation)

        return [annotation], outputs

    def context_graph():
        state = theano.tensor.matrix()
        xmask = theano.tensor.matrix()
        annotation = theano.tensor.tensor3()
        mannotation = theano.tensor.tensor3()

        inputs = [state, xmask, annotation, mannotation]
        alpha, context = rnn_decoder.compute_context(*inputs)

        return inputs, [context, alpha]

    def probability_graph():
        y = theano.tensor.ivector()
        state = theano.tensor.matrix()
        context = theano.tensor.matrix()

        # 0 for initial index
        cond = theano.tensor.neq(y, 0)
        yemb = target_embedding(y)
        # zeros out embedding if y is 0
        yemb = yemb * cond[:, None]
        probs = rnn_decoder.compute_probability(yemb, state, context)

        return [y, state, context], probs

    def state_graph():
        y = theano.tensor.ivector()
        ymask = theano.tensor.vector()
        state = theano.tensor.matrix()
        context = theano.tensor.matrix()

        yemb = target_embedding(y)
        inputs = [yemb, ymask, state, context]
        new_state = rnn_decoder.compute_state(*inputs)

        return [y, ymask, state, context], new_state

    # graph builders return either (inputs, outputs) or
    # (inputs, outputs, updates); compile accordingly
    def compile_function(graph_fn):
        outputs = graph_fn()

        if len(outputs) == 2:
            inputs, outputs = outputs
            return theano.function(inputs, outputs)
        else:
            inputs, outputs, updates = outputs
            return theano.function(inputs, outputs, updates=updates)

    train_inputs, train_outputs = training_graph()
    search_fn = []
    search_fn.append(compile_function(encoding_graph))
    search_fn.append(compile_function(initial_state_graph))
    search_fn.append(compile_function(context_graph))
    search_fn.append(compile_function(probability_graph))
    search_fn.append(compile_function(state_graph))

    self.name = scope
    self.config = config
    self.parameter = params
    self.option = option
    self.cost = train_outputs[0]
    self.inputs = train_inputs
    self.outputs = train_outputs
    self.updates = []
    self.search = search_fn
    self.sampler = compile_function(sampling_graph)
    self.attention = compile_function(attention_graph)
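The five compiled functions in search_fn define the step protocol that a beam
search drives: encode once, initialize the decoder state, then repeatedly
compute context, word probabilities, and the next state. The sketch below
shows that calling order with a hypothetical greedy loop; the function name
and surrounding variables are assumptions, while the call signatures follow
the graphs compiled above.

import numpy

def greedy_decode(model, xseq, xmask, maxlen=50):
    # Hypothetical greedy driver for model.search; a sketch of the
    # protocol implied by the compiled graphs, not original code.
    encode, init_state, compute_context, compute_probs, compute_state = \
        model.search

    annotation = encode(xseq, xmask)                   # encoding_graph
    state, mapped_annotation = init_state(annotation)  # initial_state_graph

    batch = xseq.shape[1]
    y = numpy.zeros([batch], dtype="int32")      # 0 marks the initial step
    ymask = numpy.ones([batch], dtype="float32")
    result = []

    for _ in range(maxlen):
        context, alpha = compute_context(state, xmask, annotation,
                                         mapped_annotation)
        probs = compute_probs(y, state, context)   # probability_graph
        y = probs.argmax(axis=1).astype("int32")   # greedy pick, no beam
        state = compute_state(y, ymask, state, context)
        result.append(y)

    return result

A real beam search keeps the k best hypotheses at each step instead of the
single argmax, and stops hypotheses that emit the end-of-sentence symbol.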
def LSTM_sequence_classifer_net(input, num_output_classes, embedding_dim,
                                LSTM_dim, cell_dim):
    embedded_inputs = embedding(input, embedding_dim)
    lstm_outputs = simple_lstm(embedded_inputs, LSTM_dim, cell_dim)[0]
    thought_vector = sequence.last(lstm_outputs)

    return linear_layer(thought_vector, num_output_classes)
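The simple_lstm helper is not shown in this snippet. A plausible definition in
terms of standard CNTK 2.x layers might look like the following; the name and
the indexable (hidden, cell) return value mirror the call above, but the body
is an assumption, not the original helper.

import cntk as C

def simple_lstm(input, LSTM_dim, cell_dim):
    # Assumed helper: run one LSTM layer over the sequence and return
    # (hidden_sequence, cell_sequence), so [0] selects the hidden states.
    lstm = C.layers.Recurrence(
        C.layers.LSTM(LSTM_dim, cell_shape=cell_dim),
        return_full_state=True)
    return lstm(input).outputs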