def __init__(self, n_input, n_hidden, n_output, optimizer=sgd, p=0.5):
    self.x = T.itensor3('batched_sequence_x')  # (n_maxlen, n_batch, 2)
    self.x_mask_r = T.matrix('x_mask_r')
    self.x_mask_c = T.matrix('x_mask_c')
    self.y = T.itensor3('batched_sequence_y')  # (n_maxlen, n_batch, 2)
    self.y_mask = T.matrix('y_mask')
    self.n_input = n_input
    self.n_hidden = n_hidden
    self.n_output = n_output
    self.floatX = theano.config.floatX
    self.E_table_size = int(np.ceil(np.sqrt(n_output)))
    self.p = p

    init_Er = np.asarray(np.random.uniform(low=-np.sqrt(1. / self.n_output),
                                           high=np.sqrt(1. / self.n_output),
                                           size=(self.E_table_size, self.n_input)),
                         dtype=self.floatX)
    self.Er = theano.shared(value=init_Er, name='row_word_embedding', borrow=True)

    init_Ec = np.asarray(np.random.uniform(low=-np.sqrt(1. / self.n_output),
                                           high=np.sqrt(1. / self.n_output),
                                           size=(self.E_table_size, self.n_input)),
                         dtype=self.floatX)
    self.Ec = theano.shared(value=init_Ec, name='column_word_embedding', borrow=True)

    self.optimizer = optimizer
    self.is_train = T.iscalar('is_train')
    self.n_batch = T.iscalar('n_batch')
    self.epsilon = 1.0e-15
    self.rng = RandomStreams(1234)
    self.build()
def set_model(self):
    say('\n\nBUILD A MODEL\n')
    argv = self.argv

    #####################
    # Network variables #
    #####################
    c = T.itensor3('c')
    r = T.itensor3('r')
    a = T.ftensor3('a')
    y_r = T.ivector('y_r')
    y_a = T.imatrix('y_a')
    n_agents = T.iscalar('n_agents')

    max_n_agents = self.max_n_agents
    init_emb = self.init_emb
    n_vocab = self.vocab.size()

    #################
    # Build a model #
    #################
    say('MODEL: %s Unit: %s Opt: %s Activation: %s' %
        (argv.model, argv.unit, argv.opt, argv.activation))

    if argv.model == 'static':
        model = StaticModel
    else:
        model = DynamicModel

    self.model = model(argv, max_n_agents, n_vocab, init_emb)
    self.model.compile(c=c, r=r, a=a, y_r=y_r, y_a=y_a, n_agents=n_agents)
def __init__(self, input_dim, proj_dim=128, neg_samples=4,
             init='uniform', activation='tanh', weights=None,
             W_regularizer=None, activity_regularizer=None, **kwargs):
    super(WordTagContextProduct_tensor, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.proj_dim = proj_dim
    self.samples = neg_samples + 1
    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.W_regularizer = regularizers.get(W_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input = [T.itensor3(), T.itensor3()]
    # two different embeddings for pivot word and its context
    # because p(w|c) != p(c|w)
    self.W_w = self.init((input_dim, proj_dim))
    self.W_c = self.init((input_dim, proj_dim))
    self.params = [self.W_w, self.W_c]
    if weights is not None:
        self.set_weights(weights)
def _get_input_tensor_variables(self):
    # x_w: 1D: batch, 2D: n_prds, 3D: n_words, 4D: 5 + window; elem=word id
    # x_p: 1D: batch, 2D: n_prds, 3D: n_words; elem=posit id
    # y:   1D: batch, 2D: n_prds, 3D: n_words; elem=label id
    if self.argv.mark_phi:
        return [T.itensor4('x_w'), T.itensor3('x_p'), T.itensor3('y')]
    return [T.itensor4('x_w'), T.itensor3('y')]
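# A minimal sketch (not from the original source) that feeds dummy numpy
# arrays matching the shapes documented above; batch=2, n_prds=3, n_words=7
# and window=5 are made-up values.
import numpy as np
import theano
import theano.tensor as T

x_w = T.itensor4('x_w')   # (batch, n_prds, n_words, 5 + window)
x_p = T.itensor3('x_p')   # (batch, n_prds, n_words)
f = theano.function([x_w, x_p], [x_w.shape, x_p.shape])
print(f(np.zeros((2, 3, 7, 10), dtype='int32'),
        np.zeros((2, 3, 7), dtype='int32')))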
def _setup_training_graph(self):
    """
    Connect graphs together for training and store in/out ports & updates

    (propagation)  inputs : input, target, step_size
                   outputs: loss
                   updates: prev_states[, grads]
    (param update) inputs : lr
                   outputs: None
                   updates: params
    (optim init)   inputs : None
                   outputs: None
                   updates: optimizer states
    """
    p_input_tbi = tt.itensor3(name='i_port_input')
    p_target_tbi = tt.itensor3(name='i_port_target')
    p_step_size = tt.iscalar(name='i_port_step_size')
    p_lr = tt.fscalar(name='i_port_lr')

    self._prev_state_updates = []
    losses = []  # list of s_loss
    gradss = []  # list of s_grads (i.e., list of list)

    for s in self._slices:
        s_step_size = s.transfer(p_step_size)
        s_output_tbi, prev_state_updates = self._setup_forward_graph(
            s_input_tbi=s.apply(p_input_tbi),
            s_time_tb=None,
            s_next_prev_idx=s_step_size - 1,
            v_params=s.v_params,
            v_prev_states=s.v_prev_states)
        self._prev_state_updates += prev_state_updates

        s_loss = self._setup_loss_graph(
            s_output_tbi=s_output_tbi,
            s_target_tbi=s.apply(p_target_tbi),
            s_step_size=s_step_size)
        losses += [self.transfer(s_loss)]

        s_grads = self._setup_grads_graph(
            s_loss=s_loss,
            v_wrt=list(itervalues(s.v_params)))
        gradss += [[self.transfer(s_grad) for s_grad in s_grads]]

    # sum losses and grads from all slices
    p_loss = sum(losses)
    s_new_grads = [sum(grad_tuple) for grad_tuple in zip(*gradss)]
    self._grad_updates = [u for u in zip(self._v_grads, s_new_grads)]

    self._optim_inits, self._optim_param_updates, s_increments = \
        self._setup_optimizer_graph(s_lr=self.transfer(p_lr),
                                    v_grads=self._v_grads)
    for s in self._slices:
        self._optim_param_updates += \
            [(p, p + i) for p, i in zip(s.v_params.values(), s_increments)]

    self._prop_i_ports = [p_input_tbi, p_target_tbi, p_step_size]
    self._prop_o_ports = [p_loss]
    self._update_i_ports = [p_lr]
def __init__(self, K, vocab_size, num_chars, W_init, nhidden, embed_dim, dropout,
             train_emb, char_dim, use_feat, gating_fn, save_attn=False):
    self.nhidden = nhidden
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.train_emb = train_emb
    self.char_dim = char_dim
    self.learning_rate = LEARNING_RATE
    self.num_chars = num_chars
    self.use_feat = use_feat
    self.save_attn = save_attn
    self.gating_fn = gating_fn
    self.use_chars = self.char_dim != 0
    if W_init is None:
        W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

    doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
        T.wtensor3('cand')
    docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
        T.bmatrix('c_mask')
    target_var = T.ivector('ans')
    feat_var = T.imatrix('feat')
    doc_toks, qry_toks = T.imatrix('dchars'), T.imatrix('qchars')
    tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
    cloze_var = T.ivector('cloze')

    self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var,
                 docmask_var, qmask_var, tok_var, tok_mask, candmask_var,
                 feat_var, cloze_var]

    self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
        self.build_network(K, vocab_size, W_init))

    self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs,
                                                   target_var).mean()
    self.eval_fn = lasagne.objectives.categorical_accuracy(self.predicted_probs,
                                                           target_var).mean()
    loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val,
                                                  target_var).mean()
    eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val,
                                                          target_var).mean()

    self.params = L.get_all_params(self.network, trainable=True)
    updates = lasagne.updates.adam(self.loss_fn, self.params,
                                   learning_rate=self.learning_rate)

    self.train_fn = theano.function(
        self.inps,
        [self.loss_fn, self.eval_fn, self.predicted_probs],
        updates=updates, on_unused_input='warn')
    self.validate_fn = theano.function(
        self.inps,
        [loss_fn_val, eval_fn_val, predicted_probs_val] + attentions,
        on_unused_input='warn')
def __init__(self, K, vocab_size, W_init, regularizer, rlambda, nhidden, embed_dim,
             dropout, train_emb, subsample):
    self.nhidden = nhidden
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.train_emb = train_emb
    self.subsample = subsample
    norm = lasagne.regularization.l2 if regularizer == 'l2' else lasagne.regularization.l1

    if W_init is None:
        W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

    doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
        T.wtensor3('cand')
    docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
        T.bmatrix('c_mask')
    target_var = T.ivector('ans')

    if rlambda > 0.:
        W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
    else:
        W_pert = W_init
    predicted_probs, predicted_probs_val, self.doc_net, self.q_net, W_emb = \
        self.build_network(K, vocab_size, doc_var, query_var, cand_var,
                           docmask_var, qmask_var, candmask_var, W_pert)

    loss_fn = T.nnet.categorical_crossentropy(predicted_probs, target_var).mean() + \
        rlambda * norm(W_emb - W_init)
    eval_fn = lasagne.objectives.categorical_accuracy(predicted_probs,
                                                      target_var).mean()
    loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val,
                                                  target_var).mean() + \
        rlambda * norm(W_emb - W_init)
    eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val,
                                                          target_var).mean()

    params = L.get_all_params(self.doc_net, trainable=True) + \
        L.get_all_params(self.q_net, trainable=True)
    updates = lasagne.updates.adam(loss_fn, params, learning_rate=LEARNING_RATE)

    self.train_fn = theano.function(
        [doc_var, query_var, cand_var, target_var,
         docmask_var, qmask_var, candmask_var],
        [loss_fn, eval_fn, predicted_probs],
        updates=updates)
    self.validate_fn = theano.function(
        [doc_var, query_var, cand_var, target_var,
         docmask_var, qmask_var, candmask_var],
        [loss_fn_val, eval_fn_val, predicted_probs_val])
def __init__(self, num_chars, char_dim, max_word_len, embed_dim):
    self.num_chars = num_chars
    self.char_dim = char_dim
    self.max_word_len = max_word_len
    self.embed_dim = embed_dim

    chars1, chars2 = T.itensor3(), T.itensor3()
    mask1, mask2 = T.btensor3(), T.btensor3()
    self.inps = [chars1, chars2, mask1, mask2]

    l_e1, l_e2 = self.build_network()
    self.fn = theano.function(self.inps,
                              [L.get_output(l_e1), L.get_output(l_e2)])
def build(word_embeddings, len_voc, word_emb_dim, args, freeze=False):
    # input theano vars
    posts = T.imatrix()
    post_masks = T.fmatrix()
    ques_list = T.itensor3()
    ques_masks_list = T.ftensor3()
    ans_list = T.itensor3()
    ans_masks_list = T.ftensor3()
    labels = T.imatrix()
    N = args.no_of_candidates

    post_out, post_lstm_params = build_lstm(
        posts, post_masks, args.post_max_len,
        word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ques_out, ques_emb_out, ques_lstm_params = build_list_lstm(
        ques_list, ques_masks_list, N, args.ques_max_len,
        word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ans_out, ans_emb_out, ans_lstm_params = build_list_lstm(
        ans_list, ans_masks_list, N, args.ans_max_len,
        word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)

    ques_sim, pq_a_squared_errors, pq_a_loss, post_ques_dense_params = \
        answer_model(post_out, ques_out, ques_emb_out, ans_out, ans_emb_out,
                     labels, args)
    all_params = post_lstm_params + ques_lstm_params + post_ques_dense_params

    # a second, separately parameterized set of encoders for the utility calculator
    post_out, post_lstm_params = build_lstm(
        posts, post_masks, args.post_max_len,
        word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ques_out, ques_emb_out, ques_lstm_params = build_list_lstm(
        ques_list, ques_masks_list, N, args.ques_max_len,
        word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ans_out, ans_emb_out, ans_lstm_params = build_list_lstm(
        ans_list, ans_masks_list, N, args.ans_max_len,
        word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)

    pqa_loss, post_ques_ans_dense_params, pqa_preds = utility_calculator(
        post_out, ques_out, ques_emb_out, ans_out,
        ques_sim, pq_a_squared_errors, labels, args)
    all_params += post_lstm_params + ques_lstm_params + ans_lstm_params
    all_params += post_ques_ans_dense_params

    loss = pq_a_loss + pqa_loss
    loss += args.rho * sum(T.sum(l ** 2) for l in all_params)

    updates = lasagne.updates.adam(loss, all_params,
                                   learning_rate=args.learning_rate)
    train_fn = theano.function(
        [posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels],
        [loss, pq_a_loss, pqa_loss] + pq_a_squared_errors + ques_sim + pqa_preds,
        updates=updates)
    test_fn = theano.function(
        [posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels],
        [loss, pq_a_loss, pqa_loss] + pq_a_squared_errors + ques_sim + pqa_preds)
    return train_fn, test_fn
def __theano_train__(self, n_size):
    """
    Pr(l|u, C(l)) = Pr(l|u) * Pr(l|C(l))
    Pr(u, l, t) = Pr(l|u, C(l)) if C(l) exists, Pr(l|u) otherwise.
    $\Theta$ = argmax Pr(u, l, t)
    """
    tra_mask = T.ivector()
    seq_length = T.sum(tra_mask)  # effective (unpadded) length
    wl = T.concatenate((self.wl, self.wl_m))
    tidx, cidx, bidx, userid = T.ivector(), T.imatrix(), T.itensor3(), T.iscalar()
    pb = self.pb[bidx]    # (seq_length x 4 x depth x n_size)
    lrs = self.lrs[tidx]  # (seq_length x 4 x depth)

    # user preference
    xu = self.xu[userid]
    plu = softmax(T.dot(xu, self.wl.T))

    # geographical influence
    cl = T.sum(wl[cidx], axis=1)  # (seq_length x n_size)
    cl = cl.reshape((cl.shape[0], 1, 1, cl.shape[1]))
    br = sigmoid(T.sum(pb[:seq_length] * cl, axis=3) * lrs[:seq_length]) * \
        T.ceil(abs(T.mean(cl, axis=3)))
    path = T.prod(br, axis=2) * self.probs[tidx][:seq_length]
    # paths = T.prod((T.floor(1 - path) + path), axis=1)
    paths = T.sum(path, axis=1)
    paths = T.floor(1 - paths) + paths

    # ------------------------------------------------------------------------
    # cost, gradients, learning rate, l2 regularization
    lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
    seq_l2_sq = T.sum([T.sum(par ** 2) for par in [xu, self.wl]])
    upq = -1 * T.sum(T.log(plu[tidx[:seq_length]] * paths)) / seq_length
    seq_costs = upq + 0.5 * l2 * seq_l2_sq
    seq_grads = T.grad(seq_costs, self.params)
    seq_updates = [(par, par - lr * gra)
                   for par, gra in zip(self.params, seq_grads)]
    pars_subs = [(self.xu, xu), (self.pb, pb)]
    seq_updates.extend([
        (par, T.set_subtensor(sub, sub - lr * T.grad(seq_costs, sub)))
        for par, sub in pars_subs
    ])

    # ------------------------------------------------------------------------
    uidx = T.iscalar()  # a T.iscalar() has type TensorType(int32, scalar)
    self.seq_train = theano.function(
        inputs=[uidx],
        outputs=upq,
        updates=seq_updates,
        givens={
            userid: uidx,
            tidx: self.tra_target_masks[uidx],
            cidx: self.tra_context_masks[T.arange(self.tra_accum_lens[uidx][0],
                                                  self.tra_accum_lens[uidx][1])],
            bidx: self.routes[self.tra_target_masks[uidx]],
            tra_mask: self.tra_masks[uidx]
            # tra_mask_cot: self.tra_masks_cot[T.arange(self.tra_accum_lens[uidx][0],
            #                                           self.tra_accum_lens[uidx][1])]
        })
def testSplitOutputByFilter(self):
    self.setSeeds()
    input_shape = (self.batch_size, self.max_seq_len,
                   self.n_filters * self.filter_width)
    output_shape = (self.batch_size, self.n_filters,
                    self.max_seq_len, self.filter_width)
    x = np.arange(np.prod(input_shape))
    x = x.reshape(input_shape).astype(np.int32)
    y = np.zeros_like(x)
    y = np.reshape(y, output_shape)
    for i in range(self.n_filters):
        s = x[:, :, i * self.filter_width:(i + 1) * self.filter_width]
        y[:, i, :, :] = s
    xt = T.itensor3('xt')
    layer = SplitOutputByFilter(self.n_filters, self.filter_width)
    yt = layer._get_output(xt)
    f = theano.function(inputs=[xt], outputs=yt)
    y_theano = f(x)
    self.assertEquals(y.shape, y_theano.shape)
    self.assertTrue(np.all(y == y_theano))
def build_evpi_model(word_embeddings, len_voc, word_emb_dim, N, args, freeze=False):
    # input theano vars
    posts = T.imatrix()
    post_masks = T.fmatrix()
    ans_list = T.itensor3()
    ans_masks_list = T.ftensor3()
    labels = T.imatrix()
    utility_posts = T.imatrix()
    utility_post_masks = T.fmatrix()
    utility_labels = T.ivector()

    utility_preds, utility_post_ans_preds, utility_params = build_utility_lstm(
        utility_posts, utility_post_masks,
        posts, post_masks, ans_list, ans_masks_list,
        N, args.post_max_len, args.ans_max_len,
        word_embeddings, word_emb_dim, args.hidden_dim, len_voc)

    utility_loss = T.sum(lasagne.objectives.binary_crossentropy(utility_preds,
                                                                utility_labels))
    # upweight positive examples: the same loss is added again, doubled, on label==1
    utility_loss += T.sum(lasagne.objectives.binary_crossentropy(utility_preds,
                                                                 utility_labels) * 2 * utility_labels)

    loss = 0.0
    for i in range(N):
        loss += T.sum(lasagne.objectives.binary_crossentropy(utility_post_ans_preds[i],
                                                             labels[:, i]))

    utility_loss += args.rho * sum(T.sum(l ** 2) for l in utility_params)
    # utility_updates = lasagne.updates.adam(utility_loss + loss, utility_params,
    #                                        learning_rate=args.learning_rate)
    utility_updates = lasagne.updates.adam(utility_loss, utility_params,
                                           learning_rate=args.learning_rate)

    utility_train_fn = theano.function(
        [utility_posts, utility_post_masks, utility_labels,
         posts, post_masks, ans_list, ans_masks_list, labels],
        [utility_preds, utility_loss, loss] + utility_post_ans_preds,
        updates=utility_updates)
    utility_dev_fn = theano.function(
        [utility_posts, utility_post_masks, utility_labels,
         posts, post_masks, ans_list, ans_masks_list, labels],
        [utility_preds, utility_loss, loss] + utility_post_ans_preds)
    return utility_train_fn, utility_dev_fn
def BuildModel(modelSpecs, forTrain=True):
    rng = np.random.RandomState()

    ## x is for sequential features
    x = T.tensor3('x')
    ## mask for x
    xmask = T.bmatrix('xmask')

    propertyPredictor = ResNet4Properties(rng, seqInput=x, mask_seq=xmask,
                                          modelSpecs=modelSpecs)

    ## labelList is a list of label matrices, each with shape (batchSize, seqLen, numLabels)
    labelList = []
    if forTrain:
        ## when this model is used for training, we need to define the label variables
        for res in modelSpecs['responses']:
            labelType = Response2LabelType(res)
            if labelType.startswith('Discrete'):
                labelList.append(T.itensor3('label4' + res))
            else:
                labelList.append(T.tensor3('label4' + res))

    ## weightList is a list of label weight matrices, each with shape (batchSize, seqLen, 1);
    ## we always use weights to deal with residues without 3D coordinates
    weightList = []
    if len(labelList) > 0:
        weightList = [T.tensor3('weight4' + res) for res in modelSpecs['responses']]

    if len(labelList) > 0:
        return propertyPredictor, x, xmask, labelList, weightList
    return propertyPredictor, x, xmask
def main():
    xs = itensor3('xs')
    ins = ((None, None, 93), xs)

    gru = GRU(inputs=ins, hiddens=128, direction='bidirectional')
    print("GRU output (hiddens) shape: ", gru.output_size)
    print("GRU params: ", gru.get_params())

    lstm = LSTM(inputs=ins, hiddens=128, direction='bidirectional')
    print("LSTM output (hiddens) shape: ", lstm.output_size)
    print("LSTM params: ", lstm.get_params())

    rnn = RNN(inputs=ins, hiddens=128, direction='bidirectional')
    print("RNN output (hiddens) shape: ", rnn.output_size)
    print("RNN params: ", rnn.get_params())
def make_node(self, x, x2, x3, x4, x5):
    # Check that the theano version has support for __props__.
    # The next line looks like it has a typo, but it is actually a way to
    # detect whether the theano version is recent enough to support __props__.
    assert hasattr(self, '_props'), \
        "Your version of theano is too old to support __props__."
    x = tensor.as_tensor_variable(x)
    x2 = tensor.as_tensor_variable(x2)
    x3 = tensor.as_tensor_variable(x3)
    x4 = tensor.as_tensor_variable(x4)
    x5 = tensor.as_tensor_variable(x5)
    if prm.att_doc:
        if prm.compute_emb:
            td = tensor.itensor4().type()
        else:
            td = tensor.ftensor4().type()
        tm = tensor.ftensor3().type()
    else:
        if prm.compute_emb:
            td = tensor.itensor3().type()
        else:
            td = tensor.ftensor3().type()
        tm = tensor.fmatrix().type()
    return theano.Apply(self, [x, x2, x3, x4, x5],
                        [td, tm, tensor.fmatrix().type(), tensor.ivector().type()])
def __init__(self, batch_size, emb_X, num_words, lstm_params, conv_param,
             output_size, f1_classes):
    super().__init__(batch_size)
    self.num_words = num_words
    self.inputs = [T.itensor3('input'), T.tensor3('mask')]
    self.target = T.ivector('target')

    l = InputLayer((batch_size, num_words, None), self.inputs[0])
    l_mask = InputLayer((batch_size, num_words, None), self.inputs[1])
    l = ReshapeLayer(l, (-1, [2]))
    l_mask = ReshapeLayer(l_mask, (-1, [2]))
    l = EmbeddingLayer(l, emb_X.shape[0], emb_X.shape[1], W=emb_X)
    for lstm_param in lstm_params:
        l = LSTMLayer(l, lstm_param, grad_clipping=100, nonlinearity=tanh,
                      mask_input=l_mask, only_return_final=True)
    l = ReshapeLayer(l, (batch_size, num_words, -1))

    l_convs = []
    for filter_size in conv_param[1]:
        l_cur = Conv1DLayer(l, conv_param[0], filter_size, pad='full',
                            nonlinearity=rectify)
        l_cur = MaxPool1DLayer(l_cur, num_words + filter_size - 1,
                               ignore_border=True)
        l_cur = FlattenLayer(l_cur)
        l_convs.append(l_cur)
    l = ConcatLayer(l_convs)

    l = DropoutLayer(l)
    l = DenseLayer(l, output_size, nonlinearity=log_softmax)
    self.constraints[l.W] = lambda u, v: norm_constraint(v, 3)
    self.pred = T.exp(get_output(l, deterministic=True))
    self.loss = T.mean(categorical_crossentropy_exp(self.target, get_output(l)))
    params = get_all_params(l, trainable=True)
    self.updates = adadelta(self.loss, params)
    self.metrics = {'train': [acc], 'val': [acc, f1(f1_classes)]}
    self.network = l
    self.compile()
def add_datasets_to_graph(list_of_datasets, list_of_names, graph, strict=True,
                          list_of_test_values=None):
    assert len(list_of_datasets) == len(list_of_names)
    datasets_added = []
    for n, (dataset, name) in enumerate(zip(list_of_datasets, list_of_names)):
        # NOTE: the original checked `dtype != "int32"` first, which made its
        # final unsupported-dtype branch unreachable; the dispatch below keeps
        # the intended behavior and restores that error path.
        if dataset.dtype == "int32":
            if len(dataset.shape) == 1:
                sym = tensor.ivector()
            elif len(dataset.shape) == 2:
                sym = tensor.imatrix()
            elif len(dataset.shape) == 3:
                sym = tensor.itensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        elif dataset.dtype in ("float32", "float64"):
            if len(dataset.shape) == 1:
                sym = tensor.vector()
            elif len(dataset.shape) == 2:
                sym = tensor.matrix()
            elif len(dataset.shape) == 3:
                sym = tensor.tensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        else:
            raise ValueError("dataset %s has unsupported dtype %s" % (name, dataset.dtype))
        if list_of_test_values is not None:
            sym.tag.test_value = list_of_test_values[n]
        tag_expression(sym, name, dataset.shape)
        datasets_added.append(sym)
    graph["__datasets_added__"] = datasets_added
    return datasets_added
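# A hedged usage sketch (assumes the surrounding library, including
# tag_expression, is importable; shapes are made up): int32 arrays map to int
# symbols, float arrays to float symbols, and the graph dict records them.
import numpy as np
X = np.zeros((5, 7), dtype="int32")       # -> tensor.imatrix
Z = np.zeros((5, 7, 3), dtype="float32")  # -> tensor.tensor3
graph = {}
X_sym, Z_sym = add_datasets_to_graph([X, Z], ["X", "Z"], graph)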
def __init__(self, vocab_size, W_init=lasagne.init.GlorotNormal()):
    input_var, mask_var, target_var = T.itensor3('dq_pair'), T.imatrix('dq_mask'), \
        T.ivector('ans')
    self.network = self.build_network(vocab_size, input_var, mask_var, W_init)
    predicted_probs = L.get_output(self.network)
    predicted_probs_val = L.get_output(self.network, deterministic=True)

    loss_fn = T.nnet.categorical_crossentropy(predicted_probs, target_var).mean()
    eval_fn = lasagne.objectives.categorical_accuracy(predicted_probs,
                                                      target_var).mean()
    loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val,
                                                  target_var).mean()
    eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val,
                                                          target_var).mean()

    params = L.get_all_params(self.network, trainable=True)
    updates = lasagne.updates.rmsprop(loss_fn, params, rho=0.95,
                                      learning_rate=LEARNING_RATE)
    updates_with_momentum = lasagne.updates.apply_momentum(updates, params=params)

    self.train_fn = theano.function([input_var, target_var, mask_var],
                                    [loss_fn, eval_fn, predicted_probs],
                                    updates=updates_with_momentum)
    self.validate_fn = theano.function([input_var, target_var, mask_var],
                                       [loss_fn_val, eval_fn_val, predicted_probs_val])
def TestEmbeddingLayer():
    n_in = 60
    a = np.random.uniform(0, 1, (20, 300, n_in)).round().astype(np.int32)
    n_out = 5
    x = T.itensor3('x')
    layer = MetaEmbeddingLayer(x, n_in, n_out)
    f = theano.function([x], [layer.output, layer.pcenters])
    b, pcenter = f(a)
    print(b[0, 1, 2])
    print(b[0, 1, 20])
    print(a.shape)

    batch = np.random.randint(0, 20)
    row1 = np.random.randint(0, 100)
    row2 = np.random.randint(0, 100)
    v1 = a[batch][row1]
    v2 = a[batch][row2]
    print(b.shape)
    print(b[batch][row1][row2])

    c = np.outer(v1, v2)
    d = c[:, :, np.newaxis]
    e = np.sum(d * layer.W.get_value(), axis=(0, 1))
    print(v1)
    print(v2)
    print(e)
    print('diff: ', abs(e - b[batch][row1][row2]).sum())

    print(pcenter)
    center = [np.sum(l.W.get_value(), axis=(0, 1)) for l in layer]
    print(center)
    # center is a plain list, so square it via an array rather than `center**2`
    print(np.sum(np.asarray(center) ** 2))
def _setup_inference_graph(self):
    """
    Connect graphs together for inference and store in/out ports & updates

    inputs : input, time
    outputs: output
    updates: prev_states
    """
    p_input_tbi = tt.itensor3(name='port_i_input')
    # step_size is a compile time constant for inference
    s_next_prev_idx = tt.alloc(np.int32(self._options['step_size'] - 1))

    outputs = []
    self._prev_state_updates = []

    for s in self._slices:
        s_output_tbi, prev_state_updates = self._setup_forward_graph(
            s_input_tbi=s.apply(p_input_tbi),
            s_time_tb=None,
            s_next_prev_idx=s.transfer(s_next_prev_idx),
            v_params=s.v_params,
            v_prev_states=s.v_prev_states)
        outputs += [self.transfer(s_output_tbi)]
        self._prev_state_updates += prev_state_updates

    # merge outputs from all slices
    p_output_tbi = tt.concatenate(outputs, axis=1)

    self._prop_i_ports = [p_input_tbi]
    self._prop_o_ports = [p_output_tbi]
def ans_fn(self, num_samples, means_only=False):
    qo = T.itensor3('qo')
    o_mask = T.matrix('o_mask')
    N = qo.shape[0]

    qo_flat = qo.reshape((N * self.num_choices, self.max_length))
    qo_emb = embedder(qo_flat, self.embeddings)
    z, _, _ = self.rec_model.get_samples_and_means_and_covs(qo_flat, qo_emb,
                                                            num_samples,
                                                            means_only=means_only)
    z = z.reshape((N * num_samples, self.num_choices, self.z_dim))

    o_mask_rep = T.tile(o_mask, (num_samples, 1))
    probs = self.gen_model.get_probs(z, o_mask_rep)
    probs = probs.reshape((num_samples, N, self.num_choices))
    ans = T.argmax(T.mean(probs, axis=0), axis=-1)

    ans_fn = theano.function(inputs=[qo, o_mask],
                             outputs=[ans, T.mean(probs, axis=0)],
                             allow_input_downcast=True)
    return ans_fn
def optimiser_fn(self, update, update_kwargs, saved_update=None):
    qo = T.itensor3('qo')
    o_mask = T.matrix('o_mask')
    a = T.ivector('a')
    learning_rate = T.scalar('learning_rate')

    p_a = self.obj(qo, o_mask, a, deterministic=False)
    grads = T.grad(-p_a, self.params, disconnected_inputs='ignore')

    update_kwargs['loss_or_grads'] = grads
    update_kwargs['params'] = self.params
    update_kwargs['learning_rate'] = learning_rate
    updates = update(**update_kwargs)

    if saved_update is not None:
        for u, v in zip(updates, saved_update.keys()):
            u.set_value(v.get_value())

    optimiser = theano.function(
        inputs=[qo, o_mask, a, learning_rate],
        outputs=p_a,
        updates=updates,
        allow_input_downcast=True,
    )
    return optimiser, updates
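# A hypothetical usage sketch (the names `model`, `qo_batch`, `o_mask_batch`
# and `a_batch` are not from the original source): any lasagne-style update
# rule can be passed in, since optimiser_fn fills in loss_or_grads, params and
# learning_rate itself.
import lasagne
optimiser, updates = model.optimiser_fn(update=lasagne.updates.adam,
                                        update_kwargs={})
p_a = optimiser(qo_batch, o_mask_batch, a_batch, 0.001)  # one update step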
def create_theano_function(word_embed, char_embed, values=None):
    word_x = T.itensor3('word_x')
    word_mask = T.tensor3('word_mask')
    sent_mask = T.matrix('sent_mask')
    label_y = T.ivector('label_y')

    att_out, network_output, loss = fn.build_fn(word_x=word_x, word_mask=word_mask,
                                                sent_mask=sent_mask, label_y=label_y,
                                                word_embed=word_embed,
                                                char_embed=None, args=args)
    if values is not None:
        lasagne.layers.set_all_param_values(network_output, values, trainable=True)

    params = lasagne.layers.get_all_params(network_output, trainable=True)
    if args.optimizer == 'sgd':
        updates = lasagne.updates.sgd(loss, params, args.learning_rate)
    elif args.optimizer == 'momentum':
        updates = lasagne.updates.momentum(loss, params, args.learning_rate)
    else:
        # the original fell through with `updates` undefined for other values
        raise ValueError('unsupported optimizer: %s' % args.optimizer)

    train_fn = theano.function([word_x, word_mask, sent_mask, label_y], loss,
                               updates=updates)
    prediction = lasagne.layers.get_output(network_output, deterministic=True)
    eval_fn = theano.function([word_x, word_mask, sent_mask], prediction)
    fn_check_attention = theano.function([word_x, word_mask, sent_mask], att_out)
    return fn_check_attention, eval_fn, train_fn, params
def RelationStackMaker(chips, params, graph=False, weighted=False, batched=False):
    assert 'emb_matrices' in params or 'wemb_matrix' in params
    if 'emb_matrices' in params:
        assert type(params['emb_matrices']) == list
        num_inputs = len(params['emb_matrices'])
    else:
        num_inputs = 1
    if batched:
        emb_inputs = [T.itensor3('emb_input_' + str(i)) for i in range(num_inputs)]
        entities_tv = [T.fmatrix('enidx_' + str(i)).astype(theano.config.floatX)
                       for i in range(params['num_entity'])]
        if graph:
            if weighted:
                masks = T.ftensor4('child_mask')
            else:
                masks = T.ftensor3('child_mask')
        else:
            masks = T.fmatrix('batch_mask')
    else:
        emb_inputs = [T.imatrix('emb_input_' + str(i)) for i in range(num_inputs)]
        entities_tv = [T.fvector('enidx_' + str(i)).astype(theano.config.floatX)
                       for i in range(params['num_entity'])]
        if graph:
            if weighted:
                masks = T.ftensor3('child_mask')
            else:
                masks = T.fmatrix('child_mask')
        else:
            masks = None
    # print masks, type(masks), masks.ndim
    if len(emb_inputs) == 1:
        current_chip = Start(params['voc_size'], emb_inputs[0])
    else:
        current_chip = Start(params['voc_size'], emb_inputs)
    print '\n', 'Building Stack now', '\n', 'Start: ', params['voc_size']  #, 'out_tv size:', len(current_chip.output_tv)
    instantiated_chips = stackLayers(chips, current_chip, params,
                                     entity_size=params['num_entity'])
    trainable_parameters = computeLayers(instantiated_chips, current_chip, params,
                                         entities_input=entities_tv, mask=masks)
def test_broadcasts():
    A = T.imatrix()
    A_S = A.dimshuffle(0, 'x', 1)
    func_shuffle = theano.function([A], A_S)
    A_value = [[1, 2], [3, 4]]
    AS_value = func_shuffle(A_value)
    print A_value
    print AS_value
    print AS_value.shape

    B = T.itensor3()
    AB = A_S + B
    # compile on the root variable A (A_S is a derived variable and cannot be
    # a function input); the broadcasted sum is unchanged
    func_add = theano.function([A, B], AB)
    B_value = [A_value, A_value]
    AB_value = func_add(A_value, B_value)
    print AB_value.shape

    AA = A[[0, 0, 0, 0]]
    func_embed = theano.function([A], AA)
    AA_value = func_embed(A_value)
    print AA_value
def __init__(self, input_dim, proj_dim=128, neg_samples=4, init='uniform',
             activation='sigmoid', weights=None, W_regularizer=None,
             activity_regularizer=None, **kwargs):
    super(WordTagContextProduct, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.proj_dim = proj_dim
    self.samples = neg_samples + 1
    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.W_regularizer = regularizers.get(W_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input = [T.itensor3(), T.itensor3()]
    self.W_w = self.init((input_dim, proj_dim))
    self.params = [self.W_w]
    if weights is not None:
        self.set_weights(weights)
def load_pretrained_model(self, model_path, model_name):
    # Load model and dictionaries
    print("Loading model params...")
    params = load_params('%s/%s' % (model_path, model_name))

    print("Loading dictionaries...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        self.chardict = pkl.load(f)
    with open('%s/label_dict.pkl' % model_path, 'rb') as f:
        labeldict = pkl.load(f)
    self.n_char = len(self.chardict.keys()) + 1
    n_classes = len(labeldict.keys())
    print("#classes: %d" % n_classes)
    print(labeldict)

    print("Building network...")
    # Tweet variables
    tweet = T.itensor3()
    targets = T.imatrix()
    # masks
    t_mask = T.fmatrix()
    # network for prediction
    predictions = classify(tweet, t_mask, params, n_classes, self.n_char)

    # Theano function
    print("Compiling theano functions...")
    self.predict = theano.function([tweet, t_mask], predictions)
def __init__(self, rng, model_params):
    self.input = T.itensor3('input')             # the data is a minibatch
    self.label = T.imatrix('label')              # label shape: (mini_batch_size, max_term_per_sent)
    self.sent_length = T.ivector('sent_length')  # number of terms in each sentence
    self.masks = T.imatrix('masks')              # masks used in error and likelihood calculation

    self.core = SentenceLevelNeuralModelCore(rng, self.input, self.label,
                                             self.sent_length, self.masks,
                                             model_params)

    self.params = self.core.wordvec.params() \
        + self.core.POSvec.params() \
        + self.core.wordpos_vec.params() \
        + self.core.verbpos_vec.params() \
        + self.core.conv_word.params() \
        + self.core.conv_POS.params() \
        + self.core.conv_wordpos.params() \
        + self.core.conv_verbpos.params() \
        + self.core.hidden_layer.params

    self.L2_sqr = (self.core.wordvec.embeddings ** 2).sum() \
        + (self.core.POSvec.embeddings ** 2).sum() \
        + (self.core.wordpos_vec.embeddings ** 2).sum() \
        + (self.core.verbpos_vec.embeddings ** 2).sum() \
        + (self.core.conv_word.W ** 2).sum() \
        + (self.core.conv_POS.W ** 2).sum() \
        + (self.core.conv_wordpos.W ** 2).sum() \
        + (self.core.conv_verbpos.W ** 2).sum() \
        + (self.core.hidden_layer.W ** 2).sum()

    self.negative_log_likelihood = self.core.likelihood()
    self.errors = self.core.errors()

    # we only use L2 regularization
    self.cost = self.negative_log_likelihood + self.core.L2_reg * self.L2_sqr

    self.gparams = []
    for param in self.params:
        gparam = T.grad(self.cost, param)
        self.gparams.append(gparam)

    self.updates = []
    learning_rate = model_params['learning_rate']
    for param, gparam in zip(self.params, self.gparams):
        self.updates.append((param, param - learning_rate * gparam))

    # debug variants kept from the original source; each swapped a different
    # intermediate output into train_model (all with on_unused_input='ignore'):
    #   conv_word.output / conv_POS.output / conv_verbpos.output /
    #   conv_wordpos.output / conv_out / max_out / hidden_layer.output /
    #   negative_log_likelihood / cost (without updates)
    self.train_model = theano.function(inputs=[self.input, self.label, self.masks],
                                       outputs=self.cost, updates=self.updates,
                                       on_unused_input='ignore')
    self.valid_model = theano.function(inputs=[self.input, self.label, self.masks],
                                       outputs=[self.errors,
                                                self.core.sentce_loglikelihood.y_pred_pointwise],
                                       on_unused_input='ignore')
def __init__(self, batch_size=16, seed=1234, nhu=300, width=5, n_out=len(nerarray),
             activation_f="hardtanh", embeddingfile=senna_embmtxfile,
             trainingfile=trainingfile, paramfile=None):
    modeldir = os.path.join(nerdir, "models",
                            'model_%i' % (len(os.listdir(nerdir + "/models"))))
    os.mkdir(modeldir)
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    logging.basicConfig(filename=os.path.join(modeldir, 'log.txt'),
                        level=logging.INFO,
                        format='%(asctime)s : %(levelname)s : %(message)s')
    logger.info("\n" + "\n".join(["\t%s : " % key + str(val)
                                  for key, val in locals().iteritems()
                                  if key != "self"]))
    self.modeldir = modeldir
    self.batch_size = batch_size
    activation = None
    if activation_f == "hardtanh":
        activation = hardtanh
    elif activation_f == "tanh":
        activation = T.tanh
    self.load_data(embeddingfile, trainingfile, batch_size)

    #==========================================================================
    # BUILD MODEL
    #==========================================================================
    logger.info('... building the model')

    # allocate symbolic variables for the data
    self.index = T.iscalar()  # index to a [mini]batch
    self.x = T.itensor3('x')  # the data is presented as a matrix of integers
    self.y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels
    self.permutation = T.ivector('permutation')

    if paramfile is not None:
        params = pickle.load(open(paramfile, "rb"))
    else:
        params = None

    self.model = SennaNER(input=self.x, embeddings=self.embeddings,
                          features=capsfeatures, n_out=n_out,
                          mini_batch_size=batch_size, nhu=nhu, width=width,
                          activation=activation, seed=seed, params=params)

    self.test_model = theano.function(
        inputs=[self.index],
        outputs=self.model.errors(self.y),
        givens={
            self.x: self.test_set_x[self.index * batch_size:(self.index + 1) * batch_size],
            self.y: self.test_set_y[self.index * batch_size:(self.index + 1) * batch_size]},
        name="test_model")
    self.validation_cost = theano.function(
        inputs=[self.index],
        outputs=self.model.negative_log_likelihood(self.y),
        givens={
            self.x: self.valid_set_x[self.index * batch_size:(self.index + 1) * batch_size],
            self.y: self.valid_set_y[self.index * batch_size:(self.index + 1) * batch_size]},
        name="validation_cost")
    self.predictions = theano.function(
        inputs=[self.index],
        outputs=self.model.predictions,
        givens={
            self.x: self.test_set_x[self.index * batch_size:(self.index + 1) * batch_size]},
        name="predictions")
    self.visualize_hidden = theano.function(
        inputs=[self.index],
        outputs=self.model.HiddenLayer.output,
        givens={
            self.x: self.valid_set_x[self.index * batch_size:(self.index + 1) * batch_size]},
        name="visualize_hidden")
def ndim_itensor(ndim, name=None):
    if ndim == 2:
        return T.imatrix(name)
    elif ndim == 3:
        return T.itensor3(name)
    elif ndim == 4:
        return T.itensor4(name)
    return T.imatrix(name=name)  # fall back to a matrix for any other ndim
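# A minimal usage sketch (assumes theano.tensor is imported as T, as in the
# helper above): the returned variable is an int32 tensor of the given rank.
x3 = ndim_itensor(3, 'x3')
x4 = ndim_itensor(4, 'x4')
print("%d %s" % (x3.ndim, x3.dtype))  # -> 3 int32
print("%d %s" % (x4.ndim, x4.dtype))  # -> 4 int32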
def save(self, repo, filename):
    params = getParams(self, T.itensor3())
    index = 0
    while os.path.isfile(os.path.join(repo, filename + "_" + str(index))):
        index += 1
    filename = filename + "_" + str(index)
    with closing(open(os.path.join(repo, filename), 'wb')) as f:
        pickle.dump(params, f, protocol=pickle.HIGHEST_PROTOCOL)
def tensor_max():
    e = np.asarray([[[2, 4], [5, 1]], [[3, 5], [4, 6]]], dtype='int32')
    w = T.itensor3('w')
    y = T.max(w, axis=1)
    f = theano.function(inputs=[w], outputs=y)
    print f(e)
def __init__(self, nh, nc, ne, de, cs):
    '''
    nh :: number of hidden units
    nc :: number of output label classes
    ne :: vocabulary size (number of words)
    de :: dimension of the word embeddings
    cs :: context window size
    '''
    # the embedding table is (ne, de) plus one extra row reserved for the
    # boundary label -1
    self.emb = theano.shared(name='embeddings',
                             value=0.2 * numpy.random.uniform(-1.0, 1.0, (ne + 1, de))
                             .astype(theano.config.floatX))  # word embedding matrix
    self.wx = theano.shared(name='wx',
                            value=0.2 * numpy.random.uniform(-1.0, 1.0, (de * cs, nh))
                            .astype(theano.config.floatX))   # input-to-hidden weights
    self.wh = theano.shared(name='wh',
                            value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh))
                            .astype(theano.config.floatX))   # hidden-to-hidden recurrent weights
    self.w = theano.shared(name='w',
                           value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nc))
                           .astype(theano.config.floatX))    # hidden-to-output weights
    self.bh = theano.shared(name='bh',
                            value=numpy.zeros(nh, dtype=theano.config.floatX))  # hidden bias
    self.b = theano.shared(name='b',
                           value=numpy.zeros(nc, dtype=theano.config.floatX))   # output bias
    self.h0 = theano.shared(name='h0',
                            value=numpy.zeros(nh, dtype=theano.config.floatX))
    self.lastlabel = theano.shared(name='lastlabel',
                                   value=0.2 * numpy.random.uniform(-1.0, 1.0, (nc, nc))
                                   .astype(theano.config.floatX))
    self.prelabel = theano.shared(name='prelabel',
                                  value=0.2 * numpy.random.uniform(-1.0, 1.0, (nc, nc))
                                  .astype(theano.config.floatX))
    self.bhmm = theano.shared(name='bhmm',
                              value=numpy.zeros(nc, dtype=theano.config.floatX))
    # all trainable parameters
    self.params = [self.emb, self.wx, self.wh, self.w, self.bh, self.b,
                   self.h0, self.lastlabel, self.prelabel, self.bhmm]

    lr = T.scalar('lr')  # learning rate, passed in as an input
    idxs = T.itensor3()
    x = self.emb[idxs].reshape((idxs.shape[0], idxs.shape[1], de * idxs.shape[2]))
    y_sentence = T.imatrix('y_sentence')  # training labels, 2-D: (batch, sentence)

    def step(x_t, h_tm1):
        # compute the hidden state from h_{t-1} and x_t
        h_t = T.nnet.sigmoid(T.dot(x_t, self.wx) + T.dot(h_tm1, self.wh) + self.bh)
        # softmax does not operate on 3-D tensors, so the scores are reshaped
        # to 2-D below, passed through softmax, then reshaped back to 3-D
        s_temp = T.dot(h_t, self.w) + self.b
        return h_t, s_temp

    [h, s_temp], _ = theano.scan(
        step, sequences=x,
        outputs_info=[T.ones(shape=(x.shape[1], self.h0.shape[0])) * self.h0, None])
    p_y = T.nnet.softmax(T.reshape(s_temp, (s_temp.shape[0] * s_temp.shape[1], -1)))
    p_y = T.reshape(p_y, s_temp.shape)

    # add the constraint term from the previous time step's label
    y_label3d = T.ftensor3('y_sentence3d')
    p_ytrain = self.add_layer(p_y, y_label3d)
    loss = self.nll_multiclass(p_ytrain, y_sentence) + \
        0.0 * ((self.wx ** 2).sum() + (self.wh ** 2).sum() + (self.w ** 2).sum())

    # network output: gradients and updates
    sentence_gradients = T.grad(loss, self.params)
    sentence_updates = OrderedDict((p, p - lr * g)
                                   for p, g in zip(self.params, sentence_gradients))
    self.sentence_traintemp = theano.function(inputs=[idxs, y_sentence, y_label3d, lr],
                                              outputs=loss, updates=sentence_updates)
    # self.sentence_train = theano.function(inputs=[idxs, y_sentence, lr],
    #                                       outputs=loss, updates=sentence_updates)

    # normalize the embeddings after each update so the learned word vectors
    # stay unit-length
    self.normalize = theano.function(
        inputs=[],
        updates={self.emb: self.emb / T.sqrt((self.emb ** 2).sum(axis=1)).dimshuffle(0, 'x')})

    # prediction function; each row of idxs is one sample (the sequence of
    # indices inside one context window)
    self.classify = theano.function(inputs=[idxs], outputs=p_y)
def build_image_only_network(d_word, d_hidden, lr, eps=1e-6):
    # input theano vars
    in_context_fc7 = T.tensor3(name='context_images')
    in_context_bb = T.tensor4(name='context_bb')
    in_bbmask = T.tensor3(name='bounding_box_mask')
    in_context = T.itensor4(name='context')
    in_cmask = T.tensor4(name='context_mask')
    in_answer_fc7 = T.matrix(name='answer_images')
    in_answer_bb = T.matrix(name='answer_bb')
    in_answers = T.itensor3(name='answers')
    in_amask = T.tensor3(name='answer_mask')
    in_labels = T.imatrix(name='labels')

    # define network
    l_context_fc7 = lasagne.layers.InputLayer(shape=(None, 3, 4096),
                                              input_var=in_context_fc7)
    l_answers = lasagne.layers.InputLayer(shape=(None, 3, max_words),
                                          input_var=in_answers)
    l_amask = lasagne.layers.InputLayer(shape=l_answers.shape, input_var=in_amask)

    # contexts and answers should share embeddings
    l_answer_emb = lasagne.layers.EmbeddingLayer(l_answers, len_voc, d_word)

    l_context_proj = lasagne.layers.DenseLayer(
        l_context_fc7, num_units=d_hidden,
        nonlinearity=lasagne.nonlinearities.rectify, num_leading_axes=2)
    l_context_final_reps = lasagne.layers.LSTMLayer(l_context_proj,
                                                    num_units=d_hidden,
                                                    only_return_final=True)

    l_ans_reps = SumAverageLayer([l_answer_emb, l_amask], compute_sum=True,
                                 num_dims=3)
    l_scores = InnerProductLayer([l_context_final_reps, l_ans_reps])
    preds = lasagne.layers.get_output(l_scores)
    loss = T.mean(lasagne.objectives.categorical_crossentropy(preds, in_labels))

    all_params = lasagne.layers.get_all_params(l_scores, trainable=True)
    updates = lasagne.updates.adam(loss, all_params, learning_rate=lr)

    train_fn = theano.function(
        [in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
         in_answer_fc7, in_answer_bb, in_answers, in_amask, in_labels],
        loss, updates=updates, on_unused_input='warn')
    pred_fn = theano.function(
        [in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
         in_answer_fc7, in_answer_bb, in_answers, in_amask],
        preds, on_unused_input='warn')
    return train_fn, pred_fn, l_scores
def make_node(self, x1, x2, x3, x4):
    assert hasattr(self, '_props'), \
        "Your version of theano is too old to support __props__."
    x1 = tensor.as_tensor_variable(x1)
    x2 = tensor.as_tensor_variable(x2)
    x3 = tensor.as_tensor_variable(x3)
    x4 = tensor.as_tensor_variable(x4)
    out = [tensor.fmatrix().type(), tensor.itensor3().type(),
           tensor.imatrix().type(), tensor.fmatrix().type()]
    return theano.Apply(self, [x1, x2, x3, x4], out)
def attention_q():
    query = T.itensor3('query')
    cands = T.itensor3('cands')
    d = 2

    W1_c = theano.shared(np.random.randint(-3, 3, (d, d)))
    # W1_c = theano.shared(np.ones((d, d), dtype='int32'))
    W1_h = theano.shared(np.random.randint(-3, 3, (d, d)))
    # W1_h = theano.shared(np.ones((d, d), dtype='int32'))
    w = theano.shared(np.ones((d,), dtype='float32'))
    W2_r = theano.shared(np.random.randint(-1, 1, (d, d)))
    W2_h = theano.shared(np.random.randint(-1, 1, (d, d)))
    # W2_r = theano.shared(np.ones((d, d), dtype='float32'))
    # W2_h = theano.shared(np.ones((d, d), dtype='float32'))

    # q_in = np.asarray([[[1, 2], [3, 4], [5, 6]]], dtype='int32')
    q_in = np.ones((1, 3, 2), dtype='int32')
    # C_in = np.ones((1, 3, 2), dtype='int32')
    # C_in = np.ones((4, 3, 3, 2), dtype='int32')
    C_in = np.asarray(np.random.randint(-2, 2, (1, 3, 2)), dtype='int32')

    def forward(query, cands, eps=1e-8):
        # cands: 1D: n_queries, 2D: n_cands-1, 3D: dim_h
        # query: 1D: n_queries, 2D: n_words,   3D: dim_h
        # mask:  1D: n_queries, 2D: n_cands,   3D: n_words

        # 1D: n_queries, 2D: n_cands-1, 3D: n_words, 4D: dim_h
        M = T.dot(query, W1_c).dimshuffle(0, 'x', 1, 2) + \
            T.dot(cands, W1_h).dimshuffle(0, 1, 'x', 2)
        # 1D: n_queries, 2D: n_cands-1, 3D: n_words
        alpha = T.nnet.softmax(
            T.dot(M, w).reshape((cands.shape[0] * cands.shape[1], query.shape[1])))
        alpha = alpha.reshape((cands.shape[0], cands.shape[1], query.shape[1], 1))
        # 1D: n_queries, 2D: n_cands-1, 3D: n_words
        r = T.sum(query.dimshuffle((0, 'x', 1, 2)) * alpha, axis=2)  # 4 * 3 * 2
        # 1D: n_queries, 2D: n_cands, 3D: dim_h
        h_after = T.dot(r, W2_r)  # 4 * 3 * 2
        # return h_after, h_after
        return h_after, r, alpha.reshape((alpha.shape[0], alpha.shape[1],
                                          alpha.shape[2])), M

    y, a, b, c = forward(query, cands)
    f = theano.function(inputs=[query, cands], outputs=[y, a, b, c],
                        on_unused_input='ignore')
    print f(q_in, C_in)
def get_inps(use_mask=True, vgen=None, use_bow_out=False, debug=False, output_map=None):
    if use_mask:
        X, y, mask, cmask = TT.itensor3("X"), TT.imatrix("y"), TT.fmatrix("mask"), \
            TT.fmatrix("cost_mask")
        qmask = TT.fmatrix("qmask")
        bow_out = TT.ftensor3("bow_out")

        if debug:
            theano.config.compute_test_value = "warn"
            batch = vgen.next()
            X.tag.test_value = batch['x'].astype("int32")
            y.tag.test_value = batch['y'].astype("int32")
            mask.tag.test_value = batch['mask'].astype("float32")
            cmask.tag.test_value = batch['cmask'].astype("float32")
            qmask.tag.test_value = batch["qmask"].astype("float32")
            if use_bow_out:
                bow_out.tag.test_value = batch['bow_out'].astype("float32")

        if output_map:
            outs = {}
            outs["X"] = X
            outs["y"] = y
            outs["mask"] = mask
            outs["cmask"] = cmask
            if use_bow_out:
                outs["bow_out"] = bow_out
            outs["qmask"] = qmask
        else:
            outs = [X, y, mask, cmask]
            if use_bow_out:
                outs += [bow_out]
        return outs
    else:
        X, y = TT.itensor3("X"), TT.itensor3("y")
        if debug:
            theano.config.compute_test_value = "warn"
            batch = vgen.next()
            X.tag.test_value = batch['x']
            y.tag.test_value = batch['y']
        return [X, y]
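# A minimal usage sketch (not from the original source): without masks the
# helper returns just the two int tensors; with output_map=True it returns a
# dict keyed by variable name.
X, y = get_inps(use_mask=False)
print("%d %d" % (X.ndim, y.ndim))  # -> 3 3
outs = get_inps(use_mask=True, output_map=True)
print(sorted(outs.keys()))         # -> ['X', 'cmask', 'mask', 'qmask', 'y']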
def test_lookup():
    a = T.itensor3()
    b = T.ivector()
    y = a[0][T.arange(b.shape[0]), b]
    f = theano.function(inputs=[a, b], outputs=[y])
    u = [[[1, 2], [2, 4]], [[3, 1], [2, 1]]]
    c = [0, 1]
    print f(u, c)
def copy():
    e = np.asarray([[[2, 4], [5, 1]], [[3, 5], [4, 6]]], dtype='int32')
    w = T.itensor3('w')
    u = T.ones(shape=(2, w.shape[2]))
    y = T.repeat(T.max(w, axis=1, keepdims=True), 2, 1)
    # y = T.max(w, axis=1, keepdims=True) * u
    f = theano.function(inputs=[w], outputs=y)
    print f(e)
def __init__(self, rng, embeddings, char_embeddings, hiddensize, char_hiddensize,
             embedding_dim, char_embedding_dim, window_size, num_tags, dic_size,
             dropout_rate=0.7):
    self.rng = rng
    self.inputX = T.imatrix('inputX')               # a sentence, shape (T, window_size)
    self.inputX_chars = T.itensor3('inputX_chars')  # a sentence, shape (T, window_size, max number of chars in a word)
    self.inputY = T.ivector('inputY')               # tags of a sentence
    self.is_train = T.iscalar('is_train')
    self.new_theta = T.fmatrix('new_theta')

    self.dropout_rate = dropout_rate
    self.nhidden = hiddensize
    self.char_nhidden = char_hiddensize  # for now, use the same number of hidden units
    self.embedding_dim = embedding_dim
    self.char_embedding_dim = char_embedding_dim
    self.window_size = window_size
    self.n_classes = num_tags
    self.dic_size = dic_size

    # test values used when compiling with compute_test_value
    self.inputX.tag.test_value = np.ones((10, window_size)).astype(np.int32)
    self.inputX_chars.tag.test_value = np.ones((10, window_size, 8)).astype(np.int32)
    self.inputY.tag.test_value = np.ones(10).astype(np.int32)

    self.Embeddings = theano.shared(value=embeddings, name="Embeddings",
                                    borrow=True)
    self.Char_Embeddings = theano.shared(value=char_embeddings,
                                         name="Char_Embeddings", borrow=True)

    # word embeddings
    self.inputW = self.Embeddings[self.inputX]
    # char embeddings
    self.inputC = self.Char_Embeddings[self.inputX_chars].dimshuffle([2, 0, 1, 3])

    self.params = [self.Embeddings, self.Char_Embeddings]
def __init__(self, input_dim, proj_dim=128, neg_samples=4, tensor_slices=4,
             slice_dim=16, init='uniform', activation='tanh', weights=None,
             W_regularizer=None, activity_regularizer=None, **kwargs):
    super(WordTagContextProduct_tensor, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.proj_dim = proj_dim
    self.samples = neg_samples + 1
    self.init = initializations.get(init)
    self.activation = activations.get(activation)
    self.W_regularizer = regularizers.get(W_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.input = [T.itensor3(), T.itensor3()]
    self.W_w = self.init((input_dim, proj_dim))
    self.tensor_slices = tensor_slices
    self.slice_dim = slice_dim
    self.params = [self.W_w]
    if weights is not None:
        self.set_weights(weights)
def test_get_output_for(self):
    X = T.itensor3()
    X1 = np.empty((2, 2, 10), dtype='int32')
    for i, is_ in enumerate(itertools.product(*(range(n) for n in X1.shape[:-1]))):
        X1[is_] = np.arange(i, 10 + i)
    X2 = np.empty((2, 2, 3), dtype='int32')
    for i, is_ in enumerate(itertools.product(*(range(n) for n in X2.shape[:-1]))):
        X2[is_] = np.arange(7 + i, 10 + i)
    self.assertTrue(np.array_equal(
        theano.function([X], KMaxPool1DLayer(InputLayer((100, 100)), 3).get_output_for(X))(X1),
        X2
    ))
def dot():
    e1 = np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype='int32')
    e2 = np.asarray([[1, 2], [3, 1]], dtype='int32')
    w = T.itensor3('w')
    v = T.imatrix('v')
    y = T.batched_dot(v, w.dimshuffle(0, 2, 1))
    u = w.T
    f = theano.function(inputs=[v, w], outputs=y)
    f2 = theano.function(inputs=[w], outputs=u)
    print f(e2, e1)
def __init__(self, nh, nc, ne, de, cs, bs):
    '''
    nh :: dimension of the hidden layer
    nc :: number of classes
    ne :: number of word embeddings in the vocabulary
    de :: dimension of the word embeddings
    cs :: word window context size
    bs :: batch size (number of samples)
    '''
    # time -> samples -> features: as many columns as the context window size,
    # as many lines as words in the sentence.
    # Data is given as a tensor (batch, sequence, context size)
    idxs = T.itensor3()
    l_in = lasagne.layers.InputLayer((bs, None, cs), idxs)
    # We have a tensor (batch size, sequence length, concatenated context win. embeddings)
    l_emb = lasagne.layers.EmbeddingLayer(l_in, ne, de)
    l_flatt_emb = lasagne.layers.flatten(l_emb, outdim=3)
    print("Output after embedding: {0}".format(
        lasagne.layers.get_output_shape(l_flatt_emb, (bs, 11, cs))))

    # Define recurrent layer
    l_r = lasagne.layers.RecurrentLayer(l_flatt_emb, nh,
                                        nonlinearity=lasagne.nonlinearities.sigmoid)
    # Output shape should be (batch size, sequence, hidden)
    print("Output after recurrence: {0}".format(
        lasagne.layers.get_output_shape(l_r, (bs, 11, cs))))
    l_res = lasagne.layers.ReshapeLayer(l_r, (-1, l_r.output_shape[2]))
    print("Output after reshape: {0}".format(
        lasagne.layers.get_output_shape(l_res, (bs, 11, cs))))
    l_out = lasagne.layers.DenseLayer(l_res, nc,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    print("Output shape: {0}".format(
        lasagne.layers.get_output_shape(l_out, (bs, 11, cs))))

    y_sentence = T.ivector('y_sentence')
    y_mask = T.vector('y_mask')

    pred = lasagne.layers.get_output(l_out)
    c_pred = T.argmax(pred, axis=1)
    sentence_nll = T.mean(lasagne.objectives.categorical_crossentropy(pred, y_sentence) * y_mask)
    sentence_error = T.sum(T.neq(c_pred, y_sentence) * y_mask)

    params = lasagne.layers.get_all_params(l_out)
    sentence_gradients = T.grad(sentence_nll, params)
    lr = 0.0627142536696559
    # sentence_updates = OrderedDict((p, p - lr*g) for p, g in zip(params, sentence_gradients))
    sentence_updates = lasagne.updates.momentum(sentence_nll, params, lr)

    self.train_sentence = theano.function(inputs=[idxs, y_sentence, y_mask],
                                          outputs=sentence_nll,
                                          updates=sentence_updates)
    self.normalize = theano.function(
        inputs=[],
        updates={l_emb.W: l_emb.W / T.sqrt((l_emb.W ** 2).sum(axis=1)).dimshuffle(0, 'x')})
    self.errors = theano.function(inputs=[idxs, y_sentence, y_mask],
                                  outputs=sentence_error)
def test_recurrent_lookup_table(self):
    E = np.random.uniform(size=(self.vocab_size, self.n_in)).astype('float32')
    W = np.random.uniform(size=(self.n_out, self.vocab_size)).astype('float32')
    b = np.random.uniform(size=self.vocab_size).astype('float32')

    embeddings = LookupTable(vocab_size=self.vocab_size, embedding_size=self.n_in,
                             window_size=1, E_init=E, advanced_indexing=True)
    net = Recurrent([embeddings, self.layer,
                     LinearLayer(n_in=self.n_out, n_out=self.vocab_size,
                                 W_init=W, b_init=b),
                     Softmax()])

    x = np.array([
        [[0], [2]],
        [[3], [1]]
    ]).astype('int32')
    x_var = T.itensor3()
    p_var = net.forward(x_var)  # the predictions for the two examples

    y = np.array([0, 2]).astype('int32')
    y_one_hot = np.array([[1, 0, 0, 0, 0],
                          [0, 0, 1, 0, 0]]).astype('int32')

    # time 1
    h1, mem1 = self.recurrent_step(E[[0, 3]], *self.mem0)
    z1 = h1.dot(W) + b
    p1 = np.exp(z1) / np.exp(z1).sum(axis=1)[:, np.newaxis]

    # time 2
    h2, mem2 = self.recurrent_step(E[[2, 1]], *mem1)
    z2 = h2.dot(W) + b
    p2 = np.exp(z2) / np.exp(z2).sum(axis=1)[:, np.newaxis]

    f = function([x_var], p_var, allow_input_downcast=True, on_unused_input='warn')
    got_p = f(x)
    self.assertTrue(np.allclose(got_p, p2))

    y_var = T.ivector()
    loss = cross_entropy_loss(p_var, y_var, one_hot_num_classes=self.vocab_size)
    f_loss = function([x_var, y_var], [p_var, loss],
                      allow_input_downcast=True, on_unused_input='warn')
    got_p_out, got_loss = f_loss(x, y)
    expect_p_out = p2
    expect_loss = -1 * np.sum(y_one_hot * np.log(expect_p_out)) / 2.
    self.assertTrue(np.allclose(got_p_out, expect_p_out))
    self.assertTrue(np.allclose(got_loss, expect_loss))
def zero_pad_gate():
    dim_emb = 2
    window = 1
    # w = T.imatrix('w')
    w = T.itensor3('w')
    zero = T.zeros((1, 1, dim_emb * window), dtype=theano.config.floatX)
    # y = T.eq(w, zero)
    y = T.eq(T.sum(T.eq(w, zero), 2, keepdims=True), 0) * w
    f = theano.function(inputs=[w], outputs=[y])
    e = np.asarray([[[2, 4], [0, 0]], [[3, 2], [4, 1]]], dtype='int32')
    print f(e)
def make_node(self, x, y, len_x, len_y):
    x = theano.tensor.as_tensor_variable(x)
    assert x.ndim == 3  # tensor: nframes x nseqs x dim
    y = theano.tensor.as_tensor_variable(y)
    assert y.ndim == 2  # matrix: nseqs x max_labelling_length
    len_x = theano.tensor.as_tensor_variable(len_x)
    len_y = theano.tensor.as_tensor_variable(len_y)
    assert len_x.ndim == 1  # vector of seq lengths
    assert len_x.dtype == "int32"
    assert len_y.ndim == 1  # vector of seq lengths
    assert len_y.dtype == "int32"
    return theano.Apply(self, [x, y, len_x, len_y],
                        [T.ftensor3(), T.itensor3()])
def test_model(args):
    _, dev, test, vmap = load_dataset(args.tweet_file, args.testfile, args.vocab)
    labelmap = cPickle.load(open(args.label_file, 'r'))
    nclasses = len(labelmap)

    X = T.itensor3('X')
    M = T.matrix('M')
    y = T.ivector('y')

    print "building model"
    network = build_model(vmap, nclasses, invar=X, maskvar=M)
    print "loading params"
    network = read_model_data(network, args.model_file)
def main(data_path, model_path, dict_path, save_path):
    print("Preparing Data...")
    # Load data and dictionary
    X = []
    with io.open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            X.append(line.rstrip('\n'))
    with open(dict_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # Prepare data for encoding
    batches = Batch(X)

    # Load model
    print("Loading model params...")
    params = load_params(model_path)

    # Build encoder
    print("Building encoder...")
    # Theano variables
    tweet = T.itensor3()
    t_mask = T.fmatrix()
    # Embeddings
    emb_t = tweet2vec(tweet, t_mask, params, n_char)[0]
    # Theano function
    f_enc = theano.function([tweet, t_mask], emb_t)

    # Encode
    print("Encoding data...")
    print("Input data {} samples".format(len(X)))
    features = np.zeros((len(X), WDIM), dtype='float32')
    it = 0
    for x, i in batches:
        if it % 100 == 0:
            print("Minibatch {}".format(it))
        it += 1
        xp, x_mask = prepare_data(x, chardict)
        ff = f_enc(xp, x_mask)
        for ind, idx in enumerate(i):
            features[idx] = ff[ind]

    # Save
    with open(save_path, 'w') as o:
        np.save(o, features)
def _init_model(self, in_size, out_size, n_hid=10, learning_rate_sl=0.005,
                learning_rate_rl=0.005, batch_size=32, ment=0.1):
    # 2-layer MLP
    self.in_size = in_size        # x and y coordinate
    self.out_size = out_size      # up, down, right, left
    self.batch_size = batch_size
    self.learning_rate = learning_rate_rl
    self.n_hid = n_hid

    input_var, turn_mask, act_mask, reward_var = T.ftensor3('in'), T.imatrix('tm'), \
        T.itensor3('am'), T.fvector('r')
    in_var = T.reshape(input_var,
                       (input_var.shape[0] * input_var.shape[1], self.in_size))

    l_mask_in = L.InputLayer(shape=(None, None), input_var=turn_mask)
    pol_in = T.fmatrix('pol-h')
    l_in = L.InputLayer(shape=(None, None, self.in_size), input_var=input_var)
    l_pol_rnn = L.GRULayer(l_in, n_hid, hid_init=pol_in, mask_input=l_mask_in)  # B x H x D
    pol_out = L.get_output(l_pol_rnn)[:, -1, :]
    l_den_in = L.ReshapeLayer(l_pol_rnn,
                              (turn_mask.shape[0] * turn_mask.shape[1], n_hid))  # BH x D
    l_out = L.DenseLayer(l_den_in, self.out_size,
                         nonlinearity=lasagne.nonlinearities.softmax)

    self.network = l_out
    self.params = L.get_all_params(self.network)

    # rl
    probs = L.get_output(self.network)  # BH x A
    out_probs = T.reshape(probs,
                          (input_var.shape[0], input_var.shape[1], self.out_size))  # B x H x A
    log_probs = T.log(out_probs)
    act_probs = (log_probs * act_mask).sum(axis=2)  # B x H
    ep_probs = (act_probs * turn_mask).sum(axis=1)  # B
    H_probs = -T.sum(T.sum(out_probs * log_probs, axis=2), axis=1)  # B
    self.loss = 0. - T.mean(ep_probs * reward_var + ment * H_probs)

    updates = lasagne.updates.rmsprop(self.loss, self.params,
                                      learning_rate=learning_rate_rl, epsilon=1e-4)
    self.inps = [input_var, turn_mask, act_mask, reward_var, pol_in]
    self.train_fn = theano.function(self.inps, self.loss, updates=updates)
    self.obj_fn = theano.function(self.inps, self.loss)
    self.act_fn = theano.function([input_var, turn_mask, pol_in],
                                  [out_probs, pol_out])

    # sl
    sl_loss = 0. - T.mean(ep_probs)
    sl_updates = lasagne.updates.rmsprop(sl_loss, self.params,
                                         learning_rate=learning_rate_sl, epsilon=1e-4)
    self.sl_train_fn = theano.function([input_var, turn_mask, act_mask, pol_in],
                                       sl_loss, updates=sl_updates)
    self.sl_obj_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss)
def _setup_params(self):
    weight_scale = None
    # In this implementation, all hidden layers and terminal nodes should have
    # the same vector size
    assert self.input_n == self.output_n

    self.W_e1 = self.create_weight(self.input_n, self.output_n, "enc1", scale=weight_scale)
    self.W_e2 = self.create_weight(self.input_n, self.output_n, "enc2", scale=weight_scale)
    self.B_e = self.create_bias(self.output_n, "enc")
    self.W_d1 = self.create_weight(self.output_n, self.output_n, "dec1", scale=weight_scale)
    self.W_d2 = self.create_weight(self.output_n, self.input_n, "dec2", scale=weight_scale)
    self.B_d1 = self.create_bias(self.output_n, "dec1")
    self.B_d2 = self.create_bias(self.input_n, "dec2")

    self.init_gW_d1 = theano.shared(np.zeros_like(self.W_d1.get_value()))
    self.init_gW_d2 = theano.shared(np.zeros_like(self.W_d2.get_value()))
    self.init_gB_d1 = theano.shared(np.zeros_like(self.B_d1.get_value()))
    self.init_gB_d2 = theano.shared(np.zeros_like(self.B_d2.get_value()))

    self.h0 = None
    if self.additional_h:
        self.h0 = self.create_vector(self.output_n, "h0")

    self.W = []
    self.B = []
    self.params = [self.W_e1, self.W_e2, self.B_e, self.W_d1, self.W_d2,
                   self.B_d1, self.B_d2]

    if self.deep:
        # Set parameters for deep encoding layer
        self.W_ee = self.create_weight(self.output_n, self.output_n, "deep_enc",
                                       scale=weight_scale)
        self.B_ee = self.create_bias(self.output_n, "deep_enc")
        self.params.extend([self.W_ee, self.B_ee])

    self.init_registers = self.create_matrix(self.max_reg + 1, self.output_n, "init_regs")
    self.zero_rep = self.create_vector(self.output_n, "zero_rep")

    # Inputs for all
    self._vars.seq = T.imatrix("seq")
    # Inputs for training
    self._vars.back_routes = T.itensor3("back_routes")
    self._vars.back_lens = T.ivector("back_lens")
    self.inputs = [self._vars.seq, self._vars.back_routes, self._vars.back_lens]
    # Just for decoding
    self._vars.n = T.iscalar("n")
    self._vars.p = T.vector("p", dtype=FLOATX)
    self.encode_inputs = [self._vars.x, self._vars.seq]
    self.decode_inputs = [self._vars.p, self._vars.seq]
def repeat():
    # e = np.asarray([[[2, 4], [5, 1], [2, 1]]], dtype='int32')
    e = np.asarray([[[2, 4], [5, 1], [2, 10]], [[20, 4], [5, 10], [20, 1]]],
                   dtype='int32')
    # e = np.asarray([[[4], [1], [2]], [[4], [5], [1]]], dtype='int32')
    # e = np.asarray([[2, 4], [5, 1]], dtype='int32')
    w = T.itensor3('w')
    # w = T.imatrix('w')
    # y = T.repeat(w, T.cast(w.shape[1], dtype='int32'), 0)[T.arange(w.shape[1]), 1:]
    # y = T.sum(w, axis=1)
    y = T.repeat(T.sum(w, axis=1), 2, axis=1).reshape((w.shape[0], 2, 2))
    # y = T.repeat(T.repeat(w, T.cast(w.shape[1], dtype='int32'), 0)[T.arange(w.shape[1]), 1:], 2, 0)
    # y = T.repeat(w, 2, 0)
    f = theano.function(inputs=[w], outputs=y)
    print f(e)
def max3d():
    e = np.asarray([[[2, 1], [10, 3]], [[3, 5], [4, 6]]], dtype='int32')
    v = np.asarray([0, 1], dtype='int32')
    w = T.itensor3('w')
    a = T.ivector('a')
    # y = T.max(w, axis=1)
    # y = T.max(w[:, :, :1], axis=1)
    # y = w[T.arange(a.shape[0]), a]
    y = w[:, a]
    # y_a = y / a
    # y_r = y % a
    # y = T.max(w, axis=[1, 2])
    # f = theano.function(inputs=[w, a], outputs=[y, y_a, y_r])
    f = theano.function(inputs=[w, a], outputs=[y])
    print f(e, v)
def lookup():
    e = theano.shared(np.asarray([[2, 1], [10, 3], [3, 5], [4, 6]], dtype='int32'))
    # w = T.imatrix('w')
    # v = T.imatrix('v')
    # v = T.itensor3('v')
    # w = T.ivector('w')
    v = T.ivector('v')
    w = T.itensor3('w')
    y = w[T.arange(w.shape[0]), v]
    # y = w[:, v]
    f = theano.function(inputs=[w, v], outputs=[y])
    # print f([[1, 2], [3, 4]],
    #         [[0, 1, 1], [1, 0, -1]])
    print f([[[0, 0], [0, 1], [0, 1]], [[1, 1], [1, 0], [1, -1]]],
            [1, 2])