return h1_t, h2_t, h3_t, y_hat ((h1_t, h2_t, h3_t, y_hat), updates) =\ theano.scan(fn=inner_fn, sequences=[x], outputs_info=[h1_init_state, h2_init_state, h3_init_state, None]) reshaped_y = y.reshape((y.shape[0]*y.shape[1], -1)) reshaped_y = onehot.fprop([reshaped_y]) reshaped_y_hat = y_hat.reshape((y_hat.shape[0]*y_hat.shape[1], -1)) cost = NllMul(reshaped_y, reshaped_y_hat) cost = cost.mean() cost.name = 'cost' model.inputs = [x, y] model._params = params model.nodes = nodes model.set_updates(update_list) optimizer = Adam( lr=0.001 ) extension = [ GradientClipping(batch_size=batch_size), EpochCount(100),
def fprop(self, X):
    """Compute the NllMul cost for an input pair and reduce it.

    ``X[0]`` and ``X[1]`` are passed, in that order, as the first and
    second arguments of ``NllMul``.
    NOTE(review): presumably ``X[0]`` is the target and ``X[1]`` the
    prediction -- confirm against NllMul's definition.

    Returns the result of ``.sum()`` on the cost when ``self.use_sum``
    is truthy, and the result of ``.mean()`` otherwise.
    """
    first, second = X[0], X[1]
    nll = NllMul(first, second)
    # Guard-clause form: the mean is the fallback when summing is disabled.
    if not self.use_sum:
        return nll.mean()
    return nll.sum()