def updates_wrt_cost(self, cost, learning_opts):
    if self.using_shared_embeddings:
        return []  # the shared-embeddings component emits the embedding update instead
    learning_rate = learning_opts.learning_rate
    gradient = util.clipped(T.grad(cost=cost, wrt=self.sequence_embeddings))
    # sequence_embeddings is a subtensor of Wx, so inc_subtensor yields a sparse
    # update that only touches the rows used by this batch.
    return [(self.Wx,
             T.inc_subtensor(self.sequence_embeddings, -learning_rate * gradient))]
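# A minimal sketch of a gradient-clipping helper in the spirit of util.clipped,
# assuming it clamps gradients elementwise to a fixed range; the actual bound and
# behaviour of util.clipped are not shown in this snippet.
import theano.tensor as T

def clipped(gradients, bound=5.0):  # bound is a hypothetical default
    # accept either a single gradient variable or a list of them
    if isinstance(gradients, (list, tuple)):
        return [T.clip(g, -bound, bound) for g in gradients]
    return T.clip(gradients, -bound, bound)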
def updates_wrt_cost(self, cost, learning_opts):
    if not self.train_embeddings:
        return []
    # _one_ update for the embedding matrix, regardless of the number of rnns
    # running over subslices of it
    gradient = util.clipped(
        T.grad(cost=cost, wrt=self.concatenated_sequence_embeddings))
    learning_rate = learning_opts.learning_rate
    return [(self.shared_embeddings,
             T.inc_subtensor(self.concatenated_sequence_embeddings,
                             -learning_rate * gradient))]
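# A hedged sketch of how concatenated_sequence_embeddings might be built so the
# single inc_subtensor update above is valid: the per-rnn index vectors
# (s1_idxs and s2_idxs are hypothetical names) are concatenated and one lookup is
# taken over them, keeping the result a subtensor of shared_embeddings.
import theano.tensor as T

def concatenated_lookup(shared_embeddings, s1_idxs, s2_idxs):
    all_idxs = T.concatenate([s1_idxs, s2_idxs])
    # an advanced-subtensor read; inc_subtensor on this expression translates into
    # a sparse increment of the corresponding rows of shared_embeddings
    return shared_embeddings[all_idxs]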
def updates_wrt_cost(self, cost, learning_opts):
    # dense case: clip the gradients of every dense parameter and let the
    # configured update_fn build the corresponding update rules
    gradients = util.clipped(T.grad(cost=cost, wrt=self.dense_params()))
    return self.update_fn(self.dense_params(), gradients, learning_opts)
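# A minimal sketch of how these per-component update lists might be combined and
# compiled into one training step; `components` and `inputs` are hypothetical
# names for whatever builds `cost` in the full model.
import theano

def compile_train_fn(inputs, cost, components, learning_opts):
    updates = []
    for c in components:
        updates.extend(c.updates_wrt_cost(cost, learning_opts))
    # theano.function accepts at most one update expression per shared variable,
    # which is why the embedding component above emits a single inc_subtensor
    # update no matter how many rnns read from the shared matrix.
    return theano.function(inputs=inputs, outputs=cost, updates=updates)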