def __init__(self, size_repr=1024, size_hidden=200, dropout=0.0, lr=0.0002):
    """Build and compile a 3-way classifier over premise/hypothesis representations.

    size_repr   -- dimensionality of the input sentence representations
    size_hidden -- hidden-layer size; None selects the linear (no hidden layer) classifier
    dropout     -- dropout probability passed to the classifier network
    lr          -- learning rate for the Adam updater
    """
    # Copy all constructor arguments onto self.
    autoassign(locals())
    self.size_classify = 3  # three output classes (presumably entail/contradict/neutral -- TODO confirm)
    if self.size_hidden is None:
        self.network = LinearClassify(size_repr=self.size_repr,
                                      size_classify=self.size_classify,
                                      dropout=self.dropout)
    else:
        self.network = Classify(size_repr=self.size_repr,
                                size_hidden=self.size_hidden,
                                size_classify=self.size_classify,
                                activation=tanh,
                                dropout=self.dropout)
    # Symbolic inputs to the computation graph.
    premise = T.fmatrix()
    hypo = T.fmatrix()
    target = T.fmatrix()  # should be one hot
    # Training graph: built with the training flag on (e.g. dropout active).
    with context.context(training=True):
        predicted = self.network(premise, hypo)
        cost = CrossEntropy(target, predicted)
    # Test graph: same network, training flag off.
    with context.context(training=False):
        predicted_test = self.network(premise, hypo)
        cost_test = CrossEntropy(target, predicted_test)
    self.updater = Adam(lr=self.lr)
    updates = self.updater.get_updates(self.network.params, cost, disconnected_inputs='error')
    # Compiled entry points: training step, test loss, and test-time prediction.
    self.train = theano.function([premise, hypo, target], cost, updates=updates)
    self.loss_test = theano.function([premise, hypo, target], cost_test)
    self.predict = theano.function([premise, hypo], predicted_test)
def _make_train(self):
    """Compile a Theano function computing the training loss and applying updates."""
    with context.context(training=True):
        pred = self(*self.inputs)
        loss = self.cost(self.target, pred)
    step = self.updater.get_updates(self.params(), loss)
    return theano.function(self.inputs + [self.target], loss, updates=step)
def make_loss_test(self):
    """Compile a function returning the (combined, textual, visual) test-time losses."""
    with context.context(training=False):
        v_pred, t_pred = self.network(self.input, self.output_t_prev, self.output_v)
        loss_t = CrossEntropy(self.output_t_oh, t_pred)
        loss_v = self.cost_visual(self.output_v, v_pred)
        # Combined objective: alpha weights the textual vs. visual terms.
        loss = self.alpha * loss_t + (1.0 - self.alpha) * loss_v
    return theano.function(
        [self.input, self.output_v, self.output_t_prev, self.output_t],
        [loss, loss_t, loss_v],
        on_unused_input='warn')
def make_train(self):
    """Compile a function computing the training losses and applying parameter updates."""
    with context.context(training=True):
        v_pred, t_pred = self.network(self.input, self.output_t_prev, self.output_v)
        loss_t = CrossEntropy(self.output_t_oh, t_pred)
        loss_v = self.cost_visual(self.output_v, v_pred)
        # Combined objective: alpha weights the textual vs. visual terms.
        loss = self.alpha * loss_t + (1.0 - self.alpha) * loss_v
    return theano.function(
        [self.input, self.output_v, self.output_t_prev, self.output_t],
        [loss, loss_t, loss_v],
        updates=self.updates(loss),
        on_unused_input='warn')
def _make_loss_test(self):
    """Compile a function that evaluates the combined, textual and visual losses at test time."""
    with context.context(training=False):
        pred_v, pred_t = self.network(self.input, self.output_t_prev, self.output_v)
        textual = CrossEntropy(self.output_t_oh, pred_t)
        visual = self.cost_visual(self.output_v, pred_v)
        # alpha interpolates between the textual and visual objectives.
        combined = self.alpha * textual + (1.0 - self.alpha) * visual
    return theano.function(
        [self.input, self.output_v, self.output_t_prev, self.output_t],
        [combined, textual, visual],
        on_unused_input='warn')
def _make_train(self):
    """Compile a function that evaluates the losses and applies updates (training mode)."""
    with context.context(training=True):
        pred_v, pred_t = self.network(self.input, self.output_t_prev, self.output_v)
        textual = CrossEntropy(self.output_t_oh, pred_t)
        visual = self.cost_visual(self.output_v, pred_v)
        # alpha interpolates between the textual and visual objectives.
        combined = self.alpha * textual + (1.0 - self.alpha) * visual
    return theano.function(
        [self.input, self.output_v, self.output_t_prev, self.output_t],
        [combined, textual, visual],
        updates=self.updates(combined),
        on_unused_input='warn')
def __init__(self, size_vocab, size_embed, size, size_out, depth, network, alpha=0.5,
             out_depth=1, gru_activation=tanh, visual_activation=linear,
             cost_visual=CosineDistance, max_norm=None, dropout_prob=0.0):
    """Build and compile a joint textual/visual sequence model.

    size_vocab        -- vocabulary size (input symbols and one-hot targets)
    size_embed        -- word embedding dimensionality
    size              -- recurrent (GRU) state size
    size_out          -- dimensionality of the visual output
    depth             -- number of recurrent layers
    network           -- class implementing the underlying network; instantiated here
    alpha             -- weight of the textual loss; (1 - alpha) weighs the visual loss
    out_depth         -- depth of the output stack passed to the network
    gru_activation    -- activation used inside the GRU layers
    visual_activation -- activation applied to the visual output
    cost_visual       -- loss function for the visual branch (default CosineDistance)
    max_norm          -- gradient max-norm clipping threshold for Adam (None disables)
    dropout_prob      -- dropout probability passed to the network
    """
    # Copy all constructor arguments onto self.
    autoassign(locals())
    self.network = network(self.size_vocab, self.size_embed, self.size, self.size_out,
                           self.depth,
                           out_depth=self.out_depth,
                           gru_activation=self.gru_activation,
                           visual_activation=self.visual_activation,
                           dropout_prob=self.dropout_prob)
    # Symbolic inputs: word indices, previous target symbols, target symbols,
    # and visual feature vectors. NOTE(review): `input` shadows the builtin.
    input = T.imatrix()
    output_t_prev = T.imatrix()
    output_t = T.imatrix()
    output_v = T.fmatrix()
    # One-hot encode the textual targets for the cross-entropy loss.
    self.OH = OneHot(size_in=self.size_vocab)
    output_t_oh = self.OH(output_t)
    # TRAINING graph (training flag on, e.g. dropout active).
    with context.context(training=True):
        output_v_pred, output_t_pred = self.network(input, output_t_prev, output_v)
        cost_T = CrossEntropy(output_t_oh, output_t_pred)
        cost_V = self.cost_visual(output_v, output_v_pred)
        cost = self.alpha * cost_T + (1.0 - self.alpha) * cost_V
    # TESTING graph (same network, training flag off).
    with context.context(training=False):
        output_v_pred_test, output_t_pred_test = self.network(input, output_t_prev, output_v)
        cost_T_test = CrossEntropy(output_t_oh, output_t_pred_test)
        cost_V_test = self.cost_visual(output_v, output_v_pred_test)
        cost_test = self.alpha * cost_T_test + (1.0 - self.alpha) * cost_V_test
    self.updater = util.Adam(max_norm=self.max_norm)
    updates = self.updater.get_updates(self.network.params, cost)
    # TODO better way of dealing with needed/unneeded output_t_prev?
    self.train = theano.function([input, output_v, output_t_prev, output_t],
                                 [cost, cost_T, cost_V],
                                 updates=updates, on_unused_input='warn')
    self.loss_test = theano.function([input, output_v, output_t_prev, output_t],
                                     [cost_test, cost_T_test, cost_V_test],
                                     on_unused_input='warn')
def __init__(self, size_repr=1024, size_hidden=200, dropout=0.0, lr=0.0002):
    """Build and compile a 3-way classifier over premise/hypothesis representations.

    size_repr   -- dimensionality of the input sentence representations
    size_hidden -- hidden-layer size; None selects the linear (no hidden layer) classifier
    dropout     -- dropout probability passed to the classifier network
    lr          -- learning rate for the Adam updater
    """
    # Copy all constructor arguments onto self.
    autoassign(locals())
    self.size_classify = 3  # three output classes (presumably entail/contradict/neutral -- TODO confirm)
    if self.size_hidden is None:
        self.network = LinearClassify(size_repr=self.size_repr,
                                      size_classify=self.size_classify,
                                      dropout=self.dropout)
    else:
        self.network = Classify(size_repr=self.size_repr,
                                size_hidden=self.size_hidden,
                                size_classify=self.size_classify,
                                activation=tanh,
                                dropout=self.dropout)
    # Symbolic inputs to the computation graph.
    premise = T.fmatrix()
    hypo = T.fmatrix()
    target = T.fmatrix()  # should be one hot
    # Training graph: built with the training flag on (e.g. dropout active).
    with context.context(training=True):
        predicted = self.network(premise, hypo)
        cost = CrossEntropy(target, predicted)
    # Test graph: same network, training flag off.
    with context.context(training=False):
        predicted_test = self.network(premise, hypo)
        cost_test = CrossEntropy(target, predicted_test)
    self.updater = Adam(lr=self.lr)
    updates = self.updater.get_updates(self.network.params, cost, disconnected_inputs='error')
    # Compiled entry points: training step, test loss, and test-time prediction.
    self.train = theano.function([premise, hypo, target], cost, updates=updates)
    self.loss_test = theano.function([premise, hypo, target], cost_test)
    self.predict = theano.function([premise, hypo], predicted_test)
def _make_pile(self):
    """Compile a function returning the intermediate GRU activations of the encoder."""
    with context.context(training=False):
        embedded = self.Encode.Embed(*self.inputs)
        states = self.Encode.GRU.intermediate(embedded)
    return theano.function(self.inputs, states)
def _make_representation(self):
    """Compile a test-mode function computing the encoder representation of the inputs."""
    with context.context(training=False):
        encoded = self.Encode(*self.inputs)
    return theano.function(self.inputs, encoded)
def _make_encode_images(self):
    """Compile a function mapping image feature matrices to L2-normalized encodings."""
    img = T.fmatrix()
    with context.context(training=False):
        encoded = util.l2norm(self.ImgEncoder(img))
    return theano.function([img], encoded)
def _make_predict(self):
    """Compile a test-mode function mapping the model inputs to its prediction."""
    with context.context(training=False):
        output = self(*self.inputs)
    return theano.function(self.inputs, output)
def _make_pile(self):
    # Identical to _make_representation: compile the test-mode encoder output.
    with context.context(training=False):
        encoded = self.Encode(*self.inputs)
    return theano.function(self.inputs, encoded)
def _make_conv_states(self):
    """Compile a test-mode function returning the encoder's convolutional activations."""
    with context.context(training=False):
        conv_out = self.Encode.Conv(*self.inputs)
    return theano.function(self.inputs, conv_out)
def _make_representation(self):
    """Compile a test-mode function computing the visual-pathway encoding of the inputs."""
    with context.context(training=False):
        shared = self.network.Shared(*self.inputs)
        encoded = self.network.EncodeV(shared)
    return theano.function(self.inputs, encoded)
def _make_loss_test(self):
    """Compile a function evaluating the loss at test time (no parameter updates)."""
    with context.context(training=False):
        pred = self(*self.inputs)
        loss = self.cost(self.target, pred)
    return theano.function(self.inputs + [self.target], loss)
def _make_pile(self):
    """Compile a test-mode function returning intermediate activations of the visual encoder."""
    with context.context(training=False):
        shared = self.network.Shared(*self.inputs)
        layers = self.network.EncodeV.intermediate(shared)
    return theano.function(self.inputs, layers)