def compile(self, optimizer, loss):
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)

    # input of model
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)

    train_loss = self.loss(self.X_train)
    test_loss = self.loss(self.X_test)
    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'

    for r in self.regularizers:
        train_loss = r(train_loss)
    updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)
    updates += self.updates

    if type(self.X_train) == list:
        train_ins = self.X_train
        test_ins = self.X_test
    else:
        train_ins = [self.X_train]
        test_ins = [self.X_test]

    self._train = K.function(train_ins, train_loss, updates=updates)
    self._test = K.function(test_ins, test_loss)

def build(self, input_shape):
    history_shape = (self.max_items + 1,) + input_shape[0][1:]
    self.history = self.add_weight(history_shape,
                                   initializer='zero',
                                   name='{}_history'.format(self.name),
                                   trainable=False)
    self.loss_added = False
    self.loss_func = objectives.get(self.loss)

def compile(self, optimizer, loss, theano_mode=None):
    # loss is a dictionary mapping output name to loss functions
    ys = []
    ys_train = []
    ys_test = []
    weights = []
    train_loss = 0.
    test_loss = 0.
    for output_name in self.output_order:
        loss_fn = loss[output_name]
        output = self.outputs[output_name]
        y_train = output.get_output(True)
        y_test = output.get_output(False)
        y = T.zeros_like(y_test)
        ys.append(y)
        ys_train.append(y_train)
        ys_test.append(y_test)
        if hasattr(output, "get_output_mask"):
            mask = output.get_output_mask()
        else:
            mask = None
        weight = T.ones_like(y_test)
        weights.append(weight)
        weighted_loss = weighted_objective(objectives.get(loss_fn))
        train_loss += weighted_loss(y, y_train, weight, mask)
        test_loss += weighted_loss(y, y_test, weight, mask)

    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'

    ins = [self.inputs[name].input for name in self.input_order]
    train_ins = ins + ys + weights
    test_ins = ins + ys + weights

    for r in self.regularizers:
        train_loss = r(train_loss)
    self.optimizer = optimizers.get(optimizer)
    updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)
    updates += self.updates
    self.theano_mode = theano_mode
    self.loss = loss

    self._train = theano.function(train_ins, train_loss,
                                  updates=updates,
                                  allow_input_downcast=True,
                                  mode=theano_mode)
    self._test = theano.function(test_ins, test_loss,
                                 allow_input_downcast=True,
                                 mode=theano_mode)
    self._predict = theano.function(inputs=ins, outputs=ys_test,
                                    allow_input_downcast=True,
                                    mode=theano_mode)

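# Hedged usage note for the Graph-style compile above. The argument order
# is read off `train_ins = ins + ys + weights`: inputs in input_order,
# then one target array and one weight array per entry of output_order.
# The array names below are illustrative, not from the source:
#
#   batch_loss = self._train(*(input_arrays + target_arrays + weight_arrays))
#   outputs = self._predict(*input_arrays)  # one array per output_order entry
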
def test_loss_masking():
    weighted_loss = weighted_objective(objectives.get('mae'))
    shape = (3, 4, 2)
    X = np.arange(24).reshape(shape)
    Y = 2 * X

    # Normally the trailing 1 is added by standardize_weights
    weights = np.ones((3,))
    mask = np.ones((3, 4))
    mask[1, 0] = 0

    out = K.eval(weighted_loss(K.variable(X),
                               K.variable(Y),
                               K.variable(weights),
                               K.variable(mask)))

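# A minimal standalone sketch (pure NumPy, names are mine) of the masking
# semantics the test above exercises: per-timestep loss is scaled by the
# mask so masked steps contribute nothing, renormalized for the dropped
# steps, then scaled by per-sample weights. This approximates Keras's
# weighted_objective and omits the nonzero-weight renormalization the
# real helper also performs; it is not the exact implementation.
import numpy as np

def masked_weighted_mae(y_true, y_pred, weights, mask):
    score = np.mean(np.abs(y_pred - y_true), axis=-1)  # per-timestep MAE -> (3, 4)
    score = score * mask                               # masked steps contribute nothing
    score = score / np.mean(mask)                      # renormalize for dropped steps
    score = np.mean(score, axis=-1)                    # reduce the time axis -> (3,)
    return np.mean(score * weights)                    # per-sample weights, batch mean
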
def keras_wrap(model, target, output, loss):
    """ Convenience function for wrapping a Keras loss function.
    """
    # pylint: disable=import-error
    import keras.objectives as O
    import keras.backend as K
    # pylint: enable=import-error
    if isinstance(loss, str):
        loss = O.get(loss)
    shape = model.outputs[target].value._keras_shape  # pylint: disable=protected-access
    ins = [
        (target, K.placeholder(
            ndim=len(shape),
            dtype=K.dtype(model.outputs[target].value),
            name=target
        ))
    ]
    out = loss(ins[0][1], output)
    return ins, out

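# Hypothetical usage sketch for keras_wrap. `spec` stands in for the
# surrounding framework's container whose `outputs[target].value` is a
# Keras tensor; every name here is an assumption for illustration only:
#
#   ins, loss_tensor = keras_wrap(spec, target='labels',
#                                 output=network_output_tensor,
#                                 loss='categorical_crossentropy')
#   # ins[0] is ('labels', <placeholder shaped like the target output>);
#   # loss_tensor is the symbolic loss comparing placeholder and output.
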
def compile(self, optimizer, loss, log_fcn=lambda x, y: (x, y),
            joint_model=False, skiplist=[]):
    log = lambda x: log_fcn(x, True)
    log("Entering compile...")
    log("Compiling functions...")
    self.optimizer = optimizers.get(optimizer)
    self.old_lr = self.optimizer.lr if 'lr' in dir(self.optimizer) else 0
    self.lr = T.scalar()
    self.optimizer.lr = self.lr
    self.loss = objectives.get(loss)

    self.X = self.layers[0].input  # input of model
    self.Y = T.tensor3()           # vector word labels

    self.y_train = self.layers[-1].output(train=True)[0]
    self.y_test = self.layers[-1].output(train=False)[0]

    self.train_loss = self.loss(self.Y, self.y_train)
    self.test_score = self.loss(self.Y, self.y_test)
    updates = self.optimizer.get_updates(self.params, self.train_loss)

    if 'train' not in skiplist:
        log("Creating train function...")
        self._train = theano.function([self.X, self.Y, self.lr],
                                      self.train_loss,
                                      updates=updates,
                                      allow_input_downcast=True)
    if 'predict' not in skiplist:
        log("Creating predict function...")
        self._predict = theano.function([self.X], self.y_test,
                                        allow_input_downcast=True)
    if 'test' not in skiplist:
        log("Creating test function...")
        self._test = theano.function([self.X, self.Y], self.test_score,
                                     allow_input_downcast=True)
    log("Done compiling functions")

def test_loss_masking_time(self):
    theano.config.mode = 'FAST_COMPILE'
    weighted_loss = weighted_objective(objectives.get('categorical_crossentropy'))
    shape = (3, 4, 2)
    X = np.arange(24).reshape(shape)
    Y = 2 * X

    # Normally the trailing 1 is added by standardize_weights
    weights = np.ones((3, 4, 1))
    weights[0, 0] = 0
    mask = np.ones((3, 4))
    mask[1, 0] = 0
    out = weighted_loss(X, Y, weights, mask).eval()

    weights[0, 0] = 1e-9  # so that nonzero() doesn't remove this weight
    out2 = weighted_loss(X, Y, weights, mask).eval()
    print(out)
    print(out2)
    assert abs(out - out2) < 1e-8

def compile(self, optimizer, loss, theano_mode=None):
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)

    # input of model
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)

    self.y_train = self.get_output(train=True)
    self.y_test = self.get_output(train=False)

    train_loss = self.loss(self.y_train)
    test_loss = self.loss(self.y_test)
    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'
    self.theano_mode = theano_mode

    for r in self.regularizers:
        train_loss = r(train_loss)
    updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)
    updates += self.updates

    if type(self.X_train) == list:
        train_ins = self.X_train
        test_ins = self.X_test
        predict_ins = self.X_test
    else:
        train_ins = [self.X_train]
        test_ins = [self.X_test]
        predict_ins = [self.X_test]

    self._train = theano.function(train_ins, train_loss,
                                  updates=updates,
                                  allow_input_downcast=True,
                                  mode=theano_mode)
    self._test = theano.function(test_ins, test_loss,
                                 allow_input_downcast=True,
                                 mode=theano_mode)
    self._predict = theano.function(predict_ins, self.y_test,
                                    allow_input_downcast=True,
                                    mode=theano_mode,
                                    on_unused_input='ignore')

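# Design note (inferred from the code above): _predict is compiled with
# on_unused_input='ignore' because predict_ins reuses the full set of
# test-time inputs even when y_test does not depend on all of them;
# without that flag theano.function would raise an UnusedInputError.
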
def __init__(self, parameter_list, loss, fast=False, **kwargs):
    self.parameter_list = parameter_list
    self.loss = objectives.get(loss)
    self.fast = fast
    super(GradientNormLayer, self).__init__(**kwargs)

def __init__(self, loss, **kwargs):
    self.loss = objectives.get(loss)
    super(LossLayer, self).__init__(**kwargs)

def compile(self, optimizer="sgd", loss="mse", policy_rule="max", sample_weight_mode=None): """Initialize model weights and compile functions Notes ----- This function was modifed from `keras.models.compile` which is under MIT License. """ kmodel = self.keras_model kmodel.build() self.policy_rule = policies.get(policy_rule) self.optimizer = optimizers.get(optimizer) self.sample_weight_mode = sample_weight_mode self.loss = objectives.get(loss) weighted_loss = weighted_objective(self.loss) # input of model self.X_train = kmodel.get_input(train=True) self.X_test = kmodel.get_input(train=False) # calculate policy values values_train = kmodel.get_output(train=True) values_test = kmodel.get_output(train=False) self.y_train = self.policy_rule(values_train) self.y_test = self.policy_rule(values_test) # target of model self.y = K.placeholder(ndim=K.ndim(self.y_train)) if self.sample_weight_mode == 'temporal': self.weights = K.placeholder(ndim=2) else: self.weights = K.placeholder(ndim=1) if hasattr(kmodel.layers[-1], "get_output_mask"): mask = kmodel.layers[-1].get_output_mask() else: mask = None train_loss = weighted_loss(self.y, self.y_train, self.weights, mask) test_loss = weighted_loss(self.y, self.y_test, self.weights, mask) for r in kmodel.regularizers: train_loss = r(train_loss) updates = self.optimizer.get_updates(kmodel.trainable_weights, kmodel.constraints, train_loss) updates += kmodel.updates if type(self.X_train) == list: train_ins = self.X_train + [self.y, self.weights] test_ins = self.X_test + [self.y, self.weights] assert type(self.X_test) == list values_ins_test = self.X_test values_ins_train = self.X_train else: train_ins = [self.X_train, self.y, self.weights] test_ins = [self.X_test, self.y, self.weights] values_ins_test = [self.X_test] values_ins_train = [self.X_train] self._train = K.function(train_ins, [train_loss], updates=updates) self._values_train = K.function(values_ins_train, [values_train], updates=kmodel.state_updates) self._values_test = K.function(values_ins_test, [values_test], updates=kmodel.state_updates) # TODO: check if this is necessary self._test = K.function(test_ins, [test_loss], updates=kmodel.state_updates)
def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) weighted_loss = weighted_objective(objectives.get(loss)) # input of model self.X_train = self.get_input(train=True) self.X_test = self.get_input(train=False) self.y_train = self.get_output(train=True) self.y_test = self.get_output(train=False) # target of model self.y = T.zeros_like(self.y_train) self.weights = T.ones_like(self.y_train) if hasattr(self.layers[-1], "get_output_mask"): mask = self.layers[-1].get_output_mask() else: mask = None train_loss = weighted_loss(self.y, self.y_train, self.weights, mask) test_loss = weighted_loss(self.y, self.y_test, self.weights, mask) train_loss.name = 'train_loss' test_loss.name = 'test_loss' self.y.name = 'y' if class_mode == "categorical": train_accuracy = T.mean( T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean( T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode self.theano_mode = theano_mode for r in self.regularizers: train_loss = r(train_loss) updates = self.optimizer.get_updates(self.params, self.constraints, train_loss) updates += self.updates if type(self.X_train) == list: train_ins = self.X_train + [self.y, self.weights] test_ins = self.X_test + [self.y, self.weights] predict_ins = self.X_test else: train_ins = [self.X_train, self.y, self.weights] test_ins = [self.X_test, self.y, self.weights] predict_ins = [self.X_test] self._train = theano.function(train_ins, train_loss, updates=updates, allow_input_downcast=True, mode=theano_mode) self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], updates=updates, allow_input_downcast=True, mode=theano_mode) self._predict = theano.function(predict_ins, self.y_test, allow_input_downcast=True, mode=theano_mode) self._test = theano.function(test_ins, test_loss, allow_input_downcast=True, mode=theano_mode) self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy], allow_input_downcast=True, mode=theano_mode)
def test_sequential():
    (X_train, y_train), (X_test, y_test) = _get_test_data()

    # TODO: factor out
    def data_generator(x, y, batch_size=50):
        index_array = np.arange(len(x))
        while 1:
            # NOTE: len(X_test) is hard-coded here; this only works because
            # the generator is used exclusively with X_test below.
            batches = make_batches(len(X_test), batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                x_batch = x[batch_ids]
                y_batch = y[batch_ids]
                yield (x_batch, y_batch)

    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, validation_data=(X_test, y_test))
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=2, validation_split=0.1)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=0)
    model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
              verbose=1, shuffle=False)

    model.train_on_batch(X_train[:32], y_train[:32])

    loss = model.evaluate(X_test, y_test)

    prediction = model.predict_generator(data_generator(X_test, y_test),
                                         X_test.shape[0], max_q_size=2)
    gen_loss = model.evaluate_generator(data_generator(X_test, y_test, 50),
                                        X_test.shape[0], max_q_size=2)
    pred_loss = K.eval(K.mean(objectives.get(model.loss)(K.variable(y_test),
                                                         K.variable(prediction))))

    assert np.isclose(pred_loss, loss)
    assert np.isclose(gen_loss, loss)

    model.predict(X_test, verbose=0)
    model.predict_classes(X_test, verbose=0)
    model.predict_proba(X_test, verbose=0)

    fname = 'test_sequential_temp.h5'
    model.save_weights(fname, overwrite=True)
    model = Sequential()
    model.add(Dense(nb_hidden, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.load_weights(fname)
    os.remove(fname)

    nloss = model.evaluate(X_test, y_test, verbose=0)
    assert loss == nloss

    # test serialization
    config = model.get_config()
    Sequential.from_config(config)

    model.summary()
    json_str = model.to_json()
    model_from_json(json_str)

    yaml_str = model.to_yaml()
    model_from_yaml(yaml_str)

def compile(self, optimizer, loss, class_mode="categorical", sample_weight_mode=None): '''Configure the learning process. # Arguments optimizer: str (name of optimizer) or optimizer object. See [optimizers](optimizers.md). loss: str (name of objective function) or objective function. See [objectives](objectives.md). class_mode: one of "categorical", "binary". This is only used for computing classification accuracy or using the predict_classes method. sample_weight_mode: if you need to do timestep-wise sample weighting (2D weights), set this to "temporal". "None" defaults to sample-wise weights (1D). ''' self.optimizer = optimizers.get(optimizer) self.sample_weight_mode = sample_weight_mode self.loss = objectives.get(loss) weighted_loss = weighted_objective(self.loss) # input of model self.X_train = self.get_input(train=True) self.X_test = self.get_input(train=False) self.single_y_train = self.get_output(train=True) self.single_y_test = self.get_output(train=False) self.diff_train = K.placeholder(ndim=1) self.diff_test = K.placeholder(ndim=1) self.y_train = K.concatenate( [K.dot(self.diff_train, self.single_y_train[:self.diff_train.shape[0]]), K.dot(self.diff_train, self.single_y_train[self.diff_train.shape[0]:])], axis=0) self.y_test = K.concatenate( [K.dot(self.diff_test, self.single_y_test[:self.diff_test.shape[0]]), K.dot(self.diff_test, self.single_y_test[self.diff_test.shape[0]:])], axis=0) # target of model self.y = K.placeholder(ndim=K.ndim(self.y_train)) if self.sample_weight_mode == 'temporal': self.weights = K.placeholder(ndim=2) else: self.weights = K.placeholder(ndim=1) if hasattr(self.layers[-1], "get_output_mask"): mask = self.layers[-1].get_output_mask() else: mask = None train_loss = weighted_loss(self.y, self.y_train, self.weights, mask) test_loss = weighted_loss(self.y, self.y_test, self.weights, mask) if class_mode == "categorical": train_accuracy = K.mean(K.equal(K.argmax(self.y, axis=-1), K.argmax(self.y_train, axis=-1))) test_accuracy = K.mean(K.equal(K.argmax(self.y, axis=-1), K.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = K.mean(K.equal(self.y, K.round(self.y_train))) test_accuracy = K.mean(K.equal(self.y, K.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode for r in self.regularizers: train_loss = r(train_loss) updates = self.optimizer.get_updates(self.trainable_weights, self.constraints, train_loss) updates += self.updates if type(self.X_train) == list: train_ins = self.X_train + [self.diff_train, self.y, self.weights] test_ins = self.X_test + [self.diff_test, self.y, self.weights] assert type(self.X_test) == list predict_ins = self.X_test + [self.diff_test] else: train_ins = [self.X_train, self.diff_train, self.y, self.weights] test_ins = [self.X_test, self.diff_test, self.y, self.weights] predict_ins = [self.X_test, self.diff_test] self.__train = K.function(train_ins, [train_loss], updates=updates) self.__train_with_acc = K.function(train_ins, [train_loss, train_accuracy], updates=updates) self.__predict = K.function(predict_ins, [self.y_test], updates=self.state_updates) self.__test = K.function(test_ins, [test_loss], updates=self.state_updates) self.__test_with_acc = K.function(test_ins, [test_loss, test_accuracy], updates=self.state_updates) self._train = lambda rr: self.__train([r[0] for r in rr[:-1]] + [rr[-1]]) self._train_with_acc = lambda rr: self.__train_with_acc([r[0] for r in rr[:-1]] + [rr[-1]]) self._predict = lambda rr: self.__predict([r[0] for r in rr]) 
self._test = lambda rr: self.__test([r[0] for r in rr[:-1]] + [rr[-1]]) self._test_with_acc = lambda rr: self.__test_with_acc([r[0] for r in rr[:-1]] + [rr[-1]])
y_train = output.output(train=True)
y_test = output.output(train=False)
mask_train = output.output_mask(train=True)   # None in this example
mask_test = output.output_mask(train=False)   # None in this example

print("X_train:", P.pprint(X_train))
print("X_test:", P.pprint(X_test))
print("y_train:")
print(P.debugprint(y_train))
print("y_test:")
print(P.debugprint(y_test))

"""loss"""
loss = objectives.get("categorical_crossentropy")
weighted_loss = models.weighted_objective(loss)
y = K.placeholder(ndim=K.ndim(y_train))
weights = K.placeholder(ndim=1)
train_loss = weighted_loss(y, y_train, weights, mask_train)
test_loss = weighted_loss(y, y_test, weights, mask_test)

_y_train = K.placeholder(ndim=3, name="y_train")
_y_test = K.placeholder(ndim=3, name="y_test")
_train_loss = weighted_loss(y, _y_train, weights, mask_train)
_test_loss = weighted_loss(y, _y_test, weights, mask_test)
print("train_loss:", P.pprint(_train_loss))
print("test_loss:", P.pprint(_test_loss))

"""categorical accuracy"""
train_accuracy = K.mean(K.equal(K.argmax(y, axis=-1),
                                K.argmax(y_train, axis=-1)))

def compile(self, optimizer, loss, log_fcn=lambda x, y: (x, y),
            joint_model=False, skiplist=[]):
    log = lambda x: log_fcn(x, True)
    log("Entering compile...")
    self.optimizer = optimizers.get(optimizer)
    self.old_lr = self.optimizer.lr if 'lr' in dir(self.optimizer) else 0
    self.lr = T.scalar()
    self.optimizer.lr = self.lr
    objective = objectives.get(loss)
    self.loss = create_masked_loss(objective)
    v = theano.shared(numpy.array([1]))

    # output of model
    self.Y = T.tensor3()   # vector word labels
    self.M = T.tensor3()   # mask
    self.X1 = T.tensor3()  # first sequence

    log("Compiling functions...")
    # materialized as a list so len() also works under Python 3
    self.CH_layers = [layer for layer in self.layers
                      if hasattr(layer, 'C1') and hasattr(layer, 'H1')]

    # Loop inner function
    def make_step(train):
        # create closure around train
        def _step(last_X, *last_S):
            # set top layer's input = last output
            self.layers[0].input = last_X
            # C and H have to be manually passed into FlatLSTM
            # layers for each iteration of the loop.
            # last_S is variadic, as inputs to _step need to be tensors.
            last_C = last_S[:len(self.CH_layers)]
            last_H = last_S[len(self.CH_layers):]
            for i, layer in enumerate(self.CH_layers):
                layer.c_tm1 = last_C[i]
                layer.h_tm1 = last_H[i]
            # Get the following:
            #  - final layer's output
            #  - each layer's C (cell memory)
            #  - each layer's H (layer's last output)
            out, C, H = self.layers[-1].output(train=train)
            return [out] + C + H
        return _step

    # Create train, predict functions
    train_step = make_step(True)
    predict_step = make_step(False)

    # Train and predict result: loop over step function n_steps times.
    # Initial values are set by the calling function: the first sequence
    # token, and an initial C and H for each layer.
    # (this produces a sequence of length n_steps)
    # Train result can take an extremely long time to compile.
    if 'train' not in skiplist:
        log("Creating train result (n_steps={0})...".format(self.steps))
        self._train_result_scan, _ = theano.scan(
            fn=train_step,
            outputs_info=[dict(initial=self.X1, taps=[-1])] +
                         [dict(initial=layer.C1, taps=[-1])
                          for layer in self.CH_layers] +
                         [dict(initial=layer.H1, taps=[-1])
                          for layer in self.CH_layers],
            n_steps=self.steps)
    if 'predict' not in skiplist or 'test' not in skiplist:
        log("Creating predict result (n_steps={0})...".format(self.steps))
        self._predict_result_scan, _ = theano.scan(
            fn=predict_step,
            outputs_info=[dict(initial=self.X1, taps=[-1])] +
                         [dict(initial=layer.C1, taps=[-1])
                          for layer in self.CH_layers] +
                         [dict(initial=layer.H1, taps=[-1])
                          for layer in self.CH_layers],
            n_steps=self.steps)

    # Fixes dimensions from result function to produce the
    # correct ordering of (sequence, token, vector)
    # (dimension #2 is an artefact of porting the functions to a loop)
    if 'train' not in skiplist:
        self._train_result = self._train_result_scan[0]
        self._train_result = self._train_result.dimshuffle(1, 0, 3, 2)
        self._train_result = self._train_result.flatten(ndim=3)
    if not ('predict' in skiplist and 'test' in skiplist):
        self._predict_result = self._predict_result_scan[0]
        self._predict_result = self._predict_result.dimshuffle(1, 0, 3, 2)
        self._predict_result = self._predict_result.flatten(ndim=3)

    # Create train, predict, testing functions
    if 'train' not in skiplist:
        log("Setting train loss and updates...")
        self.train_loss = self.loss(self.Y, self._train_result, self.M)
        self.updates = self.optimizer.get_updates(self.params, self.train_loss)
        if not joint_model:
            log("Creating train function...")
            self.__train = theano.function(
                [self.X1, self.Y, self.M] + [self.lr] +
                [layer.C1 for layer in self.CH_layers] +
                [layer.H1 for layer in self.CH_layers],
                self.train_loss,
                updates=self.updates,
                allow_input_downcast=True)
    if 'predict' not in skiplist:
        self.predict_result = self._predict_result
        if not joint_model:
            log("Creating predict function...")
            self.__predict = theano.function(
                [self.X1] +
                [layer.C1 for layer in self.CH_layers] +
                [layer.H1 for layer in self.CH_layers],
                self.predict_result,
                allow_input_downcast=True)
    if 'test' not in skiplist:
        self.test_score = self.loss(self.Y, self._predict_result, self.M)
        if not joint_model:
            log("Creating test function...")
            self.__test = theano.function(
                [self.X1, self.Y, self.M] +
                [layer.C1 for layer in self.CH_layers] +
                [layer.H1 for layer in self.CH_layers],
                self.test_score,
                allow_input_downcast=True)
    log("Done compiling functions")

def compile_tfrecord(train_model, optimizer, loss, out_tensor_lst,
                     metrics=[], loss_weights=None):
    train_model.build(train_model)
    # train_model.build()

    train_model.optimizer = optimizers.get(optimizer)
    train_model.loss = loss
    train_model.loss_weights = loss_weights

    # prepare loss weights
    if loss_weights is None:
        loss_weights_list = [1. for _ in range(len(train_model.outputs))]
    elif isinstance(loss_weights, dict):
        for name in loss_weights:
            if name not in train_model.output_names:
                raise ValueError('Unknown entry in loss_weights '
                                 'dictionary: "' + name + '". '
                                 'Only expected the following keys: ' +
                                 str(train_model.output_names))
        loss_weights_list = []
        for name in train_model.output_names:
            loss_weights_list.append(loss_weights.get(name, 1.))
    elif isinstance(loss_weights, list):
        if len(loss_weights) != len(train_model.outputs):
            raise ValueError('When passing a list as loss_weights, '
                             'it should have one entry per model output. '
                             'The model has ' + str(len(train_model.outputs)) +
                             ' outputs, but you passed loss_weights=' +
                             str(loss_weights))
        loss_weights_list = loss_weights
    else:
        raise TypeError('Could not interpret loss_weights argument: ' +
                        str(loss_weights) + ' - expected a list of dicts.')

    # prepare loss functions
    if isinstance(loss, dict):
        for name in loss:
            if name not in train_model.output_names:
                raise ValueError('Unknown entry in loss '
                                 'dictionary: "' + name + '". '
                                 'Only expected the following keys: ' +
                                 str(train_model.output_names))
        loss_functions = []
        for name in train_model.output_names:
            if name not in loss:
                raise ValueError('Output "' + name +
                                 '" missing from loss dictionary.')
            loss_functions.append(objectives.get(loss[name]))
    elif isinstance(loss, list):
        if len(loss) != len(train_model.outputs):
            raise ValueError('When passing a list as loss, '
                             'it should have one entry per model output. '
                             'The model has ' + str(len(train_model.outputs)) +
                             ' outputs, but you passed loss=' + str(loss))
        loss_functions = [objectives.get(l) for l in loss]
    else:
        loss_function = objectives.get(loss)
        loss_functions = [loss_function
                          for _ in range(len(train_model.outputs))]
    train_model.loss_functions = loss_functions
    weighted_losses = [_weighted_masked_objective(fn) for fn in loss_functions]

    # prepare metrics
    train_model.metrics = metrics
    train_model.metrics_names = ['loss']
    train_model.metrics_tensors = []

    # compute total loss
    total_loss = None
    for i in range(len(train_model.outputs)):
        y_true = out_tensor_lst[i]
        y_pred = train_model.outputs[i]
        _loss = loss_functions[i]
        # _loss = weighted_losses[i]
        loss_weight = loss_weights_list[i]
        # output_loss = _loss(y_true, y_pred, None, None)
        output_loss = K.mean(_loss(y_true, y_pred))
        if len(train_model.outputs) > 1:
            train_model.metrics_tensors.append(output_loss)
            train_model.metrics_names.append(train_model.output_names[i] + '_loss')
        if total_loss is None:
            total_loss = loss_weight * output_loss
        else:
            total_loss += loss_weight * output_loss

    # add regularization penalties
    # and other layer-specific losses
    for loss_tensor in train_model.losses:
        total_loss += loss_tensor

    # list of same size as output_names.
    # contains tuples (metrics for output, names of metrics)
    nested_metrics = _collect_metrics(metrics, train_model.output_names)

    def append_metric(layer_num, metric_name, metric_tensor):
        """Helper function, used in loop below"""
        if len(train_model.output_names) > 1:
            metric_name = train_model.output_layers[layer_num].name + '_' + metric_name
        train_model.metrics_names.append(metric_name)
        train_model.metrics_tensors.append(metric_tensor)

    for i in range(len(train_model.outputs)):
        y_true = out_tensor_lst[i]
        y_pred = train_model.outputs[i]
        output_metrics = nested_metrics[i]
        for metric in output_metrics:
            if metric == 'accuracy' or metric == 'acc':
                # custom handling of accuracy
                # (because of class mode duality)
                output_shape = train_model.internal_output_shapes[i]
                acc_fn = None
                if output_shape[-1] == 1 or train_model.loss_functions[i] == objectives.binary_crossentropy:
                    # case: binary accuracy
                    acc_fn = metrics_module.binary_accuracy
                elif train_model.loss_functions[i] == objectives.sparse_categorical_crossentropy:
                    # case: categorical accuracy with sparse targets
                    acc_fn = metrics_module.sparse_categorical_accuracy
                else:
                    acc_fn = metrics_module.categorical_accuracy
                append_metric(i, 'acc', acc_fn(y_true, y_pred))
            else:
                metric_fn = metrics_module.get(metric)
                metric_result = metric_fn(y_true, y_pred)
                if not isinstance(metric_result, dict):
                    metric_result = {metric_fn.__name__: metric_result}
                for name, tensor in six.iteritems(metric_result):
                    append_metric(i, name, tensor)

    # prepare gradient updates and state updates
    train_model.optimizer = optimizers.get(optimizer)
    train_model.total_loss = total_loss

    train_model.train_function = None
    train_model.test_function = None
    train_model.predict_function = None

    # collect trainable weights and sort them deterministically.
    trainable_weights = train_model.trainable_weights
    # Sort weights by name
    trainable_weights.sort(key=lambda x: x.name)
    train_model._collected_trainable_weights = trainable_weights

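# Design note (inferred from the code above): unlike the stock Keras
# compile, this tfrecord variant receives its targets as graph tensors
# (out_tensor_lst) rather than feed placeholders, so the weighted/masked
# objective machinery is bypassed (see the commented-out weighted_losses
# line) and each output loss is reduced directly with K.mean.
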
def make_eval_function(model, loss, loss_weights=None, **kwargs):
    # prepare loss weights
    if loss_weights is None:
        loss_weights_list = [1. for _ in range(len(model.outputs))]
    elif isinstance(loss_weights, dict):
        for name in loss_weights:
            if name not in model.output_names:
                raise ValueError('Unknown entry in loss_weights '
                                 'dictionary: "' + name + '". '
                                 'Only expected the following keys: ' +
                                 str(model.output_names))
        loss_weights_list = []
        for name in model.output_names:
            loss_weights_list.append(loss_weights.get(name, 1.))
    elif isinstance(loss_weights, list):
        if len(loss_weights) != len(model.outputs):
            raise ValueError('When passing a list as loss_weights, '
                             'it should have one entry per model output. '
                             'The model has ' + str(len(model.outputs)) +
                             ' outputs, but you passed loss_weights=' +
                             str(loss_weights))
        loss_weights_list = loss_weights
    else:
        raise TypeError('Could not interpret loss_weights argument: ' +
                        str(loss_weights) + ' - expected a list of dicts.')

    # prepare loss functions
    if isinstance(loss, dict):
        for name in loss:
            if name not in model.output_names:
                raise ValueError('Unknown entry in loss '
                                 'dictionary: "' + name + '". '
                                 'Only expected the following keys: ' +
                                 str(model.output_names))
        loss_functions = []
        for name in model.output_names:
            if name not in loss:
                raise ValueError('Output "' + name +
                                 '" missing from loss dictionary.')
            loss_functions.append(objectives.get(loss[name]))
    elif isinstance(loss, list):
        if len(loss) != len(model.outputs):
            raise ValueError('When passing a list as loss, '
                             'it should have one entry per model output. '
                             'The model has ' + str(len(model.outputs)) +
                             ' outputs, but you passed loss=' + str(loss))
        loss_functions = [objectives.get(l) for l in loss]
    else:
        loss_function = objectives.get(loss)
        loss_functions = [loss_function for _ in range(len(model.outputs))]
    weighted_losses = [weighted_objective_per_sample(fn)
                       for fn in loss_functions]

    # compute total loss
    total_loss = None
    for i in range(len(model.outputs)):
        y_true = model.targets[i]
        y_pred = model.outputs[i]
        weighted_loss = weighted_losses[i]
        sample_weight = model.sample_weights[i]
        loss_weight = loss_weights_list[i]
        output_loss = weighted_loss(y_true, y_pred, sample_weight)
        if total_loss is None:
            total_loss = loss_weight * output_loss
        else:
            total_loss += loss_weight * output_loss

    if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
        inputs = model.inputs + model.targets + model.sample_weights + [K.learning_phase()]
    else:
        inputs = model.inputs + model.targets + model.sample_weights

    # Returns loss and metrics; no gradient updates.
    # Does update the network states.
    eval_function = K.function(inputs, [total_loss],
                               updates=model.state_updates,
                               **kwargs)
    return eval_function

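# Hedged usage sketch (array names are assumptions): the returned
# K.function expects inputs, targets and sample weights concatenated in
# that order, applies no gradient updates, but does run state_updates
# (e.g. stateful RNN state transitions):
#
#   eval_fn = make_eval_function(model, loss='mse')
#   total_loss, = eval_fn(input_arrays + target_arrays + sample_weight_arrays)
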
def compile(self, optimizer, loss, theano_mode=None):
    self.optimizer = optimizers.get(optimizer)
    self.loss = objectives.get(loss)
    # weighted_loss = weighted_objective(objectives.get(loss))

    # input of model
    self.X_train = self.get_input(train=True)
    self.X_test = self.get_input(train=False)

    # self.y_train = self.get_output(train=True)
    # self.y_test = self.get_output(train=False)

    # # target of model
    # self.y = T.zeros_like(self.y_train)
    # self.weights = T.ones_like(self.y_train)

    # train_loss = weighted_loss(self.y, self.y_train, self.weights)
    # test_loss = weighted_loss(self.y, self.y_test, self.weights)
    train_loss = self.loss(self.X_train)
    test_loss = self.loss(self.X_test)

    train_loss.name = 'train_loss'
    test_loss.name = 'test_loss'
    # self.y.name = 'y'

    # if class_mode == "categorical":
    #     train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
    #     test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))
    # elif class_mode == "binary":
    #     train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
    #     test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
    # else:
    #     raise Exception("Invalid class mode:" + str(class_mode))
    # train_accuracy = monitor(self.X_train)
    # test_accuracy = monitor(self.X_test)
    # self.class_mode = class_mode
    self.theano_mode = theano_mode

    for r in self.regularizers:
        train_loss = r(train_loss)
    updates = self.optimizer.get_updates(self.params, self.constraints, train_loss)

    if type(self.X_train) == list:
        train_ins = self.X_train  # + [self.y, self.weights]
        test_ins = self.X_test    # + [self.y, self.weights]
        # predict_ins = self.X_test
    else:
        train_ins = [self.X_train]  # , self.y, self.weights]
        test_ins = [self.X_test]    # , self.y, self.weights]
        # predict_ins = [self.X_test]

    self._train = theano.function(train_ins, train_loss,
                                  updates=updates,
                                  allow_input_downcast=True,
                                  mode=theano_mode)
    # self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
    #                                        updates=updates, allow_input_downcast=True, mode=theano_mode)
    # self._predict = theano.function(predict_ins, self.y_test,
    #                                 allow_input_downcast=True, mode=theano_mode)
    self._test = theano.function(test_ins, test_loss,
                                 allow_input_downcast=True,
                                 mode=theano_mode)

def compile(self, optimizer, loss, metrics=[], loss_weights=None,
            sample_weight_mode=None, **kwargs):
    # super(sModel, self).compile(optimizer, loss, metrics, loss_weights,
    #                             sample_weight_mode, **kwargs)
    self.optimizer = optimizers.get(optimizer)
    self.sample_weight_mode = sample_weight_mode
    self.loss = loss
    self.loss_weights = loss_weights

    # prepare loss weights
    if loss_weights is None:
        loss_weights_list = [1. for _ in range(len(self.outputs))]
    elif isinstance(loss_weights, dict):
        for name in loss_weights:
            if name not in self.output_names:
                raise ValueError('Unknown entry in loss_weights '
                                 'dictionary: "' + name + '". '
                                 'Only expected the following keys: ' +
                                 str(self.output_names))
        loss_weights_list = []
        for name in self.output_names:
            loss_weights_list.append(loss_weights.get(name, 1.))
    elif isinstance(loss_weights, list):
        if len(loss_weights) != len(self.outputs):
            raise ValueError('When passing a list as loss_weights, '
                             'it should have one entry per model output. '
                             'The model has ' + str(len(self.outputs)) +
                             ' outputs, but you passed loss_weights=' +
                             str(loss_weights))
        loss_weights_list = loss_weights
    else:
        raise TypeError('Could not interpret loss_weights argument: ' +
                        str(loss_weights) + ' - expected a list of dicts.')

    # prepare loss functions
    if isinstance(loss, dict):
        for name in loss:
            if name not in self.output_names:
                raise ValueError('Unknown entry in loss '
                                 'dictionary: "' + name + '". '
                                 'Only expected the following keys: ' +
                                 str(self.output_names))
        loss_functions = []
        for name in self.output_names:
            if name not in loss:
                raise ValueError('Output "' + name +
                                 '" missing from loss dictionary.')
            loss_functions.append(objectives.get(loss[name]))
    elif isinstance(loss, list):
        if len(loss) != len(self.outputs):
            raise ValueError('When passing a list as loss, '
                             'it should have one entry per model output. '
                             'The model has ' + str(len(self.outputs)) +
                             ' outputs, but you passed loss=' + str(loss))
        loss_functions = [objectives.get(l) for l in loss]
    else:
        loss_function = objectives.get(loss)
        loss_functions = [loss_function for _ in range(len(self.outputs))]
    self.loss_functions = loss_functions
    weighted_losses = [weighted_objective(fn) for fn in loss_functions]

    # prepare output masks
    masks = self.compute_mask(self.inputs, mask=None)
    if masks is None:
        masks = [None for _ in self.outputs]
    if not isinstance(masks, list):
        masks = [masks]

    # prepare sample weights
    if isinstance(sample_weight_mode, dict):
        for name in sample_weight_mode:
            if name not in self.output_names:
                raise ValueError('Unknown entry in '
                                 'sample_weight_mode dictionary: "' +
                                 name + '". '
                                 'Only expected the following keys: ' +
                                 str(self.output_names))
        sample_weights = []
        sample_weight_modes = []
        for name in self.output_names:
            if name not in sample_weight_mode:
                raise ValueError('Output "' + name +
                                 '" missing from sample_weight_modes '
                                 'dictionary')
            if sample_weight_mode.get(name) == 'temporal':
                weight = K.placeholder(ndim=2, name=name + '_sample_weights')
                sample_weight_modes.append('temporal')
            else:
                weight = K.placeholder(ndim=1, name=name + '_sample_weights')
                sample_weight_modes.append(None)
            sample_weights.append(weight)
    elif isinstance(sample_weight_mode, list):
        if len(sample_weight_mode) != len(self.outputs):
            raise ValueError('When passing a list as sample_weight_mode, '
                             'it should have one entry per model output. '
                             'The model has ' + str(len(self.outputs)) +
                             ' outputs, but you passed '
                             'sample_weight_mode=' + str(sample_weight_mode))
        sample_weights = []
        sample_weight_modes = []
        for mode, name in zip(sample_weight_mode, self.output_names):
            if mode == 'temporal':
                weight = K.placeholder(ndim=2, name=name + '_sample_weights')
                sample_weight_modes.append('temporal')
            else:
                weight = K.placeholder(ndim=1, name=name + '_sample_weights')
                sample_weight_modes.append(None)
            sample_weights.append(weight)
    else:
        if sample_weight_mode == 'temporal':
            sample_weights = [K.placeholder(ndim=2, name=name + '_sample_weights')
                              for name in self.output_names]
            sample_weight_modes = ['temporal' for name in self.output_names]
        else:
            sample_weights = [K.placeholder(ndim=1, name=name + '_sample_weights')
                              for name in self.output_names]
            sample_weight_modes = [None for name in self.output_names]
    self.sample_weight_modes = sample_weight_modes

    # prepare targets of model
    self.targets = []
    for i in range(len(self.outputs)):
        shape = self.internal_output_shapes[i]
        name = self.output_names[i]
        self.targets.append(K.placeholder(ndim=len(shape),
                                          name=name + '_target',
                                          sparse=K.is_sparse(self.outputs[i]),
                                          dtype=K.dtype(self.outputs[i])))

    # prepare metrics
    self.metrics = metrics
    self.metrics_names = ['loss']
    self.metrics_tensors = []

    # compute total loss
    total_loss = None
    for i in range(len(self.outputs)):
        y_true = self.targets[i]
        y_pred = self.outputs[i]
        weighted_loss = weighted_losses[i]
        sample_weight = sample_weights[i]
        mask = masks[i]
        loss_weight = loss_weights_list[i]
        output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
        if len(self.outputs) > 1:
            self.metrics_tensors.append(output_loss)
            self.metrics_names.append(self.output_names[i] + '_loss')
        if total_loss is None:
            total_loss = loss_weight * output_loss
        else:
            total_loss += loss_weight * output_loss

    # add regularization penalties
    # and other layer-specific losses
    for loss_tensor in self.losses:
        total_loss += loss_tensor

    # list of same size as output_names.
    # contains tuples (metrics for output, names of metrics)
    nested_metrics = collect_metrics(metrics, self.output_names)

    def append_metric(layer_num, metric_name, metric_tensor):
        """Helper function, used in loop below"""
        if len(self.output_names) > 1:
            metric_name = self.output_layers[layer_num].name + '_' + metric_name
        self.metrics_names.append(metric_name)
        self.metrics_tensors.append(metric_tensor)

    for i in range(len(self.outputs)):
        y_true = self.targets[i]
        y_pred = self.outputs[i]
        output_metrics = nested_metrics[i]
        for metric in output_metrics:
            if metric == 'accuracy' or metric == 'acc':
                # custom handling of accuracy
                # (because of class mode duality)
                output_shape = self.internal_output_shapes[i]
                acc_fn = None
                if output_shape[-1] == 1 or self.loss_functions[i] == objectives.binary_crossentropy:
                    # case: binary accuracy
                    acc_fn = metrics_module.binary_accuracy
                elif self.loss_functions[i] == objectives.sparse_categorical_crossentropy:
                    # case: categorical accuracy with sparse targets
                    acc_fn = metrics_module.sparse_categorical_accuracy
                else:
                    acc_fn = metrics_module.categorical_accuracy
                append_metric(i, 'acc', acc_fn(y_true, y_pred))
            else:
                metric_fn = metrics_module.get(metric)
                metric_result = metric_fn(y_true, y_pred)
                if not isinstance(metric_result, dict):
                    metric_result = {metric_fn.__name__: metric_result}
                for name, tensor in six.iteritems(metric_result):
                    append_metric(i, name, tensor)

    # prepare gradient updates and state updates
    self.total_loss = total_loss
    self.sample_weights = sample_weights

    # functions for train, test and predict will
    # be compiled lazily when required.
    # This saves time when the user is not using all functions.
    self._function_kwargs = kwargs
    self.train_function = None
    self.test_function = None
    self.predict_function = None

    # collect trainable weights and sort them deterministically.
    trainable_weights = self.trainable_weights
    # Sort weights by name
    if trainable_weights:
        if K.backend() == 'theano':
            trainable_weights.sort(key=lambda x: x.name if x.name else x.auto_name)
        else:
            trainable_weights.sort(key=lambda x: x.name)
    self._collected_trainable_weights = trainable_weights

def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) weighted_loss = weighted_objective(objectives.get(loss)) # input of model self.X_train = self.get_input(train=True) self.X_test = self.get_input(train=False) self.y_train = self.get_output(train=True) self.y_test = self.get_output(train=False) # target of model self.y = T.zeros_like(self.y_train) self.weights = T.ones_like(self.y_train) if hasattr(self.layers[-1], "get_output_mask"): mask = self.layers[-1].get_output_mask() else: mask = None train_loss = weighted_loss(self.y, self.y_train, self.weights, mask) test_loss = weighted_loss(self.y, self.y_test, self.weights, mask) train_loss.name = 'train_loss' test_loss.name = 'test_loss' self.y.name = 'y' if class_mode == "categorical": train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1))) test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train))) test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode self.theano_mode = theano_mode for r in self.regularizers: train_loss = r(train_loss) updates = self.optimizer.get_updates(self.params, self.constraints, train_loss) updates += self.updates if type(self.X_train) == list: train_ins = self.X_train + [self.y, self.weights] test_ins = self.X_test + [self.y, self.weights] predict_ins = self.X_test else: train_ins = [self.X_train, self.y, self.weights] test_ins = [self.X_test, self.y, self.weights] predict_ins = [self.X_test] self._train = theano.function(train_ins, train_loss, updates=updates, allow_input_downcast=True, mode=theano_mode) self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy], updates=updates, allow_input_downcast=True, mode=theano_mode) self._predict = theano.function(predict_ins, self.y_test, allow_input_downcast=True, mode=theano_mode) self._test = theano.function(test_ins, test_loss, allow_input_downcast=True, mode=theano_mode) self._test_with_acc = theano.function(test_ins, [test_loss, test_accuracy], allow_input_downcast=True, mode=theano_mode)
def adversarial_compile(self, adversarial_optimizer, player_optimizers, loss,
                        **kwargs):
    """Configures the learning process.

    :param adversarial_optimizer: instance of AdversarialOptimizer
    :param player_optimizers: list of optimizers for each player
    :param loss: loss function or function name
    :param kwargs: additional arguments to function compilation
    :return:
    """
    self._function_kwargs = kwargs
    self.adversarial_optimizer = adversarial_optimizer
    assert len(player_optimizers) == self.player_count

    self.optimizers = [optimizers.get(optimizer)
                       for optimizer in player_optimizers]
    self.loss = objectives.get(loss)
    self.optimizer = None

    # Build player models
    self.layers = []
    for i in range(self.player_count):
        # duplicate base model
        model = Model(self.base_model.inputs,
                      fix_names(self.base_model(self.base_model.inputs),
                                self.base_model.output_names))
        # compile model
        model.compile(self.optimizers[i], loss=self.loss)
        # add model to list
        self.layers.append(model)

    self.train_function = None
    self.test_function = None

    # Inputs are same as base model
    self.internal_input_shapes = self.base_model.internal_input_shapes
    self.input_names = self.base_model.input_names
    self.inputs = self.base_model.inputs

    # Outputs are concatenated player models
    models = self.layers

    def collect(f):
        return list(itertools.chain.from_iterable(f(m) for m in models))

    self.internal_output_shapes = collect(lambda m: m.internal_output_shapes)
    self.loss_functions = collect(lambda m: m.loss_functions)
    self.targets = collect(lambda m: m.targets)
    self.outputs = collect(lambda m: m.outputs)
    self.sample_weights = collect(lambda m: m.sample_weights)
    self.sample_weight_modes = collect(lambda m: m.sample_weight_modes)

    # for each target, output name is {player}_{target}
    self.output_names = []
    for i in range(self.player_count):
        for name in models[i].output_names:
            self.output_names.append("{}_{}".format(self.player_names[i], name))

    # for each metric, metric name is {player}_{metric}
    self.metrics_names = ["loss"]
    for i in range(self.player_count):
        for name in models[i].metrics_names:
            self.metrics_names.append("{}_{}".format(self.player_names[i], name))

    # total loss is sum of losses
    self.total_loss = np.float32(0)
    for model in models:
        self.total_loss += model.total_loss

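# Hedged usage sketch in the style of the keras-adversarial API this
# method resembles; the AdversarialOptimizer class name is an assumption:
#
#   model.adversarial_compile(
#       adversarial_optimizer=AdversarialOptimizerSimultaneous(),
#       player_optimizers=['adam', 'adam'],  # one optimizer per player
#       loss='binary_crossentropy')
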
def compile(self, optimizer, loss, class_mode="categorical", sample_weight_mode=None): '''Configure the learning process. # Arguments optimizer: str (name of optimizer) or optimizer object. See [optimizers](optimizers.md). loss: str (name of objective function) or objective function. See [objectives](objectives.md). class_mode: one of "categorical", "binary". This is only used for computing classification accuracy or using the predict_classes method. sample_weight_mode: if you need to do timestep-wise sample weighting (2D weights), set this to "temporal". "None" defaults to sample-wise weights (1D). ''' self.optimizer = optimizers.get(optimizer) self.sample_weight_mode = sample_weight_mode self.loss = objectives.get(loss) weighted_loss = weighted_objective(self.loss) # input of model self.X_train = self.get_input(train=True) self.X_test = self.get_input(train=False) self.single_y_train = self.get_output(train=True) self.single_y_test = self.get_output(train=False) self.diff_train = K.placeholder(ndim=1) self.diff_test = K.placeholder(ndim=1) self.y_train = K.concatenate([ K.dot(self.diff_train, self.single_y_train[:self.diff_train.shape[0]]), K.dot(self.diff_train, self.single_y_train[self.diff_train.shape[0]:]) ], axis=0) self.y_test = K.concatenate([ K.dot(self.diff_test, self.single_y_test[:self.diff_test.shape[0]]), K.dot(self.diff_test, self.single_y_test[self.diff_test.shape[0]:]) ], axis=0) # target of model self.y = K.placeholder(ndim=K.ndim(self.y_train)) if self.sample_weight_mode == 'temporal': self.weights = K.placeholder(ndim=2) else: self.weights = K.placeholder(ndim=1) if hasattr(self.layers[-1], "get_output_mask"): mask = self.layers[-1].get_output_mask() else: mask = None train_loss = weighted_loss(self.y, self.y_train, self.weights, mask) test_loss = weighted_loss(self.y, self.y_test, self.weights, mask) if class_mode == "categorical": train_accuracy = K.mean( K.equal(K.argmax(self.y, axis=-1), K.argmax(self.y_train, axis=-1))) test_accuracy = K.mean( K.equal(K.argmax(self.y, axis=-1), K.argmax(self.y_test, axis=-1))) elif class_mode == "binary": train_accuracy = K.mean(K.equal(self.y, K.round(self.y_train))) test_accuracy = K.mean(K.equal(self.y, K.round(self.y_test))) else: raise Exception("Invalid class mode:" + str(class_mode)) self.class_mode = class_mode for r in self.regularizers: train_loss = r(train_loss) updates = self.optimizer.get_updates(self.trainable_weights, self.constraints, train_loss) updates += self.updates if type(self.X_train) == list: train_ins = self.X_train + [self.diff_train, self.y, self.weights] test_ins = self.X_test + [self.diff_test, self.y, self.weights] assert type(self.X_test) == list predict_ins = self.X_test + [self.diff_test] else: train_ins = [self.X_train, self.diff_train, self.y, self.weights] test_ins = [self.X_test, self.diff_test, self.y, self.weights] predict_ins = [self.X_test, self.diff_test] self.__train = K.function(train_ins, [train_loss], updates=updates) self.__train_with_acc = K.function(train_ins, [train_loss, train_accuracy], updates=updates) self.__predict = K.function(predict_ins, [self.y_test], updates=self.state_updates) self.__test = K.function(test_ins, [test_loss], updates=self.state_updates) self.__test_with_acc = K.function(test_ins, [test_loss, test_accuracy], updates=self.state_updates) self._train = lambda rr: self.__train([r[0] for r in rr[:-1]] + [rr[-1]]) self._train_with_acc = lambda rr: self.__train_with_acc( [r[0] for r in rr[:-1]] + [rr[-1]]) self._predict = lambda rr: self.__predict([r[0] for r in 
rr]) self._test = lambda rr: self.__test([r[0] for r in rr[:-1]] + [rr[-1]]) self._test_with_acc = lambda rr: self.__test_with_acc( [r[0] for r in rr[:-1]] + [rr[-1]])