def __init__(self, eval_set, test_set, epoch_freq):
    super(SelfCallback, self).__init__(eval_set=eval_set,
                                       epoch_freq=epoch_freq)
    self.train_batch_time = None
    self.total_batch_index = 0
    self.test_set = test_set
    self.metric = Accuracy()
def _epoch_fit(self, dataset, callbacks):
    epoch = self.epoch_index
    self.total_cost[:] = 0

    # iterate through minibatches of the dataset
    for mb_idx, (x, t) in enumerate(dataset):
        callbacks.on_minibatch_begin(epoch, mb_idx)
        self.be.begin(Block.minibatch, mb_idx)

        x = self.fprop(x)

        # save per-minibatch accuracy
        acc = Accuracy()
        mbstart = callbacks.callback_data['time_markers/minibatch'][epoch - 1] \
            if epoch > 0 else 0
        callbacks.callback_data['accuracy/train'][mbstart + mb_idx] = \
            acc(x, t) * 100.0

        self.total_cost[:] = self.total_cost + self.cost.get_cost(x, t)

        # back-propagate deltas through the layers
        # (every layer in reverse except the 0th one)
        delta = self.cost.get_errors(x, t)
        self.bprop(delta)
        self.optimizer.optimize(self.layers_to_optimize, epoch=epoch)

        self.be.end(Block.minibatch, mb_idx)
        callbacks.on_minibatch_end(epoch, mb_idx)

    # divide the accumulated cost by the number of batches; this was never a
    # total cost, but the sum of per-minibatch average costs across all the
    # minibatches we trained on
    self.total_cost[:] = self.total_cost / dataset.nbatches
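# The per-minibatch writes in _epoch_fit above assume 'accuracy/train'
# already exists in callback_data. A minimal allocation sketch for the
# companion callback's on_train_begin hook, assuming neon's h5py-backed
# callback_data; 'self.train_nbatches' is a hypothetical attribute holding
# the number of minibatches per epoch:
def on_train_begin(self, callback_data, model, epochs):
    total_minibatches = self.train_nbatches * epochs  # hypothetical attribute
    callback_data.create_dataset("accuracy/train", (total_minibatches,))
    callback_data.create_dataset("accuracy/valid", (epochs,))
    callback_data.create_dataset("infer_acc/accuracy", (1,))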
def eval(self, test_set):
    """
    Evaluate the model on test_set for error rate, accuracy and
    precision/recall.

    Args:
        test_set (ArrayIterator): the test set

    Returns:
        tuple: error_rate, test_accuracy_rate and precision_recall_rate
    """
    error_rate = self.model.eval(test_set, metric=Misclassification())
    test_accuracy_rate = self.model.eval(test_set, metric=Accuracy())
    precision_recall_rate = self.model.eval(test_set, metric=PrecisionRecall(2))
    return error_rate, test_accuracy_rate, precision_recall_rate
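# Usage sketch: 'evaluator' is hypothetical and stands for whatever object
# carries the eval() method above with a trained neon Model at self.model.
# model.eval returns numpy arrays, hence the [0] unpacking of the scalars.
error, acc, pr = evaluator.eval(test_set)
print('error rate: %.4f' % error[0])
print('accuracy:   %.4f' % acc[0])
print('precision/recall:', pr)  # PrecisionRecall(2): two-class P and R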
# recurrent layer (the head of this call was truncated in the source;
# DeepBiRNN assumed, matching the branch shown later in this section)
rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(), depth=1,
                   reset_cells=True, batch_norm=True)

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)

# eval model
print("Train Accuracy - ", 100 * model.eval(train_set, metric=Accuracy()))
print("Test Accuracy - ", 100 * model.eval(valid_set, metric=Accuracy()))
layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)

# eval model
neon_logger.display("Train Accuracy - {}".format(
    100 * model.eval(train_set, metric=Accuracy())))
neon_logger.display("Test Accuracy - {}".format(
    100 * model.eval(valid_set, metric=Accuracy())))
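# The 'args' object used above comes from neon's standard argument parser;
# a minimal sketch (the extra --rlayer_type flag is an assumption, modeled
# on the branch selection shown later in this section):
from neon.util.argparser import NeonArgparser

parser = NeonArgparser(__doc__)
parser.add_argument('--rlayer_type', default='lstm',
                    help='type of recurrent layer to use')
args = parser.parse_args()  # also instantiates the backend
# args.callback_args bundles the callback flags (save_path, serialize,
# eval_freq, ...) so they can be splatted into Callbacks(...)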
# weight initialization
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
init_glorot = GlorotUniform()

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim,
                init=init_emb),
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, init_glorot, bias=init_glorot, activation=Softmax())
]

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
metric = Accuracy()
model = Model(layers=layers)
optimizer = Adagrad(learning_rate=0.01, gradient_clip_value=clip_gradients)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
          cost=cost, callbacks=callbacks)
train = make_train_loader(args.manifest['train'], args.manifest_root,
                          model.be, args.subset_pct, random_seed)
valid = make_test_loader(args.manifest['test'], args.manifest_root,
                         model.be, args.subset_pct)

# setup callbacks
callbacks = Callbacks(model, eval_set=valid, **args.callback_args)

# gradient descent with momentum, weight decay, and learning rate decay schedule
learning_rate_sched = Schedule(list(range(6, args.epochs, 6)), 0.1)
opt_gdm = GradientDescentMomentum(0.003, 0.9, wdecay=0.005,
                                  schedule=learning_rate_sched)
opt_biases = GradientDescentMomentum(0.006, 0.9,
                                     schedule=learning_rate_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# train model
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)

# output accuracies
neon_logger.display('Train Accuracy = %.1f%%' %
                    (model.eval(train, metric=Accuracy()) * 100))
neon_logger.display('Validation Accuracy = %.1f%%' %
                    (model.eval(valid, metric=Accuracy()) * 100))
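# How the schedule above behaves: Schedule(steps, 0.1) multiplies the
# learning rate by 0.1 at each listed epoch. A worked sketch, assuming
# args.epochs == 20 for concreteness:
base_lr, factor = 0.003, 0.1
steps = list(range(6, 20, 6))  # [6, 12, 18]
for epoch in range(20):
    lr = base_lr * factor ** sum(epoch >= s for s in steps)
    # epochs 0-5: 3.0e-3, 6-11: 3.0e-4, 12-17: 3.0e-5, 18-19: 3.0e-6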
# Model construction
story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

layers = [
    MergeMultistream(layers=[story_path, query_path], merge="stack"),
    Affine(babi.vocab_size, init=GlorotUniform(), activation=Softmax())
]

model = Model(layers=layers)

# setup callbacks
callbacks = Callbacks(model, train_set, eval_set=valid_set,
                      **args.callback_args)

# train model
model.fit(train_set, optimizer=Adam(), num_epochs=args.epochs,
          cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
          callbacks=callbacks)

# output accuracies
print('Train Accuracy = %.1f%%' % (model.eval(train_set, metric=Accuracy()) * 100))
print('Test Accuracy = %.1f%%' % (model.eval(valid_set, metric=Accuracy()) * 100))
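# The parameter dicts referenced above are defined elsewhere; a sketch of
# plausible values, modeled on neon's bAbI example (the embedding size and
# the GRU choice here are assumptions):
from neon.layers import GRU

lookup_params = dict(vocab_size=babi.vocab_size, embedding_dim=50,
                     init=GlorotUniform())
rlayer_obj = GRU
rlayer_params = dict(output_size=100, init=GlorotUniform(),
                     activation=Tanh(), gate_activation=Logistic(),
                     reset_cells=True)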
def on_train_end(self, callback_data, model):
    callback_data['infer_acc/accuracy'][0] = model.eval(
        self.test_set, metric=Accuracy())[0] * 100.0
def on_epoch_end(self, callback_data, model, epoch):
    super(SelfCallback, self).on_epoch_end(callback_data, model, epoch)
    callback_data['accuracy/valid'][epoch] = model.eval(
        self.eval_set, metric=Accuracy())[0] * 100.0
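# Wiring the callback into a run; Callbacks.add_callback is stock neon,
# and the constructor arguments mirror the __init__ at the top of this
# section:
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)
callbacks.add_callback(SelfCallback(eval_set=valid_set, test_set=test_set,
                                    epoch_freq=1))
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)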
# (tail of an elided data-loader call)
#     ..., **common)

# model creation
model = create_network()

# setup callbacks
callbacks = Callbacks(model, eval_set=test, **args.callback_args)

# gradient descent with momentum, weight decay, and learning rate decay schedule
learning_rate_sched = Schedule(list(range(6, args.epochs, 6)), 0.1)
opt_gdm = GradientDescentMomentum(0.003, 0.9, wdecay=0.005,
                                  schedule=learning_rate_sched)
opt_biases = GradientDescentMomentum(0.006, 0.9,
                                     schedule=learning_rate_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# train model
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)

# output accuracies
neon_logger.display('Train Accuracy = %.1f%%' %
                    (model.eval(train, metric=Accuracy()) * 100))
neon_logger.display('Test Accuracy = %.1f%%' %
                    (model.eval(test, metric=Accuracy()) * 100))
# (earlier branches of the rlayer_type selection are elided here; the
# 'birnn' label below is inferred from the batch_norm=False/True pair)
elif args.rlayer_type == 'birnn':
    rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(), depth=1,
                       reset_cells=reset_cells, batch_norm=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size, g_uni, activation=Tanh(), depth=1,
                       reset_cells=reset_cells, batch_norm=True)

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)

# eval model
neon_logger.display("Train Accuracy - {}".format(
    100 * model.eval(train_set, metric=Accuracy())))
neon_logger.display("Test Accuracy - {}".format(
    100 * model.eval(valid_set, metric=Accuracy())))
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = args.save_path

# setup backend
args.batch_size = 32
be = gen_backend(**extract_valid_args(args, gen_backend))

# load the bAbI dataset
babi = babi_handler(args.data_dir, args.task)
train_set = QA(*babi.train)
valid_set = QA(*babi.test)

# create model
model = create_model(babi.vocab_size, args.rlayer_type)

# train model
if not args.test_only:
    # setup callbacks
    callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

    model.fit(train_set, optimizer=Adam(), num_epochs=args.epochs,
              cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
              callbacks=callbacks)
else:
    model.load_params(args.model_file)

# output accuracies
print('Train Accuracy = %.1f%%' % (model.eval(train_set, metric=Accuracy()) * 100))
print('Test Accuracy = %.1f%%' % (model.eval(valid_set, metric=Accuracy()) * 100))
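# For the --test_only branch above, a parameter file must already exist;
# a minimal save/load round trip using neon's Model API (the file name is
# illustrative only):
model.save_params('babi_model.p')                        # after training
model = create_model(babi.vocab_size, args.rlayer_type)  # rebuild the graph
model.load_params('babi_model.p')                        # restore weights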
def main():
    # Get command-line parameters
    parser = get_p1b2_parser()
    args = parser.parse_args()
    # print('Args:', args)

    # Get parameters from configuration file
    fileParameters = p1b2.read_config_file(args.config_file)
    # print('Params:', fileParameters)

    # Correct for arguments set by default by the neon parser (i.e. instead
    # of taking the neon parser default, fall back to the config file; if the
    # command line was actually used, keep the command-line value). This
    # applies to the conflicting parameters: batch_size, epochs and rng_seed.
    if not any("--batch_size" in ag or "-z" in ag for ag in sys.argv):
        args.batch_size = fileParameters['batch_size']
    if not any("--epochs" in ag or "-e" in ag for ag in sys.argv):
        args.epochs = fileParameters['epochs']
    if not any("--rng_seed" in ag or "-r" in ag for ag in sys.argv):
        args.rng_seed = fileParameters['rng_seed']

    # Consolidate parameter set; command-line parameters overwrite the file
    # configuration
    gParameters = p1_common.args_overwrite_config(args, fileParameters)
    print('Params:', gParameters)

    # Determine verbosity level
    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    # Construct extension to save model
    ext = p1b2.extension_from_parameters(gParameters, '.neon')

    # Get default parameters for initialization and optimizer functions
    kerasDefaults = p1_common.keras_default_config()
    seed = gParameters['rng_seed']

    # Load dataset
    # (X_train, y_train), (X_test, y_test) = p1b2.load_data(gParameters, seed)
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = \
        p1b2.load_data(gParameters, seed)

    print("Shape X_train: ", X_train.shape)
    print("Shape X_val:   ", X_val.shape)
    print("Shape X_test:  ", X_test.shape)
    print("Shape y_train: ", y_train.shape)
    print("Shape y_val:   ", y_val.shape)
    print("Shape y_test:  ", y_test.shape)

    print("Range X_train --> Min: ", np.min(X_train), ", max: ", np.max(X_train))
    print("Range X_val   --> Min: ", np.min(X_val), ", max: ", np.max(X_val))
    print("Range X_test  --> Min: ", np.min(X_test), ", max: ", np.max(X_test))
    print("Range y_train --> Min: ", np.min(y_train), ", max: ", np.max(y_train))
    print("Range y_val   --> Min: ", np.min(y_val), ", max: ", np.max(y_val))
    print("Range y_test  --> Min: ", np.min(y_test), ", max: ", np.max(y_test))

    input_dim = X_train.shape[1]
    num_classes = int(np.max(y_train)) + 1
    # the backend one-hot encodes the classes (but requires integer class
    # labels as input!)
    output_dim = num_classes

    # Re-generate the backend after consolidating the parsed and file config
    gen_backend(backend=args.backend,
                rng_seed=seed,
                device_id=args.device_id,
                batch_size=gParameters['batch_size'],
                datatype=gParameters['data_type'],
                max_devices=args.max_devices,
                compat_mode=args.compat_mode)

    train = ArrayIterator(X=X_train, y=y_train, nclass=num_classes)
    val = ArrayIterator(X=X_val, y=y_val, nclass=num_classes)
    test = ArrayIterator(X=X_test, y=y_test, nclass=num_classes)

    # Initialize weights and learning rule
    initializer_weights = p1_common_neon.build_initializer(
        gParameters['initialization'], kerasDefaults, seed)
    initializer_bias = p1_common_neon.build_initializer(
        'constant', kerasDefaults, 0.)
    activation = p1_common_neon.get_function(gParameters['activation'])()

    # Define MLP architecture
    layers = []
    reshape = None

    for layer in gParameters['dense']:
        if layer:
            layers.append(Affine(nout=layer,
                                 init=initializer_weights,
                                 bias=initializer_bias,
                                 activation=activation))
        if gParameters['dropout']:
            layers.append(Dropout(keep=(1 - gParameters['dropout'])))

    layers.append(Affine(nout=output_dim,
                         init=initializer_weights,
                         bias=initializer_bias,
                         activation=activation))

    # Build MLP model
    mlp = Model(layers=layers)

    # Define cost and optimizer
    cost = GeneralizedCost(p1_common_neon.get_function(gParameters['loss'])())
    optimizer = p1_common_neon.build_optimizer(gParameters['optimizer'],
                                               gParameters['learning_rate'],
                                               kerasDefaults)

    callbacks = Callbacks(mlp, eval_set=val, metric=Accuracy(), eval_freq=1)

    # Seed random generator for training
    np.random.seed(seed)

    mlp.fit(train, optimizer=optimizer, num_epochs=gParameters['epochs'],
            cost=cost, callbacks=callbacks)

    # model save
    # save_fname = "model_mlp_W_" + ext
    # mlp.save_params(save_fname)

    # Evaluate model on test set
    print('Model evaluation by neon: ', mlp.eval(test, metric=Accuracy()))

    y_pred = mlp.get_outputs(test)
    # print("Shape y_pred: ", y_pred.shape)

    scores = p1b2.evaluate_accuracy(p1_common.convert_to_class(y_pred), y_test)
    print('Evaluation on test data:', scores)
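    # Sanity-check sketch for the two numbers printed above, using plain
    # numpy (assumption: y_test holds integer or one-hot labels;
    # p1b2.evaluate_accuracy is project code and may report more than this):
    y_pred_class = np.argmax(y_pred, axis=1)
    y_true_class = y_test if y_test.ndim == 1 else np.argmax(y_test, axis=1)
    manual_acc = np.mean(y_pred_class == y_true_class)
    print('Manual test accuracy: %.4f' % manual_acc)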