# Split the index array into training and validation index sets.
# NOTE(review): `train_valid_test_split` is defined elsewhere; presumably
# train=0.8 keeps 80% of `idx` for training -- confirm against the helper.
idx_train, idx_valid = train_valid_test_split(
    idx, train=0.8, inc_test=False, seed=1234)

# Take the validation slices first, while X_train / y_train still refer to
# the full arrays, and only then narrow the training arrays themselves.
X_valid, y_valid = X_train[idx_valid], y_train[idx_valid]
X_train, y_train = X_train[idx_train], y_train[idx_train]

print("#Train:", X_train.shape, y_train.shape)
print("#Valid:", X_valid.shape, y_valid.shape)
print("#Test:", X_test.shape, y_test.shape)

# ====== training ====== #
print('Start training ...')
task = training.MainLoop(
    batch_size=128,
    seed=1234,
    shuffle_level=2,
    allow_rollback=True,
)
task.set_checkpoint(MODEL_PATH, model)
task.set_callbacks([
    training.NaNDetector(),
    # Early stopping is driven by `ce` as reported by the 'valid' task.
    training.EarlyStopGeneralizationLoss(
        'valid', ce, threshold=5, patience=3),
])
task.set_train_task(
    func=f_train,
    data=(X_train, y_train),
    epoch=NB_EPOCH,
    name='train',
)
task.set_valid_task(
    func=f_test,
    data=(X_valid, y_valid),
    freq=training.Timer(percentage=0.6),
    name='valid',
)
# The held-out test arrays are only used by the 'eval' task.
task.set_eval_task(
    func=f_test,
    data=(X_test, y_test),
    name='eval',
)
task.run()
# ===========================================================================
task = training.MainLoop(batch_size=64, seed=12, shuffle_level=2) task.set_save(get_modelpath(name='mnist.ai', override=True), ops) task.set_task(f_train, (ds['X_train'], ds['y_train']), epoch=arg['epoch'], name='train') task.set_subtask(f_test, (ds['X_test'], ds['y_test']), freq=0.6, name='valid') task.set_subtask(f_test, (ds['X_test'], ds['y_test']), when=-1, name='test') task.set_callback([ training.ProgressMonitor(name='train', format='Results: {:.4f}-{:.4f}'), training.ProgressMonitor(name='valid', format='Results: {:.4f}-{:.4f}', tracking={2: lambda x: sum(x)}), training.ProgressMonitor(name='test', format='Results: {:.4f}-{:.4f}'), training.History(), training.EarlyStopGeneralizationLoss('valid', threshold=5, patience=3), training.NaNDetector(('train', 'valid'), patience=3, rollback=True) ]) task.run() # ====== plot the training process ====== # task['History'].print_info() task['History'].print_batch('train') task['History'].print_batch('valid') task['History'].print_epoch('test') print('Benchmark TRAIN-batch:', task['History'].benchmark('train', 'batch_end').mean) print('Benchmark TRAIN-epoch:', task['History'].benchmark('train', 'epoch_end').mean) print('Benchmark PRED-batch:', task['History'].benchmark('valid', 'batch_end').mean) print('Benchmark PRED-epoch:', task['History'].benchmark('valid',
i if j == 0 else None, **kws) curr_grid_index += 3 V.plot_save(os.path.join(FIGURE_PATH, 'latent_%d.png' % curr_epoch), dpi=200, log=True) exit() # ====== training ====== # runner = T.MainLoop(batch_size=args.batch, seed=1234, shuffle_level=2, allow_rollback=False, verbose=2) runner.set_callbacks([ T.NaNDetector(task_name=None, patience=-1, detect_inf=True), None if args.no_monitor else T.EpochSummary( task_name=('train', 'valid'), output_name=(loss, iw_loss, KL_mean, NLLK_mean), print_plot=False, save_path=os.path.join(FIGURE_PATH, 'summary.png')), T.LambdaCallback(fn=plot_epoch, task_name='train') ]) runner.set_train_task(func=f_train, data=[X_train, X_train], epoch=args.epoch, name='train') runner.set_valid_task(func=f_score, data=[X_test, X_test], name='valid') runner.run()
# Training function: returns ce, acc and optimizer.norm, and applies
# `updates` on every call.
f_train = K.function(
    inputs,
    [ce, acc, optimizer.norm],
    updates=updates,
    training=True,
)
print('Building testing functions ...')
# Scoring function: same inputs, no updates, built with training=False.
f_score = K.function(inputs, [ce, acc], training=False)
# Latent spaces
f_z = K.function(inputs=X, outputs=z, training=False)
# ===========================================================================
# Create trainer
# ===========================================================================
print('Start training ...')
task = training.MainLoop(
    batch_size=BATCH_SIZE,
    seed=120825,
    shuffle_level=2,
    allow_rollback=False,
    verbose=4,
)
task.set_checkpoint(
    path=MODEL_PATH,
    obj=x_vec,
    increasing=True,
    max_checkpoint=-1,
)
task.set_callbacks([
    training.NaNDetector(task_name='train', patience=-1),
    training.CheckpointEpoch(task_name='train', epoch_percent=0.5),
    # Early stopping is currently disabled:
    # training.EarlyStopGeneralizationLoss('valid', ce,
    #                                      threshold=5, patience=3)
])
task.set_train_task(
    func=f_train,
    data=train,
    epoch=EPOCH,
    name='train',
)
task.set_valid_task(
    func=f_score,
    data=valid,
    freq=training.Timer(percentage=0.5),
    name='valid',
)
task.run()