def run_reiam(flags_obj, data):
  """Run MNIST-style training loop with a Keras CNN model.

  Args:
    flags_obj: An object containing parsed flag values; must provide
      `num_epochs` and `batch_size`.
    data: 8-tuple of arrays (images_px_train, images_px_test,
      images_ch_train, images_ch_test, labels_train, labels_test,
      parameters_train, parameters_test). Only the *_train entries are
      consumed here; evaluation is currently commented out.

  Returns:
    The trained Keras model.
  """
  assert (tf.keras.backend.backend() == 'tensorflow')
  assert (tf.keras.backend.image_data_format() == 'channels_last')

  (images_px_train, images_px_test, images_ch_train, images_ch_test,
   labels_train, labels_test, parameters_train, parameters_test) = data

  # FIX: the original used Python-2-only tuple-unpacking lambdas
  # (`lambda (px, ch): ...` is a SyntaxError on Python 3) and passed bare
  # map() objects to np.asarray (on Python 3 a map object is an iterator,
  # so np.asarray would produce a 0-d object array instead of the data).
  # List comprehensions behave identically on Python 2 and 3.
  images = [parse_image_fn(px, ch)
            for px, ch in zip(images_px_train, images_ch_train)]
  labels = [parse_label_fn(lb) for lb in labels_train]
  images = np.asarray(images)
  labels = np.asarray(labels)

  # NOTE(review): n_rows, n_columns, n_channels, n_classes, dropout,
  # learning_rate and gradient_clip_norm are not defined in this function —
  # presumably module-level globals elsewhere in the file; confirm before
  # moving or reusing this function.
  params = {
      #'data_format': data_format,
      #'multi_gpu': multi_gpu,
      'n_rows': n_rows,
      'n_columns': n_columns,
      'n_channels': n_channels,
      'n_classes': n_classes,
      'dropout': dropout,
      'learning_rate': learning_rate,
      'gradient_clip_norm': gradient_clip_norm,
  }

  model = create_model(params, use_cnn_keras=True)

  # ____________________________________________________________________________
  # Train model
  import datetime
  from nn_logging import getLogger
  from nn_training import TrainingLog
  start_time = datetime.datetime.now()
  logger = getLogger()
  logger.info('Begin training ...')
  with TrainingLog() as tlog:  # redirect sys.stdout
    history = model.fit(images, labels,
                        epochs=flags_obj.num_epochs,
                        batch_size=flags_obj.batch_size,
                        validation_split=0.1,
                        verbose=1)
  logger.info('Done training. Time elapsed: {0} sec'.format(
      str(datetime.datetime.now() - start_time)))

  save_my_model(model, name='model_cnn')

  # ____________________________________________________________________________
  # Evaluate model (intentionally disabled in the original; kept for reference)
  #images = [parse_image_fn(px, ch) for px, ch in zip(images_px_test, images_ch_test)]
  #labels = [parse_label_fn(lb) for lb in labels_test]
  #images = np.asarray(images)
  #labels = np.asarray(labels)
  #
  #loss_and_metrics = model.evaluate(images, labels, batch_size=flags_obj.batch_size, verbose=0)
  #logger.info('loss and metrics: {0}'.format(loss_and_metrics))

  return model
import numpy as np
#from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from nn_logging import getLogger
logger = getLogger()

from nn_encode import Encoder_MK


# ______________________________________________________________________________
def muon_data(filename, adjust_scale=0, reg_pt_scale=1.0, correct_for_eta=False):
  """Load muon variables/parameters from a .npz file and encode them.

  Args:
    filename: Path to a numpy archive containing the arrays 'variables'
      and 'parameters' (same leading dimension).
    adjust_scale: Scale-adjustment mode forwarded to Encoder_MK.
    reg_pt_scale: pT regression scale forwarded to Encoder_MK.
    correct_for_eta: If True, use the eta-corrected targets
      (encoder.get_y_corrected_for_eta()) instead of encoder.get_y().

  Raises:
    Exception: Re-raises whatever np.load / key lookup raised, after
      logging the failing filename.
  """
  try:
    logger.info('Loading muon data from {0} ...'.format(filename))
    loaded = np.load(filename)
    the_variables = loaded['variables']
    the_parameters = loaded['parameters']
    logger.info('Loaded the variables with shape {0}'.format(the_variables.shape))
    logger.info('Loaded the parameters with shape {0}'.format(the_parameters.shape))
  except Exception:
    # BUG FIX: the original bare `except:` swallowed the error and then fell
    # through to use `the_variables`, which crashed with a confusing
    # NameError. Log the filename and re-raise the real failure instead.
    logger.error('Failed to load data from file: {0}'.format(filename))
    raise

  # Variables and parameters must describe the same events.
  assert(the_variables.shape[0] == the_parameters.shape[0])

  # MK
  encoder = Encoder_MK(the_variables, the_parameters,
                       adjust_scale=adjust_scale,
                       reg_pt_scale=reg_pt_scale,
                       drop_MK=True)
  if correct_for_eta:
    x, y, w, x_mask = encoder.get_x(), encoder.get_y_corrected_for_eta(), encoder.get_w(), encoder.get_x_mask()
  else:
    x, y, w, x_mask = encoder.get_x(), encoder.get_y(), encoder.get_w(), encoder.get_x_mask()
  # NOTE(review): no return statement is visible in this chunk — the function
  # most likely continues (e.g. a train_test_split over x/y/w/x_mask) beyond
  # this view; verify against the full file.
def run_reiam(flags_obj, data):
  """Run MNIST-style training and eval loop with a tf.estimator.Estimator.

  Builds feed-dict-backed tf.data input pipelines for train/eval, wraps a
  model_fn in an Estimator (optionally replicated across GPUs), and runs
  `flags_obj.num_epochs` rounds of tf.estimator.train_and_evaluate.

  Args:
    flags_obj: An object containing parsed flag values (num_gpus,
      batch_size, model_dir, shuffle/prefetch options, hooks, num_epochs,
      epochs_between_evals, threading and logging options, ...).
    data: 8-tuple of arrays (images_px_train, images_px_test,
      images_ch_train, images_ch_test, labels_train, labels_test,
      parameters_train, parameters_test).

  Returns:
    The trained tf.estimator.Estimator.
  """
  (images_px_train, images_px_test, images_ch_train, images_ch_test,
   labels_train, labels_test, parameters_train, parameters_test) = data

  # ____________________________________________________________________________
  # Thin wrappers that defer the cnn_utils imports until instantiation time.
  class ModelHelpers(object):
    def __init__(self):
      from cnn_utils import past_stop_threshold, apply_clean
      self.past_stop_threshold = past_stop_threshold
      self.apply_clean = apply_clean

  model_helpers = ModelHelpers()

  class HooksHelper(object):
    def __init__(self):
      from cnn_utils import get_train_hooks
      self.get_train_hooks = get_train_hooks

  hooks_helper = HooksHelper()

  # apply_clean comes from cnn_utils; presumably it cleans the model dir
  # when the corresponding flag is set — confirm against cnn_utils.
  model_helpers.apply_clean(flags_obj)

  # NOTE(review): model_fn is not defined in this function — it must be a
  # module-level symbol elsewhere in the file.
  model_function = model_fn

  # Get number of GPUs as defined by the --num_gpus flags and the number of
  # GPUs available on the machine.
  num_gpus = flags_obj.num_gpus
  multi_gpu = num_gpus > 1

  if multi_gpu:
    # Validate that the batch size can be split into devices.
    # NOTE(review): the return value is discarded; this call is used only
    # for its validation side effect (it raises on an indivisible batch).
    distribution_utils.per_device_batch_size(flags_obj.batch_size, num_gpus)

    # There are two steps required if using multi-GPU: (1) wrap the model_fn,
    # and (2) wrap the optimizer. The first happens here, and (2) happens
    # in the model_fn itself when the optimizer is defined.
    model_function = tf.contrib.estimator.replicate_model_fn(
        model_fn, loss_reduction=tf.losses.Reduction.MEAN,
        devices=["/device:GPU:%d" % d for d in range(num_gpus)])

  # Default the data format from the build: channels_first on CUDA builds.
  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first' if tf.test.is_built_with_cuda()
                   else 'channels_last')

  # NOTE(review): n_rows, n_columns, n_channels, n_classes, dropout,
  # learning_rate and gradient_clip_norm are module-level globals defined
  # elsewhere in this file — confirm before moving this function.
  params = {
      #'data_format': data_format,
      #'multi_gpu': multi_gpu,
      'n_rows': n_rows,
      'n_columns': n_columns,
      'n_channels': n_channels,
      'n_classes': n_classes,
      'dropout': dropout,
      'learning_rate': learning_rate,
      'gradient_clip_norm': gradient_clip_norm,
  }

  # Create Estimator
  sess_config = tf.ConfigProto(
      log_device_placement=flags_obj.log_device_placement,
      intra_op_parallelism_threads=flags_obj.num_intra_threads,
      inter_op_parallelism_threads=flags_obj.num_inter_threads,
      allow_soft_placement=True)

  model_config = tf.estimator.RunConfig(
      model_dir=flags_obj.model_dir,
      tf_random_seed=flags_obj.tf_random_seed,
      save_summary_steps=100,
      save_checkpoints_secs=30 * 60,  # checkpoint every 30 minutes
      session_config=sess_config)

  reiam_classifier = tf.estimator.Estimator(
      model_fn=model_function,
      model_dir=flags_obj.model_dir,
      config=model_config,
      params=params)

  # Attach the Keras model onto a private attribute of the Estimator so it
  # can be retrieved later (used by save_keras_model below).
  reiam_classifier._keras_model = create_model(params, training=True)  #FIXME

  # Set up training and evaluation input functions.
  def get_train_input_fn_and_hook():
    # The hook's feed_fn is filled in lazily, once input_fn has built the
    # placeholders and captured the training arrays in a feed_dict.
    feed_fn_hook = tf.train.FeedFnHook(feed_fn=None)

    def input_fn():
      with tf.name_scope('train_data'):
        # Placeholder shape: batch dimension left as None, rest fixed.
        get_shape_ph = lambda x: [None] + list(x[1:])
        images_px_ph = tf.placeholder(
            images_px_train.dtype, get_shape_ph(images_px_train.shape))
        images_ch_ph = tf.placeholder(
            images_ch_train.dtype, get_shape_ph(images_ch_train.shape))
        labels_ph = tf.placeholder(labels_train.dtype,
                                   get_shape_ph(labels_train.shape))
        parameters_ph = tf.placeholder(
            parameters_train.dtype, get_shape_ph(parameters_train.shape))
        # NOTE(review): parameters_ph is created but never fed (not in
        # feed_dict) nor consumed by the datasets below.
        feed_dict = {
            images_px_ph: images_px_train,
            images_ch_ph: images_ch_train,
            labels_ph: labels_train
        }
        feed_fn_hook.feed_fn = lambda: feed_dict
        dataset1 = tf.data.Dataset.from_tensor_slices(
            (images_px_ph, images_ch_ph))
        dataset1 = dataset1.map(map_func=parse_image_fn)
        dataset2 = tf.data.Dataset.from_tensor_slices((labels_ph))
        dataset2 = dataset2.map(map_func=parse_label_fn)
        # Pair (image, label) element-wise.
        dataset = tf.data.Dataset.zip((dataset1, dataset2))
        if flags_obj.shuffle:
          dataset = dataset.shuffle(
              buffer_size=flags_obj.shuffle_buffer_size)
        dataset = dataset.batch(batch_size=flags_obj.batch_size)
        if flags_obj.prefetch:
          dataset = dataset.prefetch(
              buffer_size=flags_obj.prefetch_buffer_size)
        dataset = dataset.repeat(flags_obj.epochs_between_evals)
        return dataset
    return input_fn, feed_fn_hook

  def get_eval_input_fn_and_hook():
    # Mirrors the train version, but feeds the *_test arrays and uses a
    # larger (20x) batch size for evaluation.
    feed_fn_hook = tf.train.FeedFnHook(feed_fn=None)

    def input_fn():
      with tf.name_scope('eval_data'):
        get_shape_ph = lambda x: [None] + list(x[1:])
        images_px_ph = tf.placeholder(
            images_px_test.dtype, get_shape_ph(images_px_test.shape))
        images_ch_ph = tf.placeholder(
            images_ch_test.dtype, get_shape_ph(images_ch_test.shape))
        labels_ph = tf.placeholder(labels_test.dtype,
                                   get_shape_ph(labels_test.shape))
        parameters_ph = tf.placeholder(
            parameters_test.dtype, get_shape_ph(parameters_test.shape))
        # NOTE(review): parameters_ph unused here as well, as in the train fn.
        feed_dict = {
            images_px_ph: images_px_test,
            images_ch_ph: images_ch_test,
            labels_ph: labels_test
        }
        feed_fn_hook.feed_fn = lambda: feed_dict
        dataset1 = tf.data.Dataset.from_tensor_slices(
            (images_px_ph, images_ch_ph))
        dataset1 = dataset1.map(map_func=parse_image_fn)
        dataset2 = tf.data.Dataset.from_tensor_slices((labels_ph))
        dataset2 = dataset2.map(map_func=parse_label_fn)
        dataset = tf.data.Dataset.zip((dataset1, dataset2))
        if flags_obj.shuffle:
          dataset = dataset.shuffle(
              buffer_size=flags_obj.shuffle_buffer_size)
        dataset = dataset.batch(batch_size=flags_obj.batch_size * 20)
        if flags_obj.prefetch:
          dataset = dataset.prefetch(
              buffer_size=flags_obj.prefetch_buffer_size)
        dataset = dataset.repeat(flags_obj.epochs_between_evals)
        return dataset
    return input_fn, feed_fn_hook

  train_input_fn, train_input_hook = get_train_input_fn_and_hook()
  eval_input_fn, eval_input_hook = get_eval_input_fn_and_hook()

  # Set up hook that outputs training logs every 100 steps.
  train_hooks = hooks_helper.get_train_hooks(flags_obj.hooks,
                                             model_dir=flags_obj.model_dir,
                                             batch_size=flags_obj.batch_size)
  eval_hooks = []

  # Patch the function _get_features_and_labels_from_input_fn()
  # NOTE(review): the input fns/hooks are stashed on the Estimator instance
  # and a module-level _get_features_and_labels_from_input_fn (defined
  # elsewhere in this file) is bound as a method — presumably so the feed
  # hooks get injected into train/eval sessions; confirm against that
  # function's implementation.
  import types
  reiam_classifier.train_input_fn = train_input_fn
  reiam_classifier.train_input_hook = train_input_hook
  reiam_classifier.eval_input_fn = eval_input_fn
  reiam_classifier.eval_input_hook = eval_input_hook
  reiam_classifier._get_features_and_labels_from_input_fn = types.MethodType(
      _get_features_and_labels_from_input_fn, reiam_classifier)

  train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                      hooks=train_hooks,
                                      max_steps=None)
  eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                    hooks=eval_hooks,
                                    steps=None)

  # ____________________________________________________________________________
  # Train and evaluate model.
  import datetime
  from nn_logging import getLogger
  from nn_training import TrainingLog
  start_time = datetime.datetime.now()
  logger = getLogger()
  logger.info('Begin training ...')
  with TrainingLog() as tlog:  # redirect sys.stdout
    # One train_and_evaluate round per epoch; each round trains for
    # flags_obj.epochs_between_evals repeats of the dataset (see input_fn).
    for epoch in range(flags_obj.num_epochs):
      eval_results, _ = tf.estimator.train_and_evaluate(
          reiam_classifier, train_spec, eval_spec)
      print(
          'Epoch {0}/{1} - loss: {2} - global_step: {3} - accuracy: {4} - accuracy_at_k: {5}'
          .format(epoch + 1, flags_obj.num_epochs, eval_results['loss'],
                  eval_results['global_step'], eval_results['accuracy'],
                  eval_results['accuracy_at_k']))
  logger.info('Done training. Time elapsed: {0} sec'.format(
      str(datetime.datetime.now() - start_time)))

  save_keras_model(reiam_classifier, model_name='model_cnn')
  return reiam_classifier