def main():
    tf.logging.set_verbosity(tf.logging.INFO)

    models_dir = 'training/single_column/models3'

    # load the data
    df, stim = load_data()
    columns = list(df.columns)[1:]
    test_data = {'image': stim[:50, 16:-16, 16:-16]}

    # get predictions from all the models
    column_predictions = {}
    for i, column_name in enumerate(columns):
        print('Predicting values for the column "%s"...' % column_name)

        # find the model directory
        best_models_path = root_dir('%s/%d_%s/export/best' % (models_dir, i, column_name))
        latest_model_subdir = sorted(os.listdir(best_models_path), reverse=True)[0]
        latest_model_dir = os.path.join(best_models_path, latest_model_subdir)

        # create predictor
        predict_fn = predictor.from_saved_model(latest_model_dir)

        # get predictions
        column_predictions[column_name] = predict_fn(test_data)['spike']

    # generate a submission file
    with open(root_dir('data/submission/single_column/3.csv'), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['Id'] + columns)
        for i in range(len(test_data['image'])):
            writer.writerow([i] + [column_predictions[column_name][i] for column_name in columns])
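
# Note: `predictor` above refers to TF 1.x's contrib predictor module,
# imported as e.g. `from tensorflow.contrib import predictor`; calling
# `predict_fn(test_data)` returns a dict keyed by the export signature's
# output names ('spike' for these single-column models, 'spikes' for the
# multi-column model below).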
def main():
    # enable TensorFlow logging
    tf.logging.set_verbosity(tf.logging.INFO)
    tf_logging._get_logger().propagate = False  # fix double messages

    # directory with the exported model
    saved_model_dir = root_dir('export/final_model')
    # image size that the model accepts
    image_size = 48

    # load the images from the dataset
    _, imgs = load_data()
    # get test images and crop them to the right size
    imgs = get_test_dataset(imgs, image_size)

    # load the model
    predict_fn = tf.contrib.predictor.from_saved_model(saved_model_dir)
    # get predictions
    res = predict_fn({'image': imgs})
    # print predicted spikes
    pprint(res['spikes'])
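
# `get_test_dataset` is defined elsewhere. A minimal sketch of what it is
# assumed to do, based on the inline crop `stim[:50, 16:-16, 16:-16]` in the
# single-column prediction script: take the first 50 stimuli (the test split)
# and center-crop them to `image_size`; the test-set size and the centering
# are assumptions:
def get_test_dataset(imgs, image_size, num_test_images=50):
    test_imgs = imgs[:num_test_images]
    # center-crop each image to image_size x image_size
    offset_y = (test_imgs.shape[1] - image_size) // 2
    offset_x = (test_imgs.shape[2] - image_size) // 2
    return test_imgs[:, offset_y:offset_y + image_size, offset_x:offset_x + image_size]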
def main(use_best_value: bool):
    models_dir = root_dir('training/single_column/models3')
    model_subdirs = os.listdir(models_dir)

    models_rmses = []
    for subdir in model_subdirs:
        if subdir.startswith('.'):
            continue

        # read the model summaries
        eval_dir = os.path.join(models_dir, subdir, 'eval')
        eval_results = read_eval_metrics(eval_dir)

        if use_best_value:
            # get the best RMSE value
            rmse = None
            for step, metrics in eval_results.items():
                val = metrics['rmse']
                if (rmse is None) or (val < rmse):
                    rmse = val
        else:
            # get the latest RMSE value
            rmse = eval_results[next(reversed(eval_results))]['rmse']

        models_rmses.append(rmse)

    print('Mean RMSE: %.04f' % (sum(models_rmses) / len(models_rmses)))
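
# `read_eval_metrics` comes from TensorFlow's early-stopping utilities (the
# same module that provides `stop_if_no_decrease_hook`); it reads the event
# files in `eval_dir` and returns an ordered mapping from global step to the
# metrics logged at that step, e.g. (values are illustrative only):
#
#   OrderedDict([(10, {'loss': 0.41, 'rmse': 0.64}),
#                (20, {'loss': 0.36, 'rmse': 0.60})])
#
# which is why the code above can iterate `eval_results.items()` for the best
# value or take `next(reversed(eval_results))` for the latest one.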
def generate_submission(model_type, model_dir, submission_num):
    submission_dir = root_dir('data/submission/%s/%s' % (model_type, submission_num))
    if os.path.isdir(submission_dir):
        raise ValueError('Submission #%d already exists' % submission_num)
    os.makedirs(submission_dir)

    # load the data
    df, stim = load_data()
    columns = list(df.columns)[1:]

    config = load_model_config(model_dir)
    test_data = {'image': get_test_dataset(stim, config['model']['image_size'])}

    # find the latest exported model
    export_dir = root_dir(os.path.join(model_dir, 'export', 'best'))
    latest_model_subdir = sorted(os.listdir(export_dir), reverse=True)[0]
    latest_model_dir = os.path.join(export_dir, latest_model_subdir)

    # create the predictor
    predict_fn = predictor.from_saved_model(latest_model_dir)

    # get predictions
    predictions = predict_fn(test_data)['spikes']

    # generate a submission file
    with open(os.path.join(submission_dir, 'submission_%d.csv' % submission_num), 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['Id'] + columns)
        for i in range(len(test_data['image'])):
            writer.writerow([i] + list(predictions[i]))

    # copy the config file
    config_path = get_model_config_path(model_dir)
    copyfile(config_path, os.path.join(submission_dir, CONFIG_FILENAME))

    # copy the model
    copytree(latest_model_dir, os.path.join(submission_dir, 'model'))
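
# `load_model_config` and `get_model_config_path` are defined elsewhere. A
# plausible sketch, assuming the config is the YAML file `CONFIG_FILENAME`
# stored inside the model directory (the filename value is an assumption):
import os
import yaml

CONFIG_FILENAME = 'config.yaml'  # assumed value

def get_model_config_path(model_dir):
    return root_dir(os.path.join(model_dir, CONFIG_FILENAME))

def load_model_config(model_dir):
    with open(get_model_config_path(model_dir)) as f:
        return yaml.safe_load(f)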
def model_fn(features, labels, mode, params):
    image = features['image']
    num_classes = params['model']['num_classes']
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # build convolutional layers
    conv = build_conv_layers(image, params['model']['conv_layers'], is_training)

    # load convolutional and dense layers from a checkpoint
    freeze_variables = {}
    checkpoint_path = params['training'].get('checkpoint_path')
    freeze_restored_variables = params['training'].get('freeze_restored_variables', False)
    if checkpoint_path:
        tvars = tf.trainable_variables()
        assignment_map = {}
        for var in tvars:
            assignment_map[var.name[:-2]] = var
            if freeze_restored_variables:
                freeze_variables[var.name] = True

        tf.train.init_from_checkpoint(root_dir(checkpoint_path), assignment_map)

    # build dense layers
    dense = build_dense_layers(conv, params['model']['dense_layers'], is_training)

    # get logits
    if 'subnet' in params['model']:
        # build NN for each neuron
        subnet_dropout_rate = params['model']['subnet'].get('subnet_dropout_rate', 0)
        if subnet_dropout_rate:
            dense = tf.layers.dropout(inputs=dense, rate=subnet_dropout_rate, training=is_training)

        logits_layer_params = dict(params['model']['logits_layer'])
        logits_layer_params['num_units'] = 1

        logits_concat = []
        for i in range(num_classes):
            subnet_dense = build_dense_layers(dense, params['model']['subnet']['dense_layers'], is_training)
            subnet_logits = build_dense_layers(subnet_dense, [logits_layer_params], is_training)
            logits_concat.append(subnet_logits)

        logits = tf.concat(logits_concat, axis=-1)
    else:
        # a single layer to get a spike
        logits_layer_params = dict(params['model']['logits_layer'])
        logits_layer_params['num_units'] = num_classes
        logits = build_dense_layers(dense, [logits_layer_params], is_training)

    # return prediction specification
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions={'spikes': logits})

    # make sure that images were distorted correctly and display them in TensorBoard
    max_images = 12
    images = image[:max_images]
    assert_min = tf.assert_greater_equal(tf.reduce_min(images), 0.0,
                                         message='Image contains values less than 0')
    assert_max = tf.assert_less_equal(tf.reduce_max(images), 1.0,
                                      message='Image contains values greater than 1')
    with tf.control_dependencies([assert_min, assert_max]):
        tf.summary.image('images', tf.cast(images * 255, dtype=tf.uint8), max_outputs=max_images)

    # compute the loss, masking out NaN labels
    nan_mask = tf.cast(features['nan_mask'], tf.float32)
    mse_loss = tf.losses.mean_squared_error(labels=labels, predictions=logits, weights=nan_mask)
    loss = mse_loss + tf.losses.get_regularization_loss()

    # get train variables (skipping the frozen ones)
    train_vars = [var for var in tf.trainable_variables() if var.name not in freeze_variables]

    # return training specification
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=params['training']['learning_rate'],
            optimizer='Adam',
            summaries=['learning_rate', 'loss', 'gradients', 'gradient_norm'],
            variables=train_vars,
        )

        # perform update ops for batch normalization
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        train_op = tf.group([train_op, update_ops])

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # evaluation metrics
    eval_metric_ops = {
        'rmse': tf.metrics.root_mean_squared_error(labels=labels, predictions=logits, weights=nan_mask),
    }
    # RMSE per column
    for i in range(num_classes):
        eval_metric_ops['rmse/column%d' % i] = tf.metrics.root_mean_squared_error(
            labels=labels[:, i], predictions=logits[:, i], weights=nan_mask[:, i])

    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
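
# `build_conv_layers` and `build_dense_layers` are defined elsewhere. As a
# rough sketch of the contract assumed by `model_fn`, `build_dense_layers`
# takes a list of per-layer parameter dicts and stacks fully connected
# layers; the `num_units` key is confirmed by the logits-layer usage above,
# while the activation and dropout handling here are assumptions:
def build_dense_layers(inputs, layers_params, is_training):
    net = inputs
    for layer_params in layers_params:
        # fully connected layer (ReLU is an assumed default activation)
        net = tf.layers.dense(net, units=layer_params['num_units'], activation=tf.nn.relu)
        # optional dropout, active only during training
        dropout_rate = layer_params.get('dropout_rate', 0)
        if dropout_rate:
            net = tf.layers.dropout(net, rate=dropout_rate, training=is_training)
    return net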
def main():
    tf.logging.set_verbosity(tf.logging.INFO)

    eval_steps = 10
    eval_size = 50
    export_best_models = True
    train_column_id = None
    models_dir = 'training/single_column/models3'
    models_postfix = ''

    # load the data
    df, imgs = load_data()
    columns = list(df.columns)[1:]

    # train a model for each column
    for i, column_name in enumerate(columns):
        if (train_column_id is not None) and (i != train_column_id):
            continue

        print('Training the model for the column "%s"...' % column_name)

        train_imgs, train_labels, eval_imgs, eval_labels = get_column_datasets(column_name, df, imgs, eval_size)
        print('Train size: %d, eval size: %d' % (len(train_labels), len(eval_labels)))

        # create the estimator
        model_dir = root_dir('%s/%d_%s' % (models_dir, i, column_name))
        if models_postfix:
            model_dir += '_' + models_postfix

        estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            config=tf.estimator.RunConfig(
                model_dir=model_dir,
                save_checkpoints_steps=eval_steps,
                save_summary_steps=eval_steps,
            ),
        )

        # training input function
        train_data = {'image': train_imgs}
        train_input_fn = tf.estimator.inputs.numpy_input_fn(x=train_data, y=train_labels,
                                                            batch_size=len(train_labels),
                                                            num_epochs=None, shuffle=True)

        # evaluation input function
        eval_data = {'image': eval_imgs}
        eval_input_fn = tf.estimator.inputs.numpy_input_fn(x=eval_data, y=eval_labels,
                                                           num_epochs=1, shuffle=False)

        # hooks
        early_stopping_hook = early_stopping.stop_if_no_decrease_hook(estimator, 'rmse', eval_steps * 10,
                                                                      run_every_secs=None,
                                                                      run_every_steps=eval_steps)
        exporter = tf.estimator.BestExporter(
            name='best',
            serving_input_receiver_fn=serving_input_receiver_fn,
            exports_to_keep=1,
            # should be "<=" to export the best model on the 1st evaluation
            compare_fn=lambda best_eval_result, current_eval_result:
                current_eval_result['rmse'] <= best_eval_result['rmse'],
        )

        # train and evaluate
        train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, hooks=[early_stopping_hook])
        eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                          exporters=(exporter if export_best_models else None),
                                          throttle_secs=0)

        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
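
# `serving_input_receiver_fn`, passed to `BestExporter` above, is defined
# elsewhere. A minimal sketch of a typical receiver for this model; the
# 48x48 single-channel input shape is an assumption taken from the
# `image_size = 48` used by the prediction script:
def serving_input_receiver_fn():
    image = tf.placeholder(dtype=tf.float32, shape=[None, 48, 48], name='image')
    return tf.estimator.export.ServingInputReceiver(
        features={'image': image},
        receiver_tensors={'image': image})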
def load_data():
    df = pd.read_csv(root_dir('data/train.csv'))
    imgs = np.load(root_dir('data/stim.npy'))
    return df, imgs
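
# `root_dir` resolves paths relative to the project root. A minimal sketch,
# assuming the project root is the directory containing this module (the
# exact anchor is an assumption):
import os

def root_dir(relative_path=''):
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), relative_path)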
def tune_hyperparameters(model_type: str, experiment_group: str, experiment_name: str):
    ray_num_cpus = 4
    num_cpus_per_process = 1
    num_gpus_per_process = 0.5

    ray.init(num_cpus=ray_num_cpus, ignore_reinit_error=True, include_webui=False)

    tuning_config_dir = root_dir('configs/%s/hp_tuning' % model_type)
    models_dir = root_dir('training/%s/hp_tuning/%s/%s' % (model_type, experiment_group, experiment_name))
    ray_results_dir = root_dir('ray_results/%s' % experiment_group)

    # read the base config
    with open(os.path.join(tuning_config_dir, 'config.yaml')) as f:
        base_config = yaml.safe_load(f)

    # read mutations config
    with open(os.path.join(tuning_config_dir, 'mutations.yaml')) as f:
        mutations_grid = yaml.safe_load(f)

    # get mutated configs
    mutations = get_mutations(mutations_grid)

    # use only a fraction of the GPU
    session_config = None
    if num_gpus_per_process < 1:
        session_config = tf.ConfigProto()
        session_config.gpu_options.per_process_gpu_memory_fraction = num_gpus_per_process

    def tune_fn(tune_config, reporter):
        mutation = tune_config['mutation']

        # apply mutation to a base config
        config = mutate_config(base_config, mutation)

        # get model's directory
        model_dir = os.path.join(models_dir, generate_mutation_name(mutation))

        # save the config file to the model's directory
        write_model_config(model_dir, yaml.safe_dump(config))

        # train the model
        model_builder = create_builder(model_type, config)
        train(model_builder, model_dir, reporter, session_config)

    configuration = tune.Experiment(
        experiment_name,
        run=tune_fn,
        local_dir=ray_results_dir,
        config={
            'mutation': tune.grid_search(mutations),
        },
        trial_name_creator=tune.function(
            lambda trial: generate_mutation_name(trial.config['mutation'])),
        resources_per_trial={
            'cpu': num_cpus_per_process,
            'gpu': num_gpus_per_process,
        },
    )

    tune.run_experiments(configuration)
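
# `get_mutations`, `mutate_config`, and `generate_mutation_name` are defined
# elsewhere. A sketch of `get_mutations`, assuming `mutations.yaml` maps each
# parameter name to a list of candidate values and that the grid is expanded
# as a full Cartesian product (both assumptions):
import itertools

def get_mutations(mutations_grid):
    keys = sorted(mutations_grid)
    # one dict per point of the parameter grid
    return [dict(zip(keys, values))
            for values in itertools.product(*(mutations_grid[key] for key in keys))]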