def testing_generator(cnn_model_object, cnn_metadata_dict,
                      cnn_feature_layer_name, num_examples_total):
    """Generates testing examples for upconvnet.

    Each yielded pair contains CNN feature-layer activations and the
    corresponding radar images.

    :param cnn_model_object: See doc for `trainval_generator`.
    :param cnn_metadata_dict: Same.
    :param cnn_feature_layer_name: Same.
    :param num_examples_total: Number of examples to read.
    :return: feature_matrix: See doc for `trainval_generator`.
    :return: radar_matrix: Same.
    """

    # Copy training options so the caller's metadata dict is not mutated.
    option_dict = copy.deepcopy(
        cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    )
    option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EX_PER_TESTING_BATCH
    )

    # Truncated model that maps CNN inputs to feature-layer activations.
    feature_model_object = cnn.model_to_feature_generator(
        model_object=cnn_model_object,
        feature_layer_name=cnn_feature_layer_name)

    # Choose the example generator that matches the CNN's input format.
    if cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        example_generator = testing_io.myrorss_generator_2d3d(
            option_dict=option_dict,
            desired_num_examples=num_examples_total)
    elif cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is None:
        example_generator = testing_io.generator_2d_or_3d(
            option_dict=option_dict,
            desired_num_examples=num_examples_total)
    else:
        example_generator = testing_io.gridrad_generator_2d_reduced(
            option_dict=option_dict,
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            desired_num_examples=num_examples_total)

    while True:
        try:
            storm_object_dict = next(example_generator)
        except StopIteration:
            return

        predictor_matrices = storm_object_dict[testing_io.INPUT_MATRICES_KEY]

        # The radar matrix is the first input when the CNN takes several.
        radar_matrix = (
            predictor_matrices[0] if isinstance(predictor_matrices, list)
            else predictor_matrices
        )

        feature_matrix = feature_model_object.predict(
            predictor_matrices, batch_size=radar_matrix.shape[0]
        )

        yield (feature_matrix, radar_matrix)
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, do_backwards_test,
         separate_radar_heights, downsampling_keys, downsampling_values,
         num_bootstrap_reps, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param do_backwards_test: Same.
    :param separate_radar_heights: Same.
    :param downsampling_keys: Same.
    :param downsampling_values: Same.
    :param num_bootstrap_reps: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    metafile_name = cnn.find_metafile(model_file_name=model_file_name)

    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(metafile_name)

    # NOTE(review): downsampling is applied only when MORE than one key is
    # given; a single key/value pair is ignored — presumably the CLI passes a
    # one-element placeholder when downsampling is off.  Confirm.
    if len(downsampling_keys) > 1:
        downsampling_dict = dict(
            list(zip(downsampling_keys, downsampling_values)))
    else:
        downsampling_dict = None

    # Reuse the model's training options, overriding the file list, time
    # window, sampling fractions, and batch size for this test period.
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = downsampling_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH)

    # Choose the example generator that matches the CNN's input format.
    if cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_num_examples=num_examples,
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY])
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    predictor_matrices = None  # list of numpy arrays, built incrementally

    print(SEPARATOR_STRING)

    # One generator call per example file; the generator may run out early.
    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_storm_id_strings += this_storm_object_dict[
            testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]))

        these_target_values = this_storm_object_dict[
            testing_io.TARGET_ARRAY_KEY]

        # A 2-D target array is one-hot-encoded; collapse to class indices.
        if len(these_target_values.shape) > 1:
            these_target_values = numpy.argmax(these_target_values, axis=1)

        target_values = numpy.concatenate((target_values, these_target_values))

        these_predictor_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        if predictor_matrices is None:
            # deepcopy so later in-place concatenation cannot alias the
            # generator's arrays.
            predictor_matrices = copy.deepcopy(these_predictor_matrices)
        else:
            for k in range(len(predictor_matrices)):
                predictor_matrices[k] = numpy.concatenate(
                    (predictor_matrices[k], these_predictor_matrices[k]))

    print(SEPARATOR_STRING)

    correlation_matrix, predictor_names = correlation.get_pearson_correlations(
        predictor_matrices=predictor_matrices,
        cnn_metadata_dict=cnn_metadata_dict,
        separate_radar_heights=separate_radar_heights)
    print(SEPARATOR_STRING)

    num_predictors = len(predictor_names)

    # Print the upper triangle (including diagonal) of the correlation matrix.
    for i in range(num_predictors):
        for j in range(i, num_predictors):
            print(('Pearson correlation between "{0:s}" and "{1:s}" = {2:.3f}'
                   ).format(predictor_names[i], predictor_names[j],
                            correlation_matrix[i, j]))

    print(SEPARATOR_STRING)

    if do_backwards_test:
        result_dict = permutation.run_backwards_test(
            model_object=model_object,
            predictor_matrices=predictor_matrices,
            target_values=target_values,
            cnn_metadata_dict=cnn_metadata_dict,
            cost_function=permutation_utils.negative_auc_function,
            separate_radar_heights=separate_radar_heights,
            num_bootstrap_reps=num_bootstrap_reps)
    else:
        result_dict = permutation.run_forward_test(
            model_object=model_object,
            predictor_matrices=predictor_matrices,
            target_values=target_values,
            cnn_metadata_dict=cnn_metadata_dict,
            cost_function=permutation_utils.negative_auc_function,
            separate_radar_heights=separate_radar_heights,
            num_bootstrap_reps=num_bootstrap_reps)

    print(SEPARATOR_STRING)

    # Attach provenance (model, targets, storm IDs/times) before writing.
    result_dict[permutation_utils.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation_utils.TARGET_VALUES_KEY] = target_values
    result_dict[permutation_utils.FULL_IDS_KEY] = full_storm_id_strings
    result_dict[permutation_utils.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation_utils.write_results(result_dict=result_dict,
                                    pickle_file_name=output_file_name)
def _run(model_file_name, example_file_name, first_time_string,
         last_time_string, top_output_dir_name):
    """Applies CNN to one example file.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param top_output_dir_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Metafile is assumed to live beside the model file.
    model_directory_name, _ = os.path.split(model_file_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)

    # Restrict the generator to one example file and the given time window;
    # disable class-based sampling so every storm object is used.
    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = [example_file_name]
    training_option_dict[
        trainval_io.FIRST_STORM_TIME_KEY] = first_time_unix_sec
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = last_time_unix_sec

    # Choose the example generator that matches the CNN's input format.
    # LARGE_INTEGER effectively means "read all examples in the file".
    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)

    include_soundings = (training_option_dict[trainval_io.SOUNDING_FIELDS_KEY]
                         is not None)

    # StopIteration here means the file has no storm objects in the window;
    # in that case an empty prediction file is still written below.
    try:
        storm_object_dict = next(generator_object)
    except StopIteration:
        storm_object_dict = None

    print(SEPARATOR_STRING)

    if storm_object_dict is not None:
        observed_labels = storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        list_of_predictor_matrices = storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        # By convention the sounding matrix, if present, is the last input.
        if include_soundings:
            sounding_matrix = list_of_predictor_matrices[-1]
        else:
            sounding_matrix = None

        if model_metadata_dict[cnn.CONV_2D3D_KEY]:
            # Upsampled reflectivity is fed through the generic 2-D/3-D
            # entry point; otherwise the hybrid 2-D/3-D CNN takes separate
            # reflectivity and azimuthal-shear inputs.
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                class_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=list_of_predictor_matrices[0],
                    sounding_matrix=sounding_matrix, verbose=True)
            else:
                class_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=list_of_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=list_of_predictor_matrices[1],
                    sounding_matrix=sounding_matrix, verbose=True)
        else:
            class_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=list_of_predictor_matrices[0],
                sounding_matrix=sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        # Summarize the distribution of positive-class probabilities
        # (column 1 — assumes binary classification; TODO confirm).
        num_examples = class_probability_matrix.shape[0]

        for k in [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
            print(
                '{0:d}th percentile of {1:d} forecast probs = {2:.4f}'.format(
                    k, num_examples,
                    numpy.percentile(class_probability_matrix[:, 1], k)))

        print('\n')

    # Recover the target variable's name from the training options.
    target_param_dict = target_val_utils.target_name_to_params(
        training_option_dict[trainval_io.TARGET_NAME_KEY])

    event_type_string = target_param_dict[target_val_utils.EVENT_TYPE_KEY]

    if event_type_string == linkage.TORNADO_EVENT_STRING:
        genesis_only = False
    elif event_type_string == linkage.TORNADOGENESIS_EVENT_STRING:
        genesis_only = True
    else:
        genesis_only = None

    # NOTE(review): max linkage distance is hard-coded to 10 km instead of
    # being read from target_param_dict — confirm this is intentional.
    target_name = target_val_utils.target_params_to_name(
        min_lead_time_sec=target_param_dict[
            target_val_utils.MIN_LEAD_TIME_KEY],
        max_lead_time_sec=target_param_dict[
            target_val_utils.MAX_LEAD_TIME_KEY],
        min_link_distance_metres=target_param_dict[
            target_val_utils.MIN_LINKAGE_DISTANCE_KEY],
        max_link_distance_metres=10000., genesis_only=genesis_only)

    output_file_name = prediction_io.find_file(
        top_prediction_dir_name=top_output_dir_name,
        first_init_time_unix_sec=first_time_unix_sec,
        last_init_time_unix_sec=last_time_unix_sec, gridded=False,
        raise_error_if_missing=False)

    print('Writing "{0:s}" predictions to: "{1:s}"...'.format(
        target_name, output_file_name))

    if storm_object_dict is None:
        # No storm objects found: write an empty (0-row) prediction file so
        # downstream code still finds a file for this period.
        num_output_neurons = (
            model_object.layers[-1].output.get_shape().as_list()[-1])
        num_classes = max([num_output_neurons, 2])
        class_probability_matrix = numpy.full((0, num_classes), numpy.nan)

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=output_file_name,
            class_probability_matrix=class_probability_matrix,
            storm_ids=[], storm_times_unix_sec=numpy.array([], dtype=int),
            target_name=target_name,
            observed_labels=numpy.array([], dtype=int))

        return

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        storm_ids=storm_object_dict[testing_io.FULL_IDS_KEY],
        storm_times_unix_sec=storm_object_dict[testing_io.STORM_TIMES_KEY],
        target_name=target_name, observed_labels=observed_labels)
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, num_bootstrap_reps,
         confidence_level, class_fraction_keys, class_fraction_values,
         output_dir_name):
    """Evaluates CNN (convolutional neural net) predictions.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param last_spc_date_string: Same.
    :param first_spc_date_string: Same.
    :param num_examples: Same.
    :param num_bootstrap_reps: Same.
    :param confidence_level: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # This script supports only binary classification (<= 2 output neurons).
    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1])

    if num_output_neurons > 2:
        error_string = (
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification. This script handles only binary classification.'
        ).format(num_output_neurons)

        raise ValueError(error_string)

    # A single input tensor with exactly one spatial dimension means the
    # model takes soundings only (no radar images).
    soundings_only = False

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    if len(list_of_input_tensors) == 1:
        these_spatial_dim = numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int)
        soundings_only = len(these_spatial_dim) == 1

    # Metafile is assumed to live beside the model file.
    model_directory_name, _ = os.path.split(model_file_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # NOTE(review): sampling fractions are applied only when MORE than one
    # key is given; presumably the CLI passes a one-element placeholder when
    # sampling is off.  Confirm.
    if len(class_fraction_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            list(zip(class_fraction_keys, class_fraction_values)))
    else:
        class_to_sampling_fraction_dict = None

    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = class_to_sampling_fraction_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    # Choose the example generator that matches the CNN's input format.
    if soundings_only:
        generator_object = testing_io.sounding_generator(
            option_dict=training_option_dict, num_examples_total=num_examples)
    elif model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=num_examples)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=num_examples)

    include_soundings = (training_option_dict[trainval_io.SOUNDING_FIELDS_KEY]
                         is not None)

    forecast_probabilities = numpy.array([])
    observed_labels = numpy.array([], dtype=int)

    # One generator call per example file; the generator may run out early.
    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        observed_labels = numpy.concatenate((
            observed_labels,
            this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        # By convention the sounding matrix, if present, is the last input.
        if include_soundings:
            this_sounding_matrix = these_predictor_matrices[-1]
        else:
            this_sounding_matrix = None

        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object,
                sounding_matrix=this_sounding_matrix, verbose=True)
        elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
            # Upsampled reflectivity goes through the generic entry point;
            # otherwise the hybrid CNN takes separate reflectivity and
            # azimuthal-shear inputs.
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=these_predictor_matrices[0],
                    sounding_matrix=this_sounding_matrix, verbose=True)
            else:
                this_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=these_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                    sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        # Last column = positive-class probability (binary model, checked
        # above).
        forecast_probabilities = numpy.concatenate(
            (forecast_probabilities, this_probability_matrix[:, -1]))

    model_eval_helper.run_evaluation(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        num_bootstrap_reps=num_bootstrap_reps,
        confidence_level=confidence_level, output_dir_name=output_dir_name)
def _run(model_file_name, component_type_string, target_class, layer_name,
         neuron_indices_flattened, channel_indices, top_example_dir_name,
         first_spc_date_string, last_spc_date_string, output_file_name):
    """Creates activation maps for one class, neuron, or channel of a CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices_flattened: Same.
    :param channel_indices: Same.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_file_name: Same.
    """

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_geq_numpy_array(channel_indices, 0)

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        # The flattened index array encodes several neurons, separated by
        # negative sentinel values; NaN-splitting recovers one row of
        # indices per neuron.
        neuron_indices_flattened = neuron_indices_flattened.astype(float)
        neuron_indices_flattened[neuron_indices_flattened < 0] = numpy.nan

        neuron_indices_2d_list = general_utils.split_array_by_nan(
            neuron_indices_flattened)
        neuron_index_matrix = numpy.array(neuron_indices_2d_list, dtype=int)
    else:
        neuron_index_matrix = None

    # Read model and metadata.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Metafile is assumed to live beside the model file.
    metadata_file_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Create generator.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    # Choose the example generator that matches the CNN's input format.
    # LARGE_INTEGER effectively means "read all examples".
    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=LARGE_INTEGER)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            num_examples_total=LARGE_INTEGER)

    # Compute activation for each example (storm object) and model component.
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    activation_matrix = None  # rows = storm objects, columns = components

    print(SEPARATOR_STRING)

    # One generator call per example file; the generator may run out early.
    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
        except StopIteration:
            break

        this_list_of_input_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]
        these_id_strings = this_storm_object_dict[testing_io.FULL_IDS_KEY]
        these_times_unix_sec = this_storm_object_dict[
            testing_io.STORM_TIMES_KEY]

        full_id_strings += these_id_strings
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec, these_times_unix_sec))

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print('Computing activations for target class {0:d}...'.format(
                target_class))

            this_activation_matrix = (
                model_activation.get_class_activation_for_examples(
                    model_object=model_object, target_class=target_class,
                    list_of_input_matrices=this_list_of_input_matrices))

            # One column for the single class component.
            this_activation_matrix = numpy.reshape(
                this_activation_matrix, (len(this_activation_matrix), 1))

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            this_activation_matrix = None

            # One column per neuron, in the order given by
            # neuron_index_matrix.
            for j in range(neuron_index_matrix.shape[0]):
                print((
                    'Computing activations for neuron {0:s} in layer '
                    '"{1:s}"...'
                ).format(str(neuron_index_matrix[j, :]), layer_name))

                these_activations = (
                    model_activation.get_neuron_activation_for_examples(
                        model_object=model_object, layer_name=layer_name,
                        neuron_indices=neuron_index_matrix[j, :],
                        list_of_input_matrices=this_list_of_input_matrices))

                these_activations = numpy.reshape(
                    these_activations, (len(these_activations), 1))

                if this_activation_matrix is None:
                    # "+ 0." forces a copy of the array.
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)
        else:
            this_activation_matrix = None

            # One column per channel; neuron activations within a channel are
            # collapsed with K.max.
            for this_channel_index in channel_indices:
                print(('Computing activations for channel {0:d} in layer '
                       '"{1:s}"...').format(this_channel_index, layer_name))

                these_activations = (
                    model_activation.get_channel_activation_for_examples(
                        model_object=model_object, layer_name=layer_name,
                        channel_index=this_channel_index,
                        list_of_input_matrices=this_list_of_input_matrices,
                        stat_function_for_neuron_activations=K.max))

                these_activations = numpy.reshape(
                    these_activations, (len(these_activations), 1))

                if this_activation_matrix is None:
                    # "+ 0." forces a copy of the array.
                    this_activation_matrix = these_activations + 0.
                else:
                    this_activation_matrix = numpy.concatenate(
                        (this_activation_matrix, these_activations), axis=1)

        # Append this batch's rows to the full activation matrix.
        if activation_matrix is None:
            activation_matrix = this_activation_matrix + 0.
        else:
            activation_matrix = numpy.concatenate(
                (activation_matrix, this_activation_matrix), axis=0)

    print(SEPARATOR_STRING)

    print('Writing activations to file: "{0:s}"...'.format(output_file_name))
    model_activation.write_file(
        pickle_file_name=output_file_name,
        activation_matrix=activation_matrix,
        full_id_strings=full_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        model_file_name=model_file_name,
        component_type_string=component_type_string,
        target_class=target_class, layer_name=layer_name,
        neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, class_fraction_keys,
         class_fraction_values, num_bootstrap_iters,
         bootstrap_confidence_level, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param num_bootstrap_iters: Same.
    :param bootstrap_confidence_level: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Metafile is assumed to live beside the model file.
    model_directory_name, _ = os.path.split(model_file_name)
    metadata_file_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # NOTE(review): sampling fractions are applied only when MORE than one
    # key is given; presumably the CLI passes a one-element placeholder when
    # sampling is off.  Confirm.
    if len(class_fraction_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            list(zip(class_fraction_keys, class_fraction_values)))
    else:
        class_to_sampling_fraction_dict = None

    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = class_to_sampling_fraction_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    # Choose the example generator that matches the CNN's input format.
    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY],
            num_examples_total=num_examples)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=num_examples)

    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    list_of_predictor_matrices = None  # built incrementally in the loop

    print(SEPARATOR_STRING)

    # One generator call per example file; the generator may run out early.
    for _ in range(len(example_file_names)):
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            break

        full_id_strings += this_storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]))

        these_target_values = this_storm_object_dict[
            testing_io.TARGET_ARRAY_KEY]

        # A 2-D target array is one-hot-encoded; collapse to class indices.
        if len(these_target_values.shape) > 1:
            these_target_values = numpy.argmax(these_target_values, axis=1)

        target_values = numpy.concatenate((target_values, these_target_values))

        these_predictor_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        if list_of_predictor_matrices is None:
            # deepcopy so later in-place concatenation cannot alias the
            # generator's arrays.
            list_of_predictor_matrices = copy.deepcopy(
                these_predictor_matrices)
        else:
            for k in range(len(list_of_predictor_matrices)):
                list_of_predictor_matrices[k] = numpy.concatenate(
                    (list_of_predictor_matrices[k],
                     these_predictor_matrices[k]))

    predictor_names_by_matrix = _create_predictor_names(
        model_metadata_dict=model_metadata_dict,
        list_of_predictor_matrices=list_of_predictor_matrices)

    for i in range(len(predictor_names_by_matrix)):
        print('Predictors in {0:d}th matrix:\n{1:s}\n'.format(
            i + 1, str(predictor_names_by_matrix[i])))

    print(SEPARATOR_STRING)

    list_of_layer_operation_dicts = model_metadata_dict[
        cnn.LAYER_OPERATIONS_KEY]

    # Correlations are printed only for the reduced-GridRAD input format.
    if list_of_layer_operation_dicts is not None:
        correlation_matrix, predictor_names = _get_pearson_correlations(
            list_of_predictor_matrices=list_of_predictor_matrices,
            predictor_names_by_matrix=predictor_names_by_matrix,
            sounding_heights_m_agl=training_option_dict[
                trainval_io.SOUNDING_HEIGHTS_KEY])

        # Print the upper triangle (including diagonal) of the matrix.
        for i in range(len(predictor_names)):
            for j in range(i, len(predictor_names)):
                print((
                    'Pearson correlation between "{0:s}" and "{1:s}" = '
                    '{2:.4f}'
                ).format(predictor_names[i], predictor_names[j],
                         correlation_matrix[i, j]))

        print('\n')

    # Pick the prediction function matching the CNN's input dimensionality.
    if model_metadata_dict[cnn.CONV_2D3D_KEY]:
        prediction_function = permutation.prediction_function_2d3d_cnn
    else:
        # Subtract batch and channel dimensions to get spatial rank.
        num_radar_dimensions = len(list_of_predictor_matrices[0].shape) - 2

        if num_radar_dimensions == 2:
            prediction_function = permutation.prediction_function_2d_cnn
        else:
            prediction_function = permutation.prediction_function_3d_cnn

    print(SEPARATOR_STRING)

    result_dict = permutation.run_permutation_test(
        model_object=model_object,
        list_of_input_matrices=list_of_predictor_matrices,
        predictor_names_by_matrix=predictor_names_by_matrix,
        target_values=target_values,
        prediction_function=prediction_function,
        cost_function=permutation.negative_auc_function,
        num_bootstrap_iters=num_bootstrap_iters,
        bootstrap_confidence_level=bootstrap_confidence_level)

    print(SEPARATOR_STRING)

    # Attach provenance (model, targets, storm IDs/times) before writing.
    result_dict[permutation.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation.TARGET_VALUES_KEY] = target_values
    result_dict[permutation.FULL_IDS_KEY] = full_id_strings
    result_dict[permutation.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation.write_results(result_dict=result_dict,
                              pickle_file_name=output_file_name)
def _run(model_file_name, top_example_dir_name, storm_metafile_name,
         output_dir_name):
    """Uses trained CNN to make predictions for specific examples.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param storm_metafile_name: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading CNN from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # Number of neurons in the last layer determines the number of classes.
    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    # This script supports only binary classification (<= 2 output neurons).
    if num_output_neurons > 2:
        error_string = (
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification. This script handles only binary classification.'
        ).format(num_output_neurons)
        raise ValueError(error_string)

    soundings_only = False

    # `model_object.input` is a list only for multi-input models.
    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    # A single input tensor with one spatial dimension (height only) means
    # the CNN takes soundings and no radar images.
    if len(list_of_input_tensors) == 1:
        these_spatial_dim = numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int
        )
        soundings_only = len(these_spatial_dim) == 1

    cnn_metafile_name = cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True
    )

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    print('Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    desired_full_id_strings, desired_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name)
    )

    # Find the one example file for each SPC date touched by the desired
    # storm objects.
    unique_spc_date_strings = list(set([
        time_conversion.time_to_spc_date_string(t)
        for t in desired_times_unix_sec
    ]))

    example_file_names = [
        input_examples.find_example_file(
            top_directory_name=top_example_dir_name, shuffled=False,
            spc_date_string=d, raise_error_if_missing=True
        ) for d in unique_spc_date_strings
    ]

    first_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.min(desired_times_unix_sec)
    )
    last_spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.max(desired_times_unix_sec)
    )

    # Point the training options at the relevant files/time window.  NOTE:
    # this mutates the dict inside `cnn_metadata_dict` in place.
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string)
    )
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string)
    )
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH
    )

    # Choose the generator matching the CNN's input format (soundings only,
    # reduced GridRad, MYRORSS 2-D/3-D, or plain 2-D/3-D).
    if soundings_only:
        generator_object = testing_io.sounding_generator(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)
    elif cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec,
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]
        )
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_full_id_strings=desired_full_id_strings,
            desired_times_unix_sec=desired_times_unix_sec)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    # Accumulators, filled batch by batch as the generator yields.
    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    observed_labels = numpy.array([], dtype=int)
    class_probability_matrix = None

    while True:
        try:
            this_storm_object_dict = next(generator_object)
            print(SEPARATOR_STRING)
        except StopIteration:
            # Generator exhausted: all desired examples have been processed.
            break

        full_storm_id_strings += this_storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))
        observed_labels = numpy.concatenate((
            observed_labels,
            this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        # By convention the sounding matrix, when present, is the last input
        # matrix -- presumably guaranteed by the generators; TODO confirm.
        if include_soundings:
            this_sounding_matrix = these_predictor_matrices[-1]
        else:
            this_sounding_matrix = None

        # Dispatch to the CNN-application routine matching the input format.
        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object,
                sounding_matrix=this_sounding_matrix, verbose=True)
        elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                # With upsampled reflectivity, the 2-D/3-D model takes one
                # merged radar matrix instead of separate refl/shear inputs.
                this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=these_predictor_matrices[0],
                    sounding_matrix=this_sounding_matrix, verbose=True)
            else:
                this_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=these_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                    sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        if class_probability_matrix is None:
            # "+ 0." forces a copy rather than aliasing the batch result.
            class_probability_matrix = this_probability_matrix + 0.
        else:
            class_probability_matrix = numpy.concatenate(
                (class_probability_matrix, this_probability_matrix), axis=0
            )

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=training_option_dict[trainval_io.TARGET_NAME_KEY],
        model_file_name=model_file_name
    )
def _apply_upconvnet_one_file(
        example_file_name, num_examples, upconvnet_model_object,
        cnn_model_object, cnn_metadata_dict, cnn_feature_layer_name,
        upconvnet_file_name, top_output_dir_name):
    """Applies upconvnet to examples from one file.

    :param example_file_name: Path to input file (will be read by
        `input_examples.read_example_file`).
    :param num_examples: Number of examples to read.
    :param upconvnet_model_object: Trained upconvnet (instance of
        `keras.models.Model` or `keras.models.Sequential`).
    :param cnn_model_object: Trained CNN (instance of `keras.models.Model` or
        `keras.models.Sequential`).
    :param cnn_metadata_dict: Dictionary returned by
        `cnn.read_model_metadata`.
    :param cnn_feature_layer_name: Name of CNN layer whose output is the
        feature vector, which is the input to the upconvnet.
    :param upconvnet_file_name: See documentation at top of file.
    :param top_output_dir_name: Same.
    """

    # Do housekeeping.  NOTE: this mutates the training-option dict inside
    # `cnn_metadata_dict` in place.
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = [example_file_name]

    # Choose the generator matching the CNN's input format (reduced GridRad,
    # MYRORSS 2-D/3-D, or plain 2-D/3-D).
    if cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_num_examples=num_examples,
            list_of_operation_dicts=cnn_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY])
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)

    # Apply upconvnet.
    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    reconstructed_radar_matrix = None
    mse_by_example = numpy.array([], dtype=float)

    while True:
        try:
            this_storm_object_dict = next(generator_object)
            print('\n')
        except StopIteration:
            # Generator exhausted: all desired examples have been processed.
            break

        full_storm_id_strings += this_storm_object_dict[
            testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec,
             this_storm_object_dict[testing_io.STORM_TIMES_KEY]))

        these_input_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        # The first input matrix is the radar matrix that the upconvnet
        # tries to reconstruct.
        this_actual_matrix = these_input_matrices[0]

        this_reconstructed_matrix = upconvnet.apply_upconvnet(
            cnn_input_matrices=these_input_matrices,
            cnn_model_object=cnn_model_object,
            cnn_feature_layer_name=cnn_feature_layer_name,
            ucn_model_object=upconvnet_model_object, verbose=True)
        print(MINOR_SEPARATOR_STRING)

        if reconstructed_radar_matrix is None:
            # "+ 0." forces a copy rather than aliasing the batch result.
            reconstructed_radar_matrix = this_reconstructed_matrix + 0.
        else:
            reconstructed_radar_matrix = numpy.concatenate(
                (reconstructed_radar_matrix, this_reconstructed_matrix),
                axis=0)

        # MSE for each example = mean squared difference over all non-batch
        # axes.  (Replaces a roundabout `numpy.linspace(...).tolist()` with
        # the equivalent `range`.)
        num_dimensions = len(this_actual_matrix.shape)
        all_axes_except_first = tuple(range(1, num_dimensions))

        these_mse = numpy.mean(
            (this_actual_matrix - this_reconstructed_matrix) ** 2,
            axis=all_axes_except_first)
        mse_by_example = numpy.concatenate((mse_by_example, these_mse))

    print(MINOR_SEPARATOR_STRING)

    # If the generator yielded nothing, there is nothing to write.
    if len(full_storm_id_strings) == 0:
        return

    # Typo fix: message previously read "Mean sqaured error".
    print('Mean squared error = {0:.3e}'.format(numpy.mean(mse_by_example)))

    # Denormalize reconstructed images.  Soundings are removed from the
    # metadata copy, because only radar images were reconstructed.
    print('Denormalizing reconstructed radar images...')

    metadata_dict_no_soundings = copy.deepcopy(cnn_metadata_dict)
    metadata_dict_no_soundings[cnn.TRAINING_OPTION_DICT_KEY][
        trainval_io.SOUNDING_FIELDS_KEY] = None
    option_dict_no_soundings = metadata_dict_no_soundings[
        cnn.TRAINING_OPTION_DICT_KEY]

    denorm_recon_radar_matrices = trainval_io.separate_shear_and_reflectivity(
        list_of_input_matrices=[reconstructed_radar_matrix],
        training_option_dict=option_dict_no_soundings)

    if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
        # Reflectivity was upsampled for the CNN, so downsample it back
        # before writing; restore the trailing channel axis afterwards.
        denorm_recon_radar_matrices[0] = trainval_io.downsample_reflectivity(
            reflectivity_matrix_dbz=denorm_recon_radar_matrices[0][..., 0])
        denorm_recon_radar_matrices[0] = numpy.expand_dims(
            denorm_recon_radar_matrices[0], axis=-1)

    denorm_recon_radar_matrices = model_interpretation.denormalize_data(
        list_of_input_matrices=denorm_recon_radar_matrices,
        model_metadata_dict=metadata_dict_no_soundings)

    # Write reconstructed images.  Output file is keyed by the median storm
    # time's SPC date.
    spc_date_string = time_conversion.time_to_spc_date_string(
        numpy.median(storm_times_unix_sec).astype(int))

    output_file_name = upconvnet.find_prediction_file(
        top_directory_name=top_output_dir_name,
        spc_date_string=spc_date_string, raise_error_if_missing=False)

    print('Writing predictions to: "{0:s}"...'.format(output_file_name))
    upconvnet.write_predictions(
        pickle_file_name=output_file_name,
        denorm_recon_radar_matrices=denorm_recon_radar_matrices,
        full_storm_id_strings=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        mse_by_example=mse_by_example,
        upconvnet_file_name=upconvnet_file_name)