def fields_and_heights_to_names(field_names, heights_m_agl,
                                include_units=True):
    """Converts list of radar field/height pairs to panel names.

    P = number of panels

    :param field_names: length-P list with names of radar fields.  Each must
        be accepted by `radar_utils.check_field_name`.
    :param heights_m_agl: length-P numpy array of heights (metres above
        ground level).
    :param include_units: Boolean flag.  If True, panel names will include
        units.
    :return: panel_names: length-P list of panel names (to be printed at the
        bottom of each panel).
    """

    error_checking.assert_is_boolean(include_units)
    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(field_names), num_dimensions=1)

    num_panels = len(field_names)

    error_checking.assert_is_numpy_array(
        heights_m_agl, exact_dimensions=numpy.array([num_panels]))
    error_checking.assert_is_geq_numpy_array(heights_m_agl, 0.)
    heights_m_agl = numpy.round(heights_m_agl).astype(int)

    panel_names = [''] * num_panels

    for i in range(num_panels):
        this_field_name_verbose = field_name_to_verbose(
            field_name=field_names[i], include_units=include_units)

        panel_names[i] = '{0:s}\nat {1:d} km AGL'.format(
            this_field_name_verbose,
            int(numpy.round(heights_m_agl[i] * METRES_TO_KM))
        )

    return panel_names
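# Hypothetical usage sketch (illustration only, not original code).  Assumes
# 'reflectivity_dbz' is a field name accepted by
# `radar_utils.check_field_name` and that METRES_TO_KM = 0.001 at module
# level.
#
#     import numpy
#
#     panel_names = fields_and_heights_to_names(
#         field_names=['reflectivity_dbz', 'reflectivity_dbz'],
#         heights_m_agl=numpy.array([1000., 3000.]),
#         include_units=True)
#
#     # Each element looks like 'Reflectivity (dBZ)\nat 1 km AGL' (the exact
#     # verbose name depends on `field_name_to_verbose`).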
def _check_statistic_names(statistic_names):
    """Ensures that statistic names are valid.

    :param statistic_names: 1-D list of statistic names.
    :raises: ValueError: if any element of `statistic_names` is not in
        `STATISTIC_NAMES`.
    """

    error_checking.assert_is_string_list(statistic_names)
    error_checking.assert_is_numpy_array(
        numpy.array(statistic_names), num_dimensions=1)

    for this_name in statistic_names:
        if this_name in STATISTIC_NAMES:
            continue

        error_string = (
            '\n\n' + str(STATISTIC_NAMES) +
            '\n\nValid statistic names (listed above) do not include the '
            'following: "' + this_name + '"'
        )

        raise ValueError(error_string)
def get_region_properties(binary_image_matrix,
                          property_names=DEFAULT_REGION_PROP_NAMES):
    """Computes region properties for one shape (polygon).

    M = number of rows in grid
    N = number of columns in grid

    :param binary_image_matrix: M-by-N Boolean numpy array.  If
        binary_image_matrix[i, j] = True, grid point [i, j] is inside the
        polygon.  Otherwise, grid point [i, j] is outside the polygon.
    :param property_names: 1-D list of region properties to compute.
    :return: property_dict: Dictionary, where each key is a string from
        `property_names` and each item is the corresponding value.
    """

    error_checking.assert_is_boolean_numpy_array(binary_image_matrix)
    error_checking.assert_is_numpy_array(binary_image_matrix, num_dimensions=2)
    error_checking.assert_is_string_list(property_names)
    error_checking.assert_is_numpy_array(
        numpy.array(property_names), num_dimensions=1)

    regionprops_object = skimage.measure.regionprops(
        binary_image_matrix.astype(int)
    )[0]

    property_dict = {}

    for this_name in property_names:
        if this_name == ORIENTATION_NAME:
            property_dict.update({
                this_name: RADIANS_TO_DEGREES * getattr(
                    regionprops_object, _stat_name_new_to_orig(this_name))
            })
        else:
            property_dict.update({
                this_name: getattr(
                    regionprops_object, _stat_name_new_to_orig(this_name))
            })

    return property_dict
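# Hypothetical usage sketch (illustration only).  Builds a 5-by-5 binary mask
# with a filled 3-by-3 square and computes the default region properties via
# scikit-image.
#
#     import numpy
#
#     binary_image_matrix = numpy.full((5, 5), False, dtype=bool)
#     binary_image_matrix[1:4, 1:4] = True
#
#     property_dict = get_region_properties(
#         binary_image_matrix=binary_image_matrix,
#         property_names=DEFAULT_REGION_PROP_NAMES)
#
#     # For this square, an 'area' property (if requested) is 9 grid cells.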
def fields_and_refl_heights_to_dict(field_names, data_source,
                                    refl_heights_m_asl=None):
    """Converts two arrays (field names and reflectivity heights) to dictionary.

    :param field_names: 1-D list with names of radar fields in GewitterGefahr
        format.
    :param data_source: Data source (string).
    :param refl_heights_m_asl: 1-D numpy array of reflectivity heights (metres
        above sea level).
    :return: field_to_heights_dict_m_asl: Dictionary, where each key is a
        field name and each value is a 1-D numpy array of heights (metres
        above sea level).
    """

    check_data_source(data_source)
    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(field_names), num_dimensions=1)

    field_to_heights_dict_m_asl = {}

    for this_field_name in field_names:
        if this_field_name == radar_utils.REFL_NAME:
            radar_utils.check_heights(
                data_source=data_source, heights_m_asl=refl_heights_m_asl,
                field_name=this_field_name)

            field_to_heights_dict_m_asl.update(
                {this_field_name: refl_heights_m_asl})
        else:
            field_to_heights_dict_m_asl.update({
                this_field_name: radar_utils.get_valid_heights(
                    data_source=data_source, field_name=this_field_name)
            })

    return field_to_heights_dict_m_asl
def get_curvature_based_stats(
        polygon_object_xy,
        statistic_names=DEFAULT_CURVATURE_BASED_STAT_NAMES):
    """Computes curvature-based statistics for one shape (polygon).

    :param polygon_object_xy: Instance of `shapely.geometry.Polygon`, where x-
        and y-coordinates are in metres.  If the polygon is a storm object (or
        anything else with only 90-degree angles), we recommend (but do not
        enforce) that it be smoothed -- using, for example,
        `smoothing_via_iterative_averaging.sia_for_closed_polygon`.
    :param statistic_names: 1-D list of curvature-based statistics to compute.
    :return: statistic_dict: Dictionary, where each key is a string from
        `statistic_names` and each item is the corresponding value.
    """

    error_checking.assert_is_string_list(statistic_names)
    error_checking.assert_is_numpy_array(
        numpy.array(statistic_names), num_dimensions=1)

    vertex_curvatures_metres01 = shape_utils.get_curvature_for_closed_polygon(
        polygon_object_xy)

    statistic_dict = {}

    if MEAN_ABS_CURVATURE_NAME in statistic_names:
        statistic_dict.update({
            MEAN_ABS_CURVATURE_NAME:
                numpy.mean(numpy.absolute(vertex_curvatures_metres01))
        })

    if BENDING_ENERGY_NAME in statistic_names:
        statistic_dict.update({
            BENDING_ENERGY_NAME:
                numpy.sum(vertex_curvatures_metres01 ** 2) /
                polygon_object_xy.length
        })

    if COMPACTNESS_NAME in statistic_names:
        statistic_dict.update({
            COMPACTNESS_NAME:
                polygon_object_xy.length ** 2 /
                (4 * numpy.pi * polygon_object_xy.area)
        })

    return statistic_dict
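# Hypothetical usage sketch (illustration only).  For a circle of radius r,
# curvature is 1/r everywhere, so compactness = perimeter^2 / (4 * pi * area)
# is exactly 1; a many-sided regular polygon approximates this.
#
#     import numpy
#     import shapely.geometry
#
#     theta = numpy.linspace(0., 2 * numpy.pi, num=100, endpoint=False)
#     radius_metres = 1000.
#     circle_xy = shapely.geometry.Polygon(list(zip(
#         radius_metres * numpy.cos(theta), radius_metres * numpy.sin(theta)
#     )))
#
#     statistic_dict = get_curvature_based_stats(circle_xy)
#     # Expect statistic_dict[COMPACTNESS_NAME] to be close to 1.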
def unzip_tar(tar_file_name, target_directory_name=None,
              file_and_dir_names_to_unzip=None):
    """Unzips tar file.

    :param tar_file_name: Path to input file.
    :param target_directory_name: Path to output directory.
    :param file_and_dir_names_to_unzip: List of files and directories to
        extract from the tar file.  Each list element should be a relative
        path inside the tar file.  After unzipping, the same relative path
        will exist inside `target_directory_name`.
    """

    error_checking.assert_is_string(tar_file_name)
    error_checking.assert_is_string_list(file_and_dir_names_to_unzip)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=target_directory_name)

    unix_command_string = 'tar -C "{0:s}" -xvf "{1:s}"'.format(
        target_directory_name, tar_file_name)

    for this_relative_path in file_and_dir_names_to_unzip:
        unix_command_string += ' "' + this_relative_path + '"'

    os.system(unix_command_string)
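# Hypothetical usage sketch (illustration only); all paths here are made up.
#
#     unzip_tar(
#         tar_file_name='/data/tars/20121212.tar',
#         target_directory_name='/data/unzipped/20121212',
#         file_and_dir_names_to_unzip=['reflectivity_dbz', 'metadata.txt'])
#
#     # Runs the shell command:
#     # tar -C "/data/unzipped/20121212" -xvf "/data/tars/20121212.tar"
#     #     "reflectivity_dbz" "metadata.txt"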
def get_basic_statistics(polygon_object_xy,
                         statistic_names=DEFAULT_BASIC_STAT_NAMES):
    """Computes basic statistics for simple polygon.

    A "basic statistic" is one stored in the `shapely.geometry.Polygon`
    object.

    :param polygon_object_xy: Instance of `shapely.geometry.Polygon`.
    :param statistic_names: 1-D list of basic stats to compute.
    :return: basic_stat_dict: Dictionary, where each key is a string from
        `statistic_names` and each item is the corresponding value.
    """

    error_checking.assert_is_string_list(statistic_names)
    error_checking.assert_is_numpy_array(
        numpy.array(statistic_names), num_dimensions=1)

    basic_stat_dict = {}

    if AREA_NAME in statistic_names:
        basic_stat_dict.update({AREA_NAME: polygon_object_xy.area})

    if PERIMETER_NAME in statistic_names:
        basic_stat_dict.update({PERIMETER_NAME: polygon_object_xy.length})

    return basic_stat_dict
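# Hypothetical usage sketch (illustration only): a 3-by-2 rectangle has area
# 6 and perimeter 10.
#
#     import shapely.geometry
#
#     rectangle_xy = shapely.geometry.Polygon(
#         [(0, 0), (3, 0), (3, 2), (0, 2)])
#     basic_stat_dict = get_basic_statistics(rectangle_xy)
#
#     # basic_stat_dict[AREA_NAME] == 6.0
#     # basic_stat_dict[PERIMETER_NAME] == 10.0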
def fields_and_refl_heights_to_pairs(field_names, heights_m_asl):
    """Converts unique arrays (field names and heights) to non-unique ones.

    F = number of fields
    H = number of heights
    N = F * H = number of field/height pairs

    :param field_names: length-F list with names of radar fields in
        GewitterGefahr format.
    :param heights_m_asl: length-H numpy array of heights (metres above sea
        level).
    :return: field_name_by_pair: length-N list of field names.
    :return: height_by_pair_m_asl: length-N numpy array of corresponding
        heights (metres above sea level).
    """

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(field_names), num_dimensions=1)
    radar_utils.check_heights(
        data_source=radar_utils.GRIDRAD_SOURCE_ID,
        heights_m_asl=heights_m_asl)

    field_name_by_pair = []
    height_by_pair_m_asl = numpy.array([], dtype=int)

    for this_field_name in field_names:
        radar_utils.field_name_new_to_orig(
            field_name=this_field_name,
            data_source_name=radar_utils.GRIDRAD_SOURCE_ID)

        field_name_by_pair += [this_field_name] * len(heights_m_asl)
        height_by_pair_m_asl = numpy.concatenate(
            (height_by_pair_m_asl, heights_m_asl))

    return field_name_by_pair, height_by_pair_m_asl
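# Hypothetical usage sketch (illustration only).  Assumes the two field names
# are valid GridRad fields and the heights pass `radar_utils.check_heights`.
# With F = 2 fields and H = 2 heights, the output contains N = 4 pairs in
# field-major order.
#
#     import numpy
#
#     field_name_by_pair, height_by_pair_m_asl = (
#         fields_and_refl_heights_to_pairs(
#             field_names=['reflectivity_dbz', 'spectrum_width_m_s01'],
#             heights_m_asl=numpy.array([1000., 2000.]))
#     )
#
#     # field_name_by_pair = ['reflectivity_dbz', 'reflectivity_dbz',
#     #                       'spectrum_width_m_s01', 'spectrum_width_m_s01']
#     # height_by_pair_m_asl = numpy.array([1000., 2000., 1000., 2000.])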
def add_metadata(novelty_dict, baseline_full_id_strings,
                 baseline_storm_times_unix_sec, trial_full_id_strings,
                 trial_storm_times_unix_sec, cnn_file_name,
                 upconvnet_file_name):
    """Adds metadata to novelty-detection results.

    B = number of baseline examples
    T = number of trial examples

    :param novelty_dict: Dictionary created by `do_novelty_detection`.
    :param baseline_full_id_strings: length-B list of full storm IDs for
        baseline examples.
    :param baseline_storm_times_unix_sec: length-B numpy array of valid times
        for baseline examples.
    :param trial_full_id_strings: length-T list of full storm IDs for trial
        examples.
    :param trial_storm_times_unix_sec: length-T numpy array of valid times
        for trial examples.
    :param cnn_file_name: Path to file with CNN used for novelty detection
        (readable by `cnn.read_model`).
    :param upconvnet_file_name: Path to file with upconvnet used for novelty
        detection (readable by `cnn.read_model`).
    :return: novelty_dict: Dictionary with the following keys.
    novelty_dict['list_of_baseline_input_matrices']: See doc for
        `do_novelty_detection`.
    novelty_dict['list_of_trial_input_matrices']: Same.
    novelty_dict['novel_indices']: Same.
    novelty_dict['novel_image_matrix_upconv']: Same.
    novelty_dict['novel_image_matrix_upconv_svd']: Same.
    novelty_dict['percent_svd_variance_to_keep']: Same.
    novelty_dict['cnn_feature_layer_name']: Same.
    novelty_dict['multipass']: Same.
    novelty_dict['baseline_full_id_strings']: See input doc for this method.
    novelty_dict['baseline_storm_times_unix_sec']: Same.
    novelty_dict['trial_full_id_strings']: Same.
    novelty_dict['trial_storm_times_unix_sec']: Same.
    novelty_dict['cnn_file_name']: Same.
    novelty_dict['upconvnet_file_name']: Same.
    """

    num_baseline_examples = novelty_dict[BASELINE_INPUTS_KEY][0].shape[0]
    these_expected_dim = numpy.array([num_baseline_examples], dtype=int)

    error_checking.assert_is_string_list(baseline_full_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(baseline_full_id_strings),
        exact_dimensions=these_expected_dim)

    error_checking.assert_is_integer_numpy_array(baseline_storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        baseline_storm_times_unix_sec, exact_dimensions=these_expected_dim)

    num_trial_examples = novelty_dict[TRIAL_INPUTS_KEY][0].shape[0]
    these_expected_dim = numpy.array([num_trial_examples], dtype=int)

    error_checking.assert_is_string_list(trial_full_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(trial_full_id_strings),
        exact_dimensions=these_expected_dim)

    error_checking.assert_is_integer_numpy_array(trial_storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        trial_storm_times_unix_sec, exact_dimensions=these_expected_dim)

    error_checking.assert_is_string(cnn_file_name)
    error_checking.assert_is_string(upconvnet_file_name)

    novelty_dict.update({
        BASELINE_IDS_KEY: baseline_full_id_strings,
        BASELINE_STORM_TIMES_KEY: baseline_storm_times_unix_sec,
        TRIAL_IDS_KEY: trial_full_id_strings,
        TRIAL_STORM_TIMES_KEY: trial_storm_times_unix_sec,
        CNN_FILE_KEY: cnn_file_name,
        UPCONVNET_FILE_KEY: upconvnet_file_name
    })

    return novelty_dict
def write_standard_file(
        pickle_file_name, list_of_input_matrices, list_of_saliency_matrices,
        storm_ids, storm_times_unix_sec, model_file_name,
        saliency_metadata_dict, sounding_pressure_matrix_pascals=None):
    """Writes saliency maps (one per example) to Pickle file.

    T = number of input tensors to the model
    E = number of examples (storm objects)
    H = number of height levels per sounding

    :param pickle_file_name: Path to output file.
    :param list_of_input_matrices: length-T list of numpy arrays, containing
        predictors (inputs to the model).  The first dimension of each array
        must have length E.
    :param list_of_saliency_matrices: length-T list of numpy arrays,
        containing saliency values.  list_of_saliency_matrices[i] must have
        the same dimensions as list_of_input_matrices[i].
    :param storm_ids: length-E list of storm IDs (strings).
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model (readable by
        `cnn.read_model`).
    :param saliency_metadata_dict: Dictionary created by `check_metadata`.
    :param sounding_pressure_matrix_pascals: E-by-H numpy array of pressure
        levels in soundings.  Useful only when the model input contains
        soundings with no pressure, because it is needed to plot soundings.
    :raises: ValueError: if `list_of_input_matrices` and
        `list_of_saliency_matrices` have different lengths.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), num_dimensions=1)

    num_storm_objects = len(storm_ids)
    these_expected_dim = numpy.array([num_storm_objects], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=these_expected_dim)

    error_checking.assert_is_list(list_of_input_matrices)
    error_checking.assert_is_list(list_of_saliency_matrices)

    num_input_matrices = len(list_of_input_matrices)
    num_saliency_matrices = len(list_of_saliency_matrices)

    if num_input_matrices != num_saliency_matrices:
        error_string = (
            'Number of input matrices ({0:d}) should equal number of saliency'
            ' matrices ({1:d}).'
        ).format(num_input_matrices, num_saliency_matrices)

        raise ValueError(error_string)

    for i in range(num_input_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_input_matrices[i])
        error_checking.assert_is_numpy_array_without_nan(
            list_of_saliency_matrices[i])

        these_expected_dim = numpy.array(
            (num_storm_objects,) + list_of_input_matrices[i].shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_input_matrices[i], exact_dimensions=these_expected_dim)

        these_expected_dim = numpy.array(
            list_of_input_matrices[i].shape, dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_saliency_matrices[i], exact_dimensions=these_expected_dim)

    if sounding_pressure_matrix_pascals is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pascals)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pascals, 0.)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pascals, num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_storm_objects,) + sounding_pressure_matrix_pascals.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pascals,
            exact_dimensions=these_expected_dim)

    saliency_dict = {
        INPUT_MATRICES_KEY: list_of_input_matrices,
        SALIENCY_MATRICES_KEY: list_of_saliency_matrices,
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: saliency_metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: saliency_metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: saliency_metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: saliency_metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: saliency_metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: saliency_metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(saliency_dict, pickle_file_handle)
    pickle_file_handle.close()
def plot_many_2d_grids(
        data_matrix, field_names, axes_objects, panel_names=None,
        plot_grid_lines=True, colour_map_objects=None,
        colour_norm_objects=None, refl_opacity=DEFAULT_OPACITY,
        plot_colour_bar_flags=None, panel_name_font_size=DEFAULT_FONT_SIZE,
        colour_bar_font_size=DEFAULT_FONT_SIZE,
        colour_bar_length=DEFAULT_COLOUR_BAR_LENGTH):
    """Plots many 2-D grids in paneled figure.

    M = number of rows in grid
    N = number of columns in grid
    C = number of fields

    :param data_matrix: M-by-N-by-C numpy array of radar values.
    :param field_names: length-C list of field names.
    :param axes_objects: length-C list of axes handles (instances of
        `matplotlib.axes._subplots.AxesSubplot`).
    :param panel_names: length-C list of panel names (to be printed at bottom
        of each panel).  If None, panel names will not be printed.
    :param plot_grid_lines: Boolean flag.  If True, will plot grid lines over
        radar images.
    :param colour_map_objects: length-C list of colour schemes (instances of
        `matplotlib.pyplot.cm` or similar).  If None, will use default colour
        scheme for each field.
    :param colour_norm_objects: length-C list of colour-normalizers (instances
        of `matplotlib.colors.BoundaryNorm` or similar).  If None, will use
        default normalizer for each field.
    :param refl_opacity: Opacity for reflectivity colour scheme.  Used only if
        `colour_map_objects is None and colour_norm_objects is None`.
    :param plot_colour_bar_flags: length-C numpy array of Boolean flags.  If
        `plot_colour_bar_flags[k] == True`, will plot colour bar for [k]th
        panel.  If None, will plot no colour bars.
    :param panel_name_font_size: Font size for panel names.
    :param colour_bar_font_size: Font size for colour-bar tick marks.
    :param colour_bar_length: Length of colour bars (as fraction of axis
        length).
    :return: colour_bar_objects: length-C list of colour bars.  If
        `plot_colour_bar_flags[k] == False`, colour_bar_objects[k] will be
        None.
    """

    error_checking.assert_is_numpy_array(data_matrix, num_dimensions=3)

    num_fields = data_matrix.shape[-1]
    these_expected_dim = numpy.array([num_fields], dtype=int)

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(field_names), exact_dimensions=these_expected_dim)
    error_checking.assert_is_numpy_array(
        numpy.array(axes_objects), exact_dimensions=these_expected_dim)

    if panel_names is None:
        panel_names = [None] * num_fields
    else:
        error_checking.assert_is_string_list(panel_names)
        error_checking.assert_is_numpy_array(
            numpy.array(panel_names), exact_dimensions=these_expected_dim)

    if colour_map_objects is None or colour_norm_objects is None:
        colour_map_objects = [None] * num_fields
        colour_norm_objects = [None] * num_fields
    else:
        error_checking.assert_is_numpy_array(
            numpy.array(colour_map_objects),
            exact_dimensions=these_expected_dim)
        error_checking.assert_is_numpy_array(
            numpy.array(colour_norm_objects),
            exact_dimensions=these_expected_dim)

    if plot_colour_bar_flags is None:
        plot_colour_bar_flags = numpy.full(num_fields, False, dtype=bool)

    error_checking.assert_is_boolean_numpy_array(plot_colour_bar_flags)
    error_checking.assert_is_numpy_array(
        plot_colour_bar_flags, exact_dimensions=these_expected_dim)

    colour_bar_objects = [None] * num_fields

    for k in range(num_fields):
        this_colour_map_object, this_colour_norm_object = (
            plot_2d_grid_without_coords(
                field_matrix=data_matrix[..., k],
                field_name=field_names[k], axes_object=axes_objects[k],
                annotation_string=panel_names[k],
                font_size=panel_name_font_size,
                plot_grid_lines=plot_grid_lines,
                colour_map_object=copy.deepcopy(colour_map_objects[k]),
                colour_norm_object=copy.deepcopy(colour_norm_objects[k]),
                refl_opacity=refl_opacity)
        )

        if not plot_colour_bar_flags[k]:
            continue

        colour_bar_objects[k] = plotting_utils.plot_colour_bar(
            axes_object_or_matrix=axes_objects[k],
            data_matrix=data_matrix[..., k],
            colour_map_object=this_colour_map_object,
            colour_norm_object=this_colour_norm_object,
            orientation_string='horizontal', font_size=colour_bar_font_size,
            fraction_of_axis_length=colour_bar_length,
            extend_min=field_names[k] in SHEAR_VORT_DIV_NAMES,
            extend_max=True)

    return colour_bar_objects
def write_file(
        netcdf_file_name, init_scalar_predictor_matrix,
        final_scalar_predictor_matrix, init_vector_predictor_matrix,
        final_vector_predictor_matrix, initial_activations, final_activations,
        example_id_strings, model_file_name, layer_name, neuron_indices,
        ideal_activation, num_iterations, learning_rate, l2_weight):
    """Writes backwards-optimization results to file.

    E = number of examples
    H = number of heights
    P_s = number of scalar predictors
    P_v = number of vector predictors

    :param netcdf_file_name: Path to output file.
    :param init_scalar_predictor_matrix: numpy array (E x P_s) of initial
        predictor values.
    :param final_scalar_predictor_matrix: Same but with final values.
    :param init_vector_predictor_matrix: numpy array (E x H x P_v) of initial
        predictor values.
    :param final_vector_predictor_matrix: Same but with final values.
    :param initial_activations: length-E numpy array of initial activations,
        before optimization.
    :param final_activations: Same but with final activations, after
        optimization.
    :param example_id_strings: length-E list of example IDs.
    :param model_file_name: Path to file with neural net used for backwards
        optimization (readable by `neural_net.read_model`).
    :param layer_name: See doc for `check_metadata`.
    :param neuron_indices: Same.
    :param ideal_activation: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param l2_weight: Same.
    """

    # Check input args.
    check_metadata(
        layer_name=layer_name, neuron_indices=neuron_indices,
        ideal_activation=ideal_activation, num_iterations=num_iterations,
        learning_rate=learning_rate, l2_weight=l2_weight)

    error_checking.assert_is_numpy_array_without_nan(
        init_scalar_predictor_matrix)
    error_checking.assert_is_numpy_array(
        init_scalar_predictor_matrix, num_dimensions=2)

    error_checking.assert_is_numpy_array_without_nan(
        final_scalar_predictor_matrix)
    error_checking.assert_is_numpy_array(
        final_scalar_predictor_matrix,
        exact_dimensions=numpy.array(
            init_scalar_predictor_matrix.shape, dtype=int)
    )

    error_checking.assert_is_numpy_array_without_nan(
        init_vector_predictor_matrix)
    error_checking.assert_is_numpy_array(
        init_vector_predictor_matrix, num_dimensions=3)

    error_checking.assert_is_numpy_array_without_nan(
        final_vector_predictor_matrix)
    error_checking.assert_is_numpy_array(
        final_vector_predictor_matrix,
        exact_dimensions=numpy.array(
            init_vector_predictor_matrix.shape, dtype=int)
    )

    num_examples = init_vector_predictor_matrix.shape[0]
    expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_numpy_array_without_nan(initial_activations)
    error_checking.assert_is_numpy_array(
        initial_activations, exact_dimensions=expected_dim)

    error_checking.assert_is_numpy_array_without_nan(final_activations)
    error_checking.assert_is_numpy_array(
        final_activations, exact_dimensions=expected_dim)

    error_checking.assert_is_string_list(example_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(example_id_strings), exact_dimensions=expected_dim)

    error_checking.assert_is_string(model_file_name)

    # Write to NetCDF file.
    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(
        netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET')

    dataset_object.setncattr(MODEL_FILE_KEY, model_file_name)
    dataset_object.setncattr(LAYER_NAME_KEY, layer_name)
    dataset_object.setncattr(NEURON_INDICES_KEY, neuron_indices)
    dataset_object.setncattr(IDEAL_ACTIVATION_KEY, ideal_activation)
    dataset_object.setncattr(NUM_ITERATIONS_KEY, num_iterations)
    dataset_object.setncattr(LEARNING_RATE_KEY, learning_rate)
    dataset_object.setncattr(L2_WEIGHT_KEY, l2_weight)

    dataset_object.createDimension(EXAMPLE_DIMENSION_KEY, num_examples)
    dataset_object.createDimension(
        SCALAR_PREDICTOR_DIM_KEY, init_scalar_predictor_matrix.shape[-1])
    dataset_object.createDimension(
        HEIGHT_DIMENSION_KEY, init_vector_predictor_matrix.shape[1])
    dataset_object.createDimension(
        VECTOR_PREDICTOR_DIM_KEY, init_vector_predictor_matrix.shape[2])

    if num_examples == 0:
        num_id_characters = 1
    else:
        num_id_characters = numpy.max(numpy.array([
            len(s) for s in example_id_strings
        ]))

    dataset_object.createDimension(EXAMPLE_ID_CHAR_DIM_KEY, num_id_characters)

    this_string_format = 'S{0:d}'.format(num_id_characters)
    example_ids_char_array = netCDF4.stringtochar(numpy.array(
        example_id_strings, dtype=this_string_format
    ))

    dataset_object.createVariable(
        EXAMPLE_IDS_KEY, datatype='S1',
        dimensions=(EXAMPLE_DIMENSION_KEY, EXAMPLE_ID_CHAR_DIM_KEY)
    )
    dataset_object.variables[EXAMPLE_IDS_KEY][:] = numpy.array(
        example_ids_char_array)

    if init_scalar_predictor_matrix.size > 0:
        these_dim = (EXAMPLE_DIMENSION_KEY, SCALAR_PREDICTOR_DIM_KEY)

        dataset_object.createVariable(
            INIT_SCALAR_PREDICTORS_KEY, datatype=numpy.float32,
            dimensions=these_dim)
        dataset_object.variables[INIT_SCALAR_PREDICTORS_KEY][:] = (
            init_scalar_predictor_matrix)

        dataset_object.createVariable(
            FINAL_SCALAR_PREDICTORS_KEY, datatype=numpy.float32,
            dimensions=these_dim)
        dataset_object.variables[FINAL_SCALAR_PREDICTORS_KEY][:] = (
            final_scalar_predictor_matrix)

    if init_vector_predictor_matrix.size > 0:
        these_dim = (
            EXAMPLE_DIMENSION_KEY, HEIGHT_DIMENSION_KEY,
            VECTOR_PREDICTOR_DIM_KEY
        )

        dataset_object.createVariable(
            INIT_VECTOR_PREDICTORS_KEY, datatype=numpy.float32,
            dimensions=these_dim)
        dataset_object.variables[INIT_VECTOR_PREDICTORS_KEY][:] = (
            init_vector_predictor_matrix)

        dataset_object.createVariable(
            FINAL_VECTOR_PREDICTORS_KEY, datatype=numpy.float32,
            dimensions=these_dim)
        dataset_object.variables[FINAL_VECTOR_PREDICTORS_KEY][:] = (
            final_vector_predictor_matrix)

    dataset_object.createVariable(
        INITIAL_ACTIVATIONS_KEY, datatype=numpy.float32,
        dimensions=EXAMPLE_DIMENSION_KEY)
    dataset_object.variables[INITIAL_ACTIVATIONS_KEY][:] = initial_activations

    dataset_object.createVariable(
        FINAL_ACTIVATIONS_KEY, datatype=numpy.float32,
        dimensions=EXAMPLE_DIMENSION_KEY)
    dataset_object.variables[FINAL_ACTIVATIONS_KEY][:] = final_activations

    dataset_object.close()
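# Sketch (illustration only) of how the fixed-width ID storage above
# round-trips: `netCDF4.stringtochar` pads each ID to the length of the
# longest one, and `netCDF4.chartostring` recovers the originals on read.
#
#     import netCDF4
#     import numpy
#
#     example_id_strings = ['example_000', 'ex_1']
#     char_array = netCDF4.stringtochar(
#         numpy.array(example_id_strings, dtype='S11'))
#     recovered = [str(s) for s in netCDF4.chartostring(char_array)]
#
#     # recovered == ['example_000', 'ex_1']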
def soundings_to_metpy_dictionaries(
        sounding_matrix, field_names, height_levels_m_agl=None,
        storm_elevations_m_asl=None):
    """Converts soundings to format required by MetPy.

    If `sounding_matrix` contains pressures, `height_levels_m_agl` and
    `storm_elevations_m_asl` will not be used.  Otherwise,
    `height_levels_m_agl` and `storm_elevations_m_asl` will be used to
    estimate the pressure levels for each sounding.

    :param sounding_matrix: numpy array (E x H_s x F_s) of soundings.
    :param field_names: list (length F_s) of field names, in the order that
        they appear in `sounding_matrix`.
    :param height_levels_m_agl: numpy array (length H_s) of height levels
        (metres above ground level), in the order that they appear in
        `sounding_matrix`.
    :param storm_elevations_m_asl: length-E numpy array of storm elevations
        (metres above sea level).
    :return: list_of_metpy_dictionaries: length-E list of dictionaries.  The
        format of each dictionary is described in the input doc for
        `sounding_plotting.plot_sounding`.
    """

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(field_names), num_dimensions=1)

    check_soundings(
        sounding_matrix=sounding_matrix, num_fields=len(field_names))

    try:
        pressure_index = field_names.index(soundings.PRESSURE_NAME)
        pressure_matrix_pascals = sounding_matrix[..., pressure_index]
    except ValueError:
        error_checking.assert_is_geq_numpy_array(height_levels_m_agl, 0)
        error_checking.assert_is_numpy_array(
            height_levels_m_agl, num_dimensions=1)

        error_checking.assert_is_numpy_array_without_nan(
            storm_elevations_m_asl)
        error_checking.assert_is_numpy_array(
            storm_elevations_m_asl, num_dimensions=1)

        num_height_levels = len(height_levels_m_agl)
        num_examples = len(storm_elevations_m_asl)

        check_soundings(
            sounding_matrix=sounding_matrix, num_examples=num_examples,
            num_height_levels=num_height_levels)

        height_matrix_m_asl = numpy.full(
            (num_examples, num_height_levels), numpy.nan)

        for i in range(num_examples):
            height_matrix_m_asl[i, ...] = (
                height_levels_m_agl + storm_elevations_m_asl[i]
            )

        pressure_matrix_pascals = standard_atmo.height_to_pressure(
            height_matrix_m_asl)

    try:
        temperature_index = field_names.index(soundings.TEMPERATURE_NAME)
        temperature_matrix_kelvins = sounding_matrix[..., temperature_index]
    except ValueError:
        virtual_pot_temp_index = field_names.index(
            soundings.VIRTUAL_POTENTIAL_TEMPERATURE_NAME)

        temperature_matrix_kelvins = (
            temperature_conversions.temperatures_from_potential_temperatures(
                potential_temperatures_kelvins=sounding_matrix[
                    ..., virtual_pot_temp_index],
                total_pressures_pascals=pressure_matrix_pascals)
        )

    try:
        specific_humidity_index = field_names.index(
            soundings.SPECIFIC_HUMIDITY_NAME)

        dewpoint_matrix_kelvins = (
            moisture_conversions.specific_humidity_to_dewpoint(
                specific_humidities_kg_kg01=sounding_matrix[
                    ..., specific_humidity_index],
                total_pressures_pascals=pressure_matrix_pascals)
        )
    except ValueError:
        relative_humidity_index = field_names.index(
            soundings.RELATIVE_HUMIDITY_NAME)

        dewpoint_matrix_kelvins = (
            moisture_conversions.relative_humidity_to_dewpoint(
                relative_humidities=sounding_matrix[
                    ..., relative_humidity_index],
                temperatures_kelvins=temperature_matrix_kelvins,
                total_pressures_pascals=pressure_matrix_pascals)
        )

    temperature_matrix_celsius = temperature_conversions.kelvins_to_celsius(
        temperature_matrix_kelvins)
    dewpoint_matrix_celsius = temperature_conversions.kelvins_to_celsius(
        dewpoint_matrix_kelvins)

    try:
        u_wind_index = field_names.index(soundings.U_WIND_NAME)
        v_wind_index = field_names.index(soundings.V_WIND_NAME)
        include_wind = True
    except ValueError:
        include_wind = False

    num_examples = sounding_matrix.shape[0]
    list_of_metpy_dictionaries = [None] * num_examples

    for i in range(num_examples):
        list_of_metpy_dictionaries[i] = {
            soundings.PRESSURE_COLUMN_METPY:
                pressure_matrix_pascals[i, :] * PASCALS_TO_MB,
            soundings.TEMPERATURE_COLUMN_METPY:
                temperature_matrix_celsius[i, :],
            soundings.DEWPOINT_COLUMN_METPY: dewpoint_matrix_celsius[i, :],
        }

        if include_wind:
            list_of_metpy_dictionaries[i].update({
                soundings.U_WIND_COLUMN_METPY:
                    sounding_matrix[i, ..., u_wind_index] *
                    METRES_PER_SECOND_TO_KT,
                soundings.V_WIND_COLUMN_METPY:
                    sounding_matrix[i, ..., v_wind_index] *
                    METRES_PER_SECOND_TO_KT
            })

    return list_of_metpy_dictionaries
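# Sketch (illustration only) of the per-example output format, as consumed by
# `sounding_plotting.plot_sounding`; the key constants live in the
# `soundings` module, and H_s = number of height levels.
#
#     list_of_metpy_dictionaries[i] = {
#         soundings.PRESSURE_COLUMN_METPY: <length-H_s array, millibars>,
#         soundings.TEMPERATURE_COLUMN_METPY: <length-H_s array, deg C>,
#         soundings.DEWPOINT_COLUMN_METPY: <length-H_s array, deg C>,
#         soundings.U_WIND_COLUMN_METPY: <length-H_s array, knots>,  # if wind
#         soundings.V_WIND_COLUMN_METPY: <length-H_s array, knots>,  # if wind
#     }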
def denormalize_soundings(
        sounding_matrix, field_names, normalization_type_string,
        normalization_param_file_name, test_mode=False,
        min_normalized_value=0., max_normalized_value=1.,
        normalization_table=None):
    """Denormalizes soundings.

    This method is the inverse of `normalize_soundings`.

    :param sounding_matrix: See doc for `normalize_soundings`.
    :param field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Path to file with normalization
        params.  Will be read by `read_normalization_params_from_file`.
    :param test_mode: For testing only.  Leave this alone.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param normalization_table: For testing only.  Leave this alone.
    :return: sounding_matrix: Denormalized version of input, with the same
        dimensions.
    """

    error_checking.assert_is_boolean(test_mode)

    if not test_mode:
        normalization_table = read_normalization_params_from_file(
            normalization_param_file_name
        )[2]

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(field_names), num_dimensions=1)

    num_fields = len(field_names)
    check_soundings(sounding_matrix=sounding_matrix, num_fields=num_fields)
    _check_normalization_type(normalization_type_string)

    if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
        error_checking.assert_is_greater(
            max_normalized_value, min_normalized_value)

        # error_checking.assert_is_geq_numpy_array(
        #     sounding_matrix, min_normalized_value)
        # error_checking.assert_is_leq_numpy_array(
        #     sounding_matrix, max_normalized_value)

    for j in range(num_fields):
        if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
            this_min_value = normalization_table[MIN_VALUE_COLUMN].loc[
                field_names[j]]
            this_max_value = normalization_table[MAX_VALUE_COLUMN].loc[
                field_names[j]]

            sounding_matrix[..., j] = (
                (sounding_matrix[..., j] - min_normalized_value) /
                (max_normalized_value - min_normalized_value)
            )
            sounding_matrix[..., j] = this_min_value + (
                sounding_matrix[..., j] * (this_max_value - this_min_value)
            )
        else:
            this_mean = normalization_table[MEAN_VALUE_COLUMN].loc[
                field_names[j]]
            this_standard_deviation = normalization_table[
                STANDARD_DEVIATION_COLUMN].loc[field_names[j]]

            sounding_matrix[..., j] = this_mean + (
                this_standard_deviation * sounding_matrix[..., j]
            )

    return sounding_matrix
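# Worked check (illustration only) of the min-max branch above.  Min-max
# normalization maps x to x' = a + (b - a) * (x - x_min) / (x_max - x_min),
# where [a, b] = [min_normalized_value, max_normalized_value], so this method
# inverts it as:
#
#     x = x_min + (x_max - x_min) * (x' - a) / (b - a)
#
# For example, with x_min = 200 K, x_max = 300 K, and [a, b] = [0, 1], a
# normalized value of 0.5 denormalizes to 200 + 0.5 * 100 = 250 K.  The
# z-score branch is the analogous inverse: x = mean + stdev * x'.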
def find_many_raw_files(
        desired_times_unix_sec, spc_date_strings, data_source, field_names,
        top_directory_name, reflectivity_heights_m_asl=None,
        max_time_offset_for_az_shear_sec=
        DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC,
        max_time_offset_for_non_shear_sec=
        DEFAULT_MAX_TIME_OFFSET_FOR_NON_SHEAR_SEC):
    """Finds raw file for each field/height pair and time step.

    N = number of input times
    T = number of unique input times
    F = number of field/height pairs

    :param desired_times_unix_sec: length-N numpy array with desired valid
        times.
    :param spc_date_strings: length-N list of corresponding SPC dates (format
        "yyyymmdd").
    :param data_source: Data source ("myrorss" or "mrms").
    :param field_names: 1-D list of field names.
    :param top_directory_name: Name of top-level directory with radar data
        from the given source.
    :param reflectivity_heights_m_asl: 1-D numpy array of heights (metres
        above sea level) for the field "reflectivity_dbz".  If
        "reflectivity_dbz" is not in `field_names`, leave this as None.
    :param max_time_offset_for_az_shear_sec: Max time offset (between desired
        and actual valid time) for azimuthal-shear fields.
    :param max_time_offset_for_non_shear_sec: Max time offset (between
        desired and actual valid time) for non-azimuthal-shear fields.
    :return: file_dictionary: Dictionary with the following keys.
    file_dictionary['radar_file_name_matrix']: T-by-F numpy array of paths to
        raw files.
    file_dictionary['unique_times_unix_sec']: length-T numpy array of unique
        valid times.
    file_dictionary['spc_date_strings_for_unique_times']: length-T numpy
        array of corresponding SPC dates.
    file_dictionary['field_name_by_pair']: length-F list of field names.
    file_dictionary['height_by_pair_m_asl']: length-F numpy array of heights
        (metres above sea level).
    """

    field_name_by_pair, height_by_pair_m_asl = (
        myrorss_and_mrms_utils.fields_and_refl_heights_to_pairs(
            field_names=field_names, data_source=data_source,
            refl_heights_m_asl=reflectivity_heights_m_asl)
    )

    num_fields = len(field_name_by_pair)

    error_checking.assert_is_integer_numpy_array(desired_times_unix_sec)
    error_checking.assert_is_numpy_array(
        desired_times_unix_sec, num_dimensions=1)

    num_times = len(desired_times_unix_sec)

    error_checking.assert_is_string_list(spc_date_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(spc_date_strings),
        exact_dimensions=numpy.array([num_times]))

    spc_dates_unix_sec = numpy.array([
        time_conversion.spc_date_string_to_unix_sec(s)
        for s in spc_date_strings
    ])

    time_matrix = numpy.hstack((
        numpy.reshape(desired_times_unix_sec, (num_times, 1)),
        numpy.reshape(spc_dates_unix_sec, (num_times, 1))
    ))

    unique_time_matrix = numpy.vstack(
        {tuple(this_row) for this_row in time_matrix}
    ).astype(int)

    unique_times_unix_sec = unique_time_matrix[:, 0]
    spc_dates_at_unique_times_unix_sec = unique_time_matrix[:, 1]

    sort_indices = numpy.argsort(unique_times_unix_sec)
    unique_times_unix_sec = unique_times_unix_sec[sort_indices]
    spc_dates_at_unique_times_unix_sec = spc_dates_at_unique_times_unix_sec[
        sort_indices]

    num_unique_times = len(unique_times_unix_sec)
    radar_file_name_matrix = numpy.full(
        (num_unique_times, num_fields), '', dtype=object)

    for i in range(num_unique_times):
        this_spc_date_string = time_conversion.time_to_spc_date_string(
            spc_dates_at_unique_times_unix_sec[i])

        for j in range(num_fields):
            if field_name_by_pair[j] in AZIMUTHAL_SHEAR_FIELD_NAMES:
                this_max_time_offset_sec = max_time_offset_for_az_shear_sec
                this_raise_error_flag = False
            else:
                this_max_time_offset_sec = max_time_offset_for_non_shear_sec
                this_raise_error_flag = True

            if this_max_time_offset_sec == 0:
                radar_file_name_matrix[i, j] = find_raw_file(
                    unix_time_sec=unique_times_unix_sec[i],
                    spc_date_string=this_spc_date_string,
                    field_name=field_name_by_pair[j],
                    data_source=data_source,
                    top_directory_name=top_directory_name,
                    height_m_asl=height_by_pair_m_asl[j],
                    raise_error_if_missing=this_raise_error_flag)
            else:
                radar_file_name_matrix[i, j] = find_raw_file_inexact_time(
                    desired_time_unix_sec=unique_times_unix_sec[i],
                    spc_date_string=this_spc_date_string,
                    field_name=field_name_by_pair[j],
                    data_source=data_source,
                    top_directory_name=top_directory_name,
                    height_m_asl=height_by_pair_m_asl[j],
                    max_time_offset_sec=this_max_time_offset_sec,
                    raise_error_if_missing=this_raise_error_flag)

            if radar_file_name_matrix[i, j] is None:
                this_time_string = time_conversion.unix_sec_to_string(
                    unique_times_unix_sec[i], TIME_FORMAT_FOR_LOG_MESSAGES)

                warning_string = (
                    'Cannot find file for "{0:s}" at {1:d} metres ASL and '
                    '{2:s}.'
                ).format(
                    field_name_by_pair[j], int(height_by_pair_m_asl[j]),
                    this_time_string
                )

                warnings.warn(warning_string)

    return {
        RADAR_FILE_NAMES_KEY: radar_file_name_matrix,
        UNIQUE_TIMES_KEY: unique_times_unix_sec,
        SPC_DATES_AT_UNIQUE_TIMES_KEY: spc_dates_at_unique_times_unix_sec,
        FIELD_NAME_BY_PAIR_KEY: field_name_by_pair,
        HEIGHT_BY_PAIR_KEY: numpy.round(height_by_pair_m_asl).astype(int)
    }
def run_sfs_on_sklearn_model(
        training_predictor_matrix, training_target_values,
        validation_predictor_matrix, validation_target_values,
        predictor_names, model_object, cost_function, min_loss_decrease=None,
        min_percentage_loss_decrease=None,
        num_steps_for_loss_decrease=DEFAULT_NUM_STEPS_FOR_LOSS_DECREASE):
    """Runs sequential forward selection (SFS) on scikit-learn model.

    T = number of training examples
    V = number of validation examples
    P = number of predictors

    :param training_predictor_matrix: T-by-P numpy array of predictor values.
    :param training_target_values: length-T numpy array of target values
        (integer class labels, since this method supports only
        classification).
    :param validation_predictor_matrix: V-by-P numpy array of predictor
        values.
    :param validation_target_values: length-V numpy array of target values.
    :param predictor_names: length-P list with names of predictor variables.
    :param model_object: Instance of scikit-learn model.  Must implement the
        methods `fit` and `predict_proba`.
    :param cost_function: Cost function (used to assess model on validation
        data).  Should have the following inputs and outputs.
    Input: target_values: Same as input `validation_target_values` for this
        method.
    Input: class_probability_matrix: V-by-K matrix of class probabilities,
        where K = number of classes.  class_probability_matrix[i, k] is the
        predicted probability that the [i]th example belongs to the [k]th
        class.
    Output: cost: Scalar value.
    :param min_loss_decrease: Used to determine stopping criterion.  If the
        loss has decreased by less than `min_loss_decrease` over the last
        `num_steps_for_loss_decrease` steps of sequential selection, the
        algorithm will stop.
    :param min_percentage_loss_decrease:
        [used only if `min_loss_decrease is None`]
        Used to determine stopping criterion.  If the loss has decreased by
        less than `min_percentage_loss_decrease` over the last
        `num_steps_for_loss_decrease` steps of sequential selection, the
        algorithm will stop.
    :param num_steps_for_loss_decrease: See above.
    :return: result_dict: See documentation for `run_sfs`.
    """

    # TODO(thunderhoser): This method does not involve deep learning, so
    # shouldn't really be in this file.

    # Check input args.
    error_checking.assert_is_numpy_array_without_nan(training_predictor_matrix)
    error_checking.assert_is_numpy_array(
        training_predictor_matrix, num_dimensions=2)

    num_training_examples = training_predictor_matrix.shape[0]
    num_predictors = training_predictor_matrix.shape[1]

    error_checking.assert_is_integer_numpy_array(training_target_values)
    error_checking.assert_is_geq_numpy_array(training_target_values, 0)
    error_checking.assert_is_numpy_array(
        training_target_values,
        exact_dimensions=numpy.array([num_training_examples])
    )

    error_checking.assert_is_numpy_array_without_nan(
        validation_predictor_matrix)

    num_validation_examples = validation_predictor_matrix.shape[0]

    error_checking.assert_is_numpy_array(
        validation_predictor_matrix,
        exact_dimensions=numpy.array([num_validation_examples, num_predictors])
    )

    error_checking.assert_is_integer_numpy_array(validation_target_values)
    error_checking.assert_is_geq_numpy_array(validation_target_values, 0)
    error_checking.assert_is_numpy_array(
        validation_target_values,
        exact_dimensions=numpy.array([num_validation_examples])
    )

    error_checking.assert_is_string_list(predictor_names)
    error_checking.assert_is_numpy_array(
        numpy.array(predictor_names),
        exact_dimensions=numpy.array([num_predictors])
    )

    # Create climatological model.
    num_classes = 1 + max([
        numpy.max(training_target_values),
        numpy.max(validation_target_values)
    ])

    climo_validation_prob_matrix = numpy.full(
        (num_validation_examples, num_classes), numpy.nan)

    for k in range(num_classes):
        climo_validation_prob_matrix[..., k] = numpy.mean(
            training_target_values == k)

    climo_cost = cost_function(
        validation_target_values, climo_validation_prob_matrix)
    print('Cost of climatological model: {0:.4e}\n'.format(climo_cost))

    # Do dirty work.
    remaining_predictor_names = predictor_names + []
    selected_predictor_name_by_step = []
    lowest_cost_by_step = []
    step_num = 0

    while len(remaining_predictor_names) > 0:
        print('\n')
        step_num += 1

        lowest_cost = numpy.inf
        best_predictor_name = None

        for this_predictor_name in remaining_predictor_names:
            print('Trying predictor "{0:s}" at step {1:d} of SFS...'.format(
                this_predictor_name, step_num))

            these_indices = [
                predictor_names.index(s)
                for s in selected_predictor_name_by_step
            ]
            these_indices.append(predictor_names.index(this_predictor_name))
            these_indices = numpy.array(these_indices, dtype=int)

            this_training_matrix = training_predictor_matrix[
                ..., these_indices]
            this_validation_matrix = validation_predictor_matrix[
                ..., these_indices]

            new_model_object = sklearn.base.clone(model_object)
            new_model_object.fit(this_training_matrix, training_target_values)

            this_validation_prob_matrix = new_model_object.predict_proba(
                this_validation_matrix)
            this_cost = cost_function(
                validation_target_values, this_validation_prob_matrix)

            print('Validation loss after adding "{0:s}" = {1:.4e}\n'.format(
                this_predictor_name, this_cost))

            if this_cost > lowest_cost:
                continue

            lowest_cost = this_cost + 0.
            best_predictor_name = this_predictor_name + ''

        stopping_criterion = _eval_sfs_stopping_criterion(
            min_loss_decrease=min_loss_decrease,
            min_percentage_loss_decrease=min_percentage_loss_decrease,
            num_steps_for_loss_decrease=num_steps_for_loss_decrease,
            lowest_cost_by_step=lowest_cost_by_step + [lowest_cost])

        if stopping_criterion:
            break

        selected_predictor_name_by_step.append(best_predictor_name)
        lowest_cost_by_step.append(lowest_cost)
        remaining_predictor_names.remove(best_predictor_name)

        print('Best predictor = "{0:s}" ... new cost = {1:.4e}'.format(
            best_predictor_name, lowest_cost))
        print(SEPARATOR_STRING)

    return {
        MIN_DECREASE_KEY: min_loss_decrease,
        MIN_PERCENT_DECREASE_KEY: min_percentage_loss_decrease,
        NUM_STEPS_FOR_DECREASE_KEY: num_steps_for_loss_decrease,
        SELECTED_PREDICTORS_KEY: selected_predictor_name_by_step,
        LOWEST_COSTS_KEY: lowest_cost_by_step
    }
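# Hypothetical usage sketch (illustration only).  Wires the method to a
# scikit-learn logistic regression with log loss as the cost function; the
# data arrays are made up, and the cost-function signature matches the
# docstring above (target values first, then class probabilities).
#
#     import numpy
#     import sklearn.linear_model
#     import sklearn.metrics
#
#     training_predictor_matrix = numpy.random.normal(size=(1000, 5))
#     training_target_values = numpy.random.randint(0, 2, size=1000)
#     validation_predictor_matrix = numpy.random.normal(size=(500, 5))
#     validation_target_values = numpy.random.randint(0, 2, size=500)
#
#     result_dict = run_sfs_on_sklearn_model(
#         training_predictor_matrix=training_predictor_matrix,
#         training_target_values=training_target_values,
#         validation_predictor_matrix=validation_predictor_matrix,
#         validation_target_values=validation_target_values,
#         predictor_names=['a', 'b', 'c', 'd', 'e'],
#         model_object=sklearn.linear_model.LogisticRegression(),
#         cost_function=sklearn.metrics.log_loss,
#         min_loss_decrease=1e-4)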
def write_ungridded_predictions(
        netcdf_file_name, class_probability_matrix, storm_ids,
        storm_times_unix_sec, target_name, observed_labels=None):
    """Writes predictions to NetCDF file.

    K = number of classes
    E = number of examples (storm objects)

    :param netcdf_file_name: Path to output file.
    :param class_probability_matrix: E-by-K numpy array of forecast
        probabilities.
    :param storm_ids: length-E list of storm IDs (strings).
    :param storm_times_unix_sec: length-E numpy array of valid times.
    :param target_name: Name of target variable.
    :param observed_labels: [this may be None]
        length-E numpy array of observed labels (integers in 0...[K - 1]).
    """

    # Check input args.
    error_checking.assert_is_numpy_array(
        class_probability_matrix, num_dimensions=2)
    error_checking.assert_is_geq_numpy_array(class_probability_matrix, 0.)
    error_checking.assert_is_leq_numpy_array(class_probability_matrix, 1.)

    num_examples = class_probability_matrix.shape[0]
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(
        numpy.array(storm_ids), exact_dimensions=these_expected_dim)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=these_expected_dim)

    target_val_utils.target_name_to_params(target_name)

    if observed_labels is not None:
        error_checking.assert_is_integer_numpy_array(observed_labels)
        error_checking.assert_is_numpy_array(
            observed_labels, exact_dimensions=these_expected_dim)

    # Write to NetCDF file.
    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(
        netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET')

    dataset_object.setncattr(TARGET_NAME_KEY, target_name)

    dataset_object.createDimension(
        EXAMPLE_DIMENSION_KEY, class_probability_matrix.shape[0]
    )
    dataset_object.createDimension(
        CLASS_DIMENSION_KEY, class_probability_matrix.shape[1]
    )

    if num_examples == 0:
        num_id_characters = 1
    else:
        num_id_characters = 1 + numpy.max(numpy.array([
            len(s) for s in storm_ids
        ]))

    dataset_object.createDimension(STORM_ID_CHAR_DIM_KEY, num_id_characters)

    # Add storm IDs.
    this_string_format = 'S{0:d}'.format(num_id_characters)
    storm_ids_char_array = netCDF4.stringtochar(numpy.array(
        storm_ids, dtype=this_string_format
    ))

    dataset_object.createVariable(
        STORM_IDS_KEY, datatype='S1',
        dimensions=(EXAMPLE_DIMENSION_KEY, STORM_ID_CHAR_DIM_KEY)
    )
    dataset_object.variables[STORM_IDS_KEY][:] = numpy.array(
        storm_ids_char_array)

    # Add storm times.
    dataset_object.createVariable(
        STORM_TIMES_KEY, datatype=numpy.int32,
        dimensions=EXAMPLE_DIMENSION_KEY
    )
    dataset_object.variables[STORM_TIMES_KEY][:] = storm_times_unix_sec

    # Add probabilities.
    dataset_object.createVariable(
        PROBABILITY_MATRIX_KEY, datatype=numpy.float32,
        dimensions=(EXAMPLE_DIMENSION_KEY, CLASS_DIMENSION_KEY)
    )
    dataset_object.variables[PROBABILITY_MATRIX_KEY][:] = (
        class_probability_matrix
    )

    if observed_labels is not None:
        dataset_object.createVariable(
            OBSERVED_LABELS_KEY, datatype=numpy.int32,
            dimensions=EXAMPLE_DIMENSION_KEY
        )
        dataset_object.variables[OBSERVED_LABELS_KEY][:] = observed_labels

    dataset_object.close()
def write_standard_file(
        pickle_file_name, denorm_predictor_matrices, cam_matrices,
        guided_cam_matrices, full_storm_id_strings, storm_times_unix_sec,
        model_file_name, target_class, target_layer_name,
        sounding_pressure_matrix_pa=None):
    """Writes class-activation maps (one per storm object) to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param denorm_predictor_matrices: See doc for
        `_check_in_and_out_matrices`.
    :param cam_matrices: Same.
    :param guided_cam_matrices: Same.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to model that created the class-activation
        maps (readable by `cnn.read_model`).
    :param target_class: Target class.  `cam_matrices` and
        `guided_cam_matrices` contain activations for the [k + 1]th class,
        where k = `target_class`.
    :param target_layer_name: Name of target layer.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if the model is trained with soundings but
        without pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_integer(target_class)
    error_checking.assert_is_geq(target_class, 0)
    error_checking.assert_is_string(target_layer_name)

    error_checking.assert_is_string_list(full_storm_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(full_storm_id_strings), num_dimensions=1)

    num_examples = len(full_storm_id_strings)
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=these_expected_dim)

    _check_in_and_out_matrices(
        predictor_matrices=denorm_predictor_matrices,
        num_examples=num_examples, cam_matrices=cam_matrices,
        guided_cam_matrices=guided_cam_matrices)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples,) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa,
            exact_dimensions=these_expected_dim)

    gradcam_dict = {
        PREDICTOR_MATRICES_KEY: denorm_predictor_matrices,
        CAM_MATRICES_KEY: cam_matrices,
        GUIDED_CAM_MATRICES_KEY: guided_cam_matrices,
        MODEL_FILE_KEY: model_file_name,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        TARGET_CLASS_KEY: target_class,
        TARGET_LAYER_KEY: target_layer_name,
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(gradcam_dict, pickle_file_handle)
    pickle_file_handle.close()
def download_files_via_http(
        online_file_names, local_file_names, user_name=None, password=None,
        host_name=None, raise_error_if_fails=True):
    """Downloads files via HTTP.

    N = number of files to download

    :param online_file_names: length-N list of URLs.  Example:
        "https://nomads.ncdc.noaa.gov/data/narr/201212/20121212/
        narr-a_221_20121212_1200_000.grb"
    :param local_file_names: length-N list of target paths on local machine
        (to which files will be downloaded).
    :param user_name: User name on HTTP server.  To login anonymously, leave
        this as None.
    :param password: Password on HTTP server.  To login anonymously, leave
        this as None.
    :param host_name: Host name (base URL name) for HTTP server.  Example:
        "https://nomads.ncdc.noaa.gov"
    :param raise_error_if_fails: Boolean flag.  If True and download fails,
        this method will raise an error.
    :return: local_file_names: Same as input, except that if download failed
        for the [i]th file, local_file_names[i] = None.
    :raises: ValueError: if download failed and raise_error_if_fails = True.
    :raises: urllib2.HTTPError: if download failed for any reason not in
        `ACCEPTABLE_HTTP_ERROR_CODES` or `ACCEPTABLE_URL_ERROR_CODES`.  This
        error will be raised regardless of the flag `raise_error_if_fails`.
    """

    if not (user_name is None or password is None):
        error_checking.assert_is_string(user_name)
        error_checking.assert_is_string(password)
        error_checking.assert_is_string(host_name)

        manager_object = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        manager_object.add_password(
            realm=None, uri=host_name, user=user_name, passwd=password)

        authentication_handler = urllib.request.HTTPBasicAuthHandler(
            manager_object)
        opener_object = urllib.request.build_opener(authentication_handler)
        urllib.request.install_opener(opener_object)

    error_checking.assert_is_string_list(online_file_names)
    error_checking.assert_is_numpy_array(
        numpy.asarray(online_file_names), num_dimensions=1)

    num_files = len(online_file_names)

    error_checking.assert_is_string_list(local_file_names)
    error_checking.assert_is_numpy_array(
        numpy.asarray(local_file_names),
        exact_dimensions=numpy.array([num_files]))

    error_checking.assert_is_boolean(raise_error_if_fails)

    for i in range(num_files):
        this_download_succeeded = False
        this_response_object = None

        try:
            this_response_object = urllib.request.urlopen(
                online_file_names[i])
            this_download_succeeded = True
        except urllib.error.HTTPError as this_error:
            if (raise_error_if_fails or
                    this_error.code not in ACCEPTABLE_HTTP_ERROR_CODES):
                raise
        except urllib.error.URLError as this_error:
            error_words = this_error.reason.split()
            acceptable_error_flags = numpy.array([
                w in str(ACCEPTABLE_URL_ERROR_CODES) for w in error_words
            ], dtype=bool)

            if raise_error_if_fails or not numpy.any(acceptable_error_flags):
                raise

        if not this_download_succeeded:
            warnings.warn('Could not download file: {0:s}'.format(
                online_file_names[i]))
            local_file_names[i] = None
            continue

        file_system_utils.mkdir_recursive_if_necessary(
            file_name=local_file_names[i])

        with open(local_file_names[i], 'wb') as this_file_handle:
            while True:
                this_chunk = this_response_object.read(NUM_BYTES_PER_BLOCK)
                if not this_chunk:
                    break

                this_file_handle.write(this_chunk)

        if not os.path.isfile(local_file_names[i]):
            error_string = (
                'Could not download file.  Local file expected at: "{0:s}"'
            ).format(local_file_names[i])

            if raise_error_if_fails:
                raise ValueError(error_string)

            warnings.warn(error_string)
            local_file_names[i] = None

    return local_file_names
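# Hypothetical usage sketch (illustration only).  The URL follows the pattern
# in the docstring above; the local path is made up.
#
#     local_file_names = download_files_via_http(
#         online_file_names=[
#             'https://nomads.ncdc.noaa.gov/data/narr/201212/20121212/'
#             'narr-a_221_20121212_1200_000.grb'
#         ],
#         local_file_names=['/data/narr/narr-a_221_20121212_1200_000.grb'],
#         raise_error_if_fails=False)
#
#     # Any element of the output that is None marks a failed download.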
def write_standard_file(
        pickle_file_name, init_function_name_or_matrices,
        list_of_optimized_matrices, model_file_name, num_iterations,
        learning_rate, component_type_string, target_class=None,
        layer_name=None, neuron_indices=None, channel_index=None,
        ideal_activation=None, storm_ids=None, storm_times_unix_sec=None):
    """Writes optimized learning examples to Pickle file.

    E = number of examples (storm objects)

    :param pickle_file_name: Path to output file.
    :param init_function_name_or_matrices: See doc for
        `_do_gradient_descent`.  The only difference here is that, if a
        function was used, the input argument must be the function *name*
        rather than the function itself.
    :param list_of_optimized_matrices: List of numpy arrays created by
        `_do_gradient_descent`.
    :param model_file_name: Path to file with trained model (readable by
        `cnn.read_model`).
    :param num_iterations: See doc for `_do_gradient_descent`.
    :param learning_rate: Same.
    :param component_type_string: See doc for
        `model_interpretation.check_component_metadata`.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param ideal_activation: See doc for `optimize_input_for_neuron` or
        `optimize_input_for_channel`.
    :param storm_ids: [used only if `init_function_name_or_matrices` is list
        of matrices] length-E list of storm IDs (strings).
    :param storm_times_unix_sec: [used only if
        `init_function_name_or_matrices` is list of matrices]
        length-E numpy array of storm times.
    :raises: ValueError: if `init_function_name_or_matrices` is a list of
        numpy arrays and has a different length than
        `list_of_optimized_matrices`.
    """

    model_interpretation.check_component_metadata(
        component_type_string=component_type_string,
        target_class=target_class, layer_name=layer_name,
        neuron_indices=neuron_indices, channel_index=channel_index)

    _check_input_args(
        num_iterations=num_iterations, learning_rate=learning_rate,
        ideal_activation=ideal_activation)

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_list(list_of_optimized_matrices)

    if isinstance(init_function_name_or_matrices, str):
        num_storm_objects = None
    else:
        num_init_matrices = len(init_function_name_or_matrices)
        num_optimized_matrices = len(list_of_optimized_matrices)

        if num_init_matrices != num_optimized_matrices:
            error_string = (
                'Number of input matrices ({0:d}) should equal number of '
                'output matrices ({1:d}).'
            ).format(num_init_matrices, num_optimized_matrices)

            raise ValueError(error_string)

        error_checking.assert_is_string_list(storm_ids)
        error_checking.assert_is_numpy_array(
            numpy.array(storm_ids), num_dimensions=1)

        num_storm_objects = len(storm_ids)
        these_expected_dim = numpy.array([num_storm_objects], dtype=int)

        error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
        error_checking.assert_is_numpy_array(
            storm_times_unix_sec, exact_dimensions=these_expected_dim)

    num_matrices = len(list_of_optimized_matrices)

    for i in range(num_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_optimized_matrices[i])

        if num_storm_objects is not None:
            these_expected_dim = numpy.array(
                (num_storm_objects,) + list_of_optimized_matrices[i].shape[1:],
                dtype=int)
            error_checking.assert_is_numpy_array(
                list_of_optimized_matrices[i],
                exact_dimensions=these_expected_dim)

        if not isinstance(init_function_name_or_matrices, str):
            error_checking.assert_is_numpy_array_without_nan(
                init_function_name_or_matrices[i])

            these_expected_dim = numpy.array(
                list_of_optimized_matrices[i].shape, dtype=int)
            error_checking.assert_is_numpy_array(
                init_function_name_or_matrices[i],
                exact_dimensions=these_expected_dim)

    optimization_dict = {
        INIT_FUNCTION_KEY: init_function_name_or_matrices,
        OPTIMIZED_MATRICES_KEY: list_of_optimized_matrices,
        MODEL_FILE_NAME_KEY: model_file_name,
        NUM_ITERATIONS_KEY: num_iterations,
        LEARNING_RATE_KEY: learning_rate,
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        IDEAL_ACTIVATION_KEY: ideal_activation,
        NEURON_INDICES_KEY: neuron_indices,
        CHANNEL_INDEX_KEY: channel_index,
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(optimization_dict, pickle_file_handle)
    pickle_file_handle.close()
def _run(evaluation_file_names, line_styles, line_colour_strings, set_descriptions_verbose, confidence_level, use_log_scale, plot_by_height, output_dir_name): """Plots model evaluation. This is effectively the main method. :param evaluation_file_names: See documentation at top of file. :param line_styles: Same. :param line_colour_strings: Same. :param set_descriptions_verbose: Same. :param confidence_level: Same. :param use_log_scale: Same. :param plot_by_height: Same. :param output_dir_name: Same. """ # Check input args. file_system_utils.mkdir_recursive_if_necessary( directory_name=output_dir_name) if confidence_level < 0: confidence_level = None if confidence_level is not None: error_checking.assert_is_geq(confidence_level, 0.9) error_checking.assert_is_less_than(confidence_level, 1.) num_evaluation_sets = len(evaluation_file_names) expected_dim = numpy.array([num_evaluation_sets], dtype=int) error_checking.assert_is_string_list(line_styles) error_checking.assert_is_numpy_array(numpy.array(line_styles), exact_dimensions=expected_dim) error_checking.assert_is_string_list(set_descriptions_verbose) error_checking.assert_is_numpy_array(numpy.array(set_descriptions_verbose), exact_dimensions=expected_dim) set_descriptions_verbose = [ s.replace('_', ' ') for s in set_descriptions_verbose ] set_descriptions_abbrev = [ s.lower().replace(' ', '-') for s in set_descriptions_verbose ] error_checking.assert_is_string_list(line_colour_strings) error_checking.assert_is_numpy_array(numpy.array(line_colour_strings), exact_dimensions=expected_dim) line_colours = [ numpy.fromstring(s, dtype=float, sep='_') / 255 for s in line_colour_strings ] for i in range(num_evaluation_sets): error_checking.assert_is_numpy_array(line_colours[i], exact_dimensions=numpy.array( [3], dtype=int)) error_checking.assert_is_geq_numpy_array(line_colours[i], 0.) error_checking.assert_is_leq_numpy_array(line_colours[i], 1.) # Read files. 
    evaluation_tables_xarray = [xarray.Dataset()] * num_evaluation_sets
    prediction_dicts = [dict()] * num_evaluation_sets

    for i in range(num_evaluation_sets):
        print('Reading data from: "{0:s}"...'.format(evaluation_file_names[i]))
        evaluation_tables_xarray[i] = evaluation.read_file(
            evaluation_file_names[i])

        this_prediction_file_name = (
            evaluation_tables_xarray[i].attrs[evaluation.PREDICTION_FILE_KEY]
        )

        print(
            'Reading data from: "{0:s}"...'.format(this_prediction_file_name))
        prediction_dicts[i] = prediction_io.read_file(
            this_prediction_file_name)

    model_file_name = (
        evaluation_tables_xarray[0].attrs[evaluation.MODEL_FILE_KEY]
    )
    model_metafile_name = neural_net.find_metafile(
        model_dir_name=os.path.split(model_file_name)[0],
        raise_error_if_missing=True)

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = neural_net.read_metafile(model_metafile_name)

    generator_option_dict = model_metadata_dict[
        neural_net.TRAINING_OPTIONS_KEY]
    scalar_target_names = (
        generator_option_dict[neural_net.SCALAR_TARGET_NAMES_KEY]
    )
    vector_target_names = (
        generator_option_dict[neural_net.VECTOR_TARGET_NAMES_KEY]
    )
    heights_m_agl = generator_option_dict[neural_net.HEIGHTS_KEY]

    # Catch only the expected failure mode (no auxiliary-target coordinate in
    # the file), rather than using a bare `except`.
    try:
        aux_target_names = evaluation_tables_xarray[0].coords[
            evaluation.AUX_TARGET_FIELD_DIM
        ].values
    except KeyError:
        aux_target_names = []

    num_scalar_targets = len(scalar_target_names)
    num_vector_targets = len(vector_target_names)
    num_heights = len(heights_m_agl)
    num_aux_targets = len(aux_target_names)

    example_dict = {
        example_utils.SCALAR_TARGET_NAMES_KEY: scalar_target_names,
        example_utils.VECTOR_TARGET_NAMES_KEY: vector_target_names,
        example_utils.HEIGHTS_KEY: heights_m_agl,
        example_utils.SCALAR_PREDICTOR_NAMES_KEY:
            generator_option_dict[neural_net.SCALAR_PREDICTOR_NAMES_KEY],
        example_utils.VECTOR_PREDICTOR_NAMES_KEY:
            generator_option_dict[neural_net.VECTOR_PREDICTOR_NAMES_KEY]
    }

    normalization_file_name = (
        generator_option_dict[neural_net.NORMALIZATION_FILE_KEY]
    )
    print((
        'Reading training examples (for climatology) from: "{0:s}"...'
    ).format(normalization_file_name))

    training_example_dict = example_io.read_file(normalization_file_name)
    training_example_dict = example_utils.subset_by_height(
        example_dict=training_example_dict, heights_m_agl=heights_m_agl)
    mean_training_example_dict = normalization.create_mean_example(
        new_example_dict=example_dict,
        training_example_dict=training_example_dict)

    print(SEPARATOR_STRING)

    # Do actual stuff.
_plot_error_distributions( prediction_dicts=prediction_dicts, model_metadata_dict=model_metadata_dict, aux_target_names=aux_target_names, set_descriptions_abbrev=set_descriptions_abbrev, set_descriptions_verbose=set_descriptions_verbose, output_dir_name=output_dir_name) print(SEPARATOR_STRING) _plot_reliability_by_height( evaluation_tables_xarray=evaluation_tables_xarray, vector_target_names=vector_target_names, heights_m_agl=heights_m_agl, set_descriptions_abbrev=set_descriptions_abbrev, set_descriptions_verbose=set_descriptions_verbose, output_dir_name=output_dir_name) print(SEPARATOR_STRING) for k in range(num_vector_targets): for this_score_name in list(SCORE_NAME_TO_PROFILE_KEY.keys()): _plot_score_profile( evaluation_tables_xarray=evaluation_tables_xarray, line_styles=line_styles, line_colours=line_colours, set_descriptions_verbose=set_descriptions_verbose, confidence_level=confidence_level, target_name=vector_target_names[k], score_name=this_score_name, use_log_scale=use_log_scale, output_dir_name=output_dir_name) print(SEPARATOR_STRING) for k in range(num_scalar_targets): _plot_attributes_diagram( evaluation_tables_xarray=evaluation_tables_xarray, line_styles=line_styles, line_colours=line_colours, set_descriptions_abbrev=set_descriptions_abbrev, set_descriptions_verbose=set_descriptions_verbose, confidence_level=confidence_level, mean_training_example_dict=mean_training_example_dict, target_name=scalar_target_names[k], output_dir_name=output_dir_name) for k in range(num_aux_targets): _plot_attributes_diagram( evaluation_tables_xarray=evaluation_tables_xarray, line_styles=line_styles, line_colours=line_colours, set_descriptions_abbrev=set_descriptions_abbrev, set_descriptions_verbose=set_descriptions_verbose, confidence_level=confidence_level, mean_training_example_dict=mean_training_example_dict, target_name=aux_target_names[k], output_dir_name=output_dir_name) if not plot_by_height: return print(SEPARATOR_STRING) for k in range(num_vector_targets): for j in range(num_heights): _plot_attributes_diagram( evaluation_tables_xarray=evaluation_tables_xarray, line_styles=line_styles, line_colours=line_colours, set_descriptions_abbrev=set_descriptions_abbrev, set_descriptions_verbose=set_descriptions_verbose, confidence_level=confidence_level, mean_training_example_dict=mean_training_example_dict, height_m_agl=heights_m_agl[j], target_name=vector_target_names[k], output_dir_name=output_dir_name) if k != num_vector_targets - 1: print(SEPARATOR_STRING)
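# The colour strings handled by `_run` above are underscore-separated RGB
# triples on [0, 255].  A quick sketch of the parsing convention (values made
# up):
import numpy

line_colour_string = '255_128_0'
line_colour = numpy.fromstring(line_colour_string, dtype=float, sep='_') / 255
# line_colour is now approximately array([1., 0.502, 0.]).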
def test_assert_is_string_list_true(self): """Checks assert_is_string_list when input is string list.""" error_checking.assert_is_string_list(STRING_LIST)
def write_target_values(storm_to_events_table, target_names, netcdf_file_name): """Writes target values to NetCDF file. :param storm_to_events_table: pandas DataFrame created by `create_wind_regression_targets`, `create_wind_classification_targets`, or `create_tornado_targets`. :param target_names: 1-D list with names of target variables to write. Each name must be a column in `storm_to_events_table`. :param netcdf_file_name: Path to output file. :raises: ValueError: if any item in `target_names` is not a valid name. """ error_checking.assert_is_string_list(target_names) error_checking.assert_is_numpy_array( numpy.array(target_names), num_dimensions=1 ) for this_target_name in target_names: this_param_dict = target_name_to_params(this_target_name) if this_param_dict is not None: continue error_string = ( '"{0:s}" is not a valid name for a target variable.' ).format(this_target_name) raise ValueError(error_string) file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name) netcdf_dataset = netCDF4.Dataset( netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET') full_id_strings = storm_to_events_table[ tracking_utils.FULL_ID_COLUMN].values num_storm_objects = len(full_id_strings) num_id_characters = 0 for i in range(num_storm_objects): num_id_characters = max([ num_id_characters, len(full_id_strings[i]) ]) netcdf_dataset.createDimension( STORM_OBJECT_DIMENSION_KEY, num_storm_objects) netcdf_dataset.createDimension(CHARACTER_DIMENSION_KEY, num_id_characters) netcdf_dataset.createVariable( FULL_IDS_KEY, datatype='S1', dimensions=(STORM_OBJECT_DIMENSION_KEY, CHARACTER_DIMENSION_KEY) ) string_type = 'S{0:d}'.format(num_id_characters) full_ids_char_array = netCDF4.stringtochar(numpy.array( full_id_strings, dtype=string_type )) netcdf_dataset.variables[FULL_IDS_KEY][:] = numpy.array(full_ids_char_array) netcdf_dataset.createVariable( VALID_TIMES_KEY, datatype=numpy.int32, dimensions=STORM_OBJECT_DIMENSION_KEY ) netcdf_dataset.variables[VALID_TIMES_KEY][:] = storm_to_events_table[ tracking_utils.VALID_TIME_COLUMN].values for this_target_name in target_names: netcdf_dataset.createVariable( this_target_name, datatype=numpy.float32, dimensions=STORM_OBJECT_DIMENSION_KEY ) netcdf_dataset.variables[this_target_name][:] = storm_to_events_table[ this_target_name].values netcdf_dataset.close()
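# Hypothetical usage sketch for `write_target_values`.  The column constants
# come from `tracking_utils`, as in the function above; the target-variable
# name is an assumption about the library's naming scheme, and
# `target_name_to_params` must accept it for the call to succeed.
import pandas

storm_to_events_table = pandas.DataFrame({
    tracking_utils.FULL_ID_COLUMN: ['storm_a', 'storm_b'],
    tracking_utils.VALID_TIME_COLUMN: [1524873600, 1524873900],
    'tornado_lead-time=0000-3600sec_distance=00000-10000m': [0, 1]
})

write_target_values(
    storm_to_events_table=storm_to_events_table,
    target_names=['tornado_lead-time=0000-3600sec_distance=00000-10000m'],
    netcdf_file_name='targets/tornado_targets.nc')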
def plot_predictors( example_dict, example_index, predictor_names, predictor_colours, predictor_line_widths, predictor_line_styles, use_log_scale, include_units=True, handle_dict=None): """Plots several predictors on the same set of axes. P = number of predictors to plot (must all be profiles) :param example_dict: See doc for `example_io.read_file`. :param example_index: Will plot the [i]th example, where i = `example_index`. :param predictor_names: length-P list with names of predictors to plot. :param predictor_colours: length-P list of colours (each colour in any format accepted by matplotlib). :param predictor_line_widths: length-P numpy array of line widths. :param predictor_line_styles: length-P list of line styles (each style in any format accepted by matplotlib). :param use_log_scale: Boolean flag. If True, will plot height (y-axis) in logarithmic scale. If False, will plot height in linear scale. :param include_units: Boolean flag. If True, axis titles will include units and values will be converted from default to plotting units. If False, axis titles will *not* include units and this method will *not* convert units. :param handle_dict: See output doc. If None, will create new figure on the fly. :return: handle_dict: Dictionary with the following keys. handle_dict['figure_object']: Figure handle (instance of `matplotlib.figure.Figure`). handle_dict['axes_objects']: length-P list of axes handles (each an instance of `matplotlib.axes._subplots.AxesSubplot`). """ # Check input args. error_checking.assert_is_integer(example_index) error_checking.assert_is_geq(example_index, 0) error_checking.assert_is_boolean(use_log_scale) error_checking.assert_is_boolean(include_units) error_checking.assert_is_string_list(predictor_names) num_predictors = len(predictor_names) error_checking.assert_is_leq(num_predictors, 4) for k in range(num_predictors): assert predictor_names[k] in example_utils.ALL_PREDICTOR_NAMES # assert predictor_names[k] in example_utils.ALL_VECTOR_PREDICTOR_NAMES assert len(predictor_colours) == num_predictors assert len(predictor_line_widths) == num_predictors assert len(predictor_line_styles) == num_predictors # Housekeeping. _set_font_size(FANCY_FONT_SIZE) if handle_dict is None: figure_object, first_axes_object = pyplot.subplots( 1, 1, figsize=(FANCY_FIGURE_WIDTH_INCHES, FANCY_FIGURE_HEIGHT_INCHES) ) axes_objects = [first_axes_object] figure_object.subplots_adjust(bottom=0.75) if use_log_scale: pyplot.yscale('log') for k in range(1, num_predictors): axes_objects.append(axes_objects[0].twiny()) if k == 2: axes_objects[k].spines['top'].set_position(('axes', 1.15)) _make_spines_invisible(axes_objects[k]) axes_objects[k].spines['top'].set_visible(True) if k == 3: axes_objects[k].xaxis.set_ticks_position('bottom') axes_objects[k].xaxis.set_label_position('bottom') axes_objects[k].spines['bottom'].set_position(('axes', -0.15)) _make_spines_invisible(axes_objects[k]) axes_objects[k].spines['bottom'].set_visible(True) else: figure_object = handle_dict[FIGURE_HANDLE_KEY] axes_objects = handle_dict[AXES_OBJECTS_KEY] heights_km_agl = METRES_TO_KM * example_dict[example_utils.HEIGHTS_KEY] tick_mark_dict = dict(size=4, width=1.5) for k in range(num_predictors): if predictor_names[k] in example_utils.ALL_SCALAR_PREDICTOR_NAMES: # TODO(thunderhoser): This is a HACK to deal with saliency maps. 
j = example_dict[example_utils.SCALAR_PREDICTOR_NAMES_KEY].index( predictor_names[k] ) these_predictor_values = ( example_dict[example_utils.SCALAR_PREDICTOR_VALS_KEY][ example_index, :, j ] ) else: these_predictor_values = example_utils.get_field_from_dict( example_dict=example_dict, field_name=predictor_names[k] )[example_index, ...] if include_units: if predictor_names[k] == example_utils.TEMPERATURE_NAME: these_predictor_values = temperature_conv.kelvins_to_celsius( these_predictor_values ) else: these_predictor_values = ( PREDICTOR_NAME_TO_CONV_FACTOR[predictor_names[k]] * these_predictor_values ) axes_objects[k].plot( these_predictor_values, heights_km_agl, color=predictor_colours[k], linewidth=predictor_line_widths[k], linestyle=predictor_line_styles[k] ) x_label_string = copy.deepcopy( PREDICTOR_NAME_TO_VERBOSE[predictor_names[k]] ) if not include_units: x_label_string = x_label_string.split(' (')[0] axes_objects[k].set_xlabel(x_label_string) axes_objects[k].xaxis.label.set_color(predictor_colours[k]) axes_objects[k].tick_params( axis='x', colors=predictor_colours[k], **tick_mark_dict ) axes_objects[0].set_ylabel('Height (km AGL)') axes_objects[0].set_ylim([ numpy.min(heights_km_agl), numpy.max(heights_km_agl) ]) height_strings = create_height_labels( tick_values_km_agl=axes_objects[0].get_yticks(), use_log_scale=use_log_scale ) axes_objects[0].set_yticklabels(height_strings) axes_objects[0].tick_params(axis='y', **tick_mark_dict) return { FIGURE_HANDLE_KEY: figure_object, AXES_OBJECTS_KEY: axes_objects }
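# Hypothetical usage sketch for `plot_predictors` (file path made up): plots
# the temperature profile for the first example in a file readable by
# `example_io.read_file`.
import numpy

example_dict = example_io.read_file('examples/learning_examples.nc')

handle_dict = plot_predictors(
    example_dict=example_dict, example_index=0,
    predictor_names=[example_utils.TEMPERATURE_NAME],
    predictor_colours=[numpy.array([0., 0., 0.])],
    predictor_line_widths=numpy.array([2.]),
    predictor_line_styles=['solid'],
    use_log_scale=True)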
def unzip_1day_tar_file(
        tar_file_name, field_names, spc_date_string,
        top_target_directory_name, refl_heights_m_asl=None):
    """Unzips 1-day tar file (containing raw MYRORSS data for one SPC date).

    :param tar_file_name: Path to input file.
    :param field_names: 1-D list with names of radar fields.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param top_target_directory_name: Name of top-level directory for unzipped
        MYRORSS files.  This method will create a subdirectory therein for the
        SPC date.
    :param refl_heights_m_asl: 1-D numpy array of reflectivity heights (metres
        above sea level).
    :return: target_directory_name: Path to output directory.
    """

    # Verification.
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.asarray(field_names), num_dimensions=1)
    error_checking.assert_is_string(top_target_directory_name)

    # Put azimuthal-shear fields (which are allowed to be missing) at the end.
    # This way, if the tar command errors out due to missing data, it will do
    # so after unzipping all the non-missing data.  Reordering is done on a
    # copy, so that the caller's list is not mutated as a side effect.
    field_names = (
        [f for f in field_names if f not in AZIMUTHAL_RADAR_FIELD_NAMES] +
        [f for f in AZIMUTHAL_RADAR_FIELD_NAMES if f in field_names]
    )

    field_to_heights_dict_m_asl = (
        myrorss_and_mrms_utils.fields_and_refl_heights_to_dict(
            field_names=field_names,
            data_source=radar_utils.MYRORSS_SOURCE_ID,
            refl_heights_m_asl=refl_heights_m_asl)
    )

    target_directory_name = '{0:s}/{1:s}/{2:s}'.format(
        top_target_directory_name, spc_date_string[:4], spc_date_string
    )

    field_names = list(field_to_heights_dict_m_asl.keys())
    directory_names_to_unzip = []

    for this_field_name in field_names:
        these_heights_m_asl = field_to_heights_dict_m_asl[this_field_name]

        for this_height_m_asl in these_heights_m_asl:
            directory_names_to_unzip.append(
                myrorss_and_mrms_io.get_relative_dir_for_raw_files(
                    field_name=this_field_name,
                    data_source=radar_utils.MYRORSS_SOURCE_ID,
                    height_m_asl=this_height_m_asl)
            )

    unzipping.unzip_tar(
        tar_file_name, target_directory_name=target_directory_name,
        file_and_dir_names_to_unzip=directory_names_to_unzip)

    return target_directory_name
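# Hypothetical usage sketch for `unzip_1day_tar_file` (paths made up).  The
# reflectivity height must be valid for the MYRORSS source; 250 m ASL is an
# assumption here.
import numpy

target_directory_name = unzip_1day_tar_file(
    tar_file_name='myrorss_tars/20110424.tar',
    field_names=[radar_utils.REFL_NAME],
    spc_date_string='20110424',
    top_target_directory_name='myrorss_unzipped',
    refl_heights_m_asl=numpy.array([250.]))
# target_directory_name == 'myrorss_unzipped/2011/20110424'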
def read_target_values(netcdf_file_name, target_names=None):
    """Reads target values from NetCDF file.

    E = number of examples (storm objects)
    T = number of target variables

    :param netcdf_file_name: Path to input file.
    :param target_names: 1-D list with names of target variables to read.  If
        None, will read all target variables.
    :return: storm_label_dict: Dictionary with the following keys.
    storm_label_dict['full_id_strings']: length-E list of full storm IDs.
    storm_label_dict['valid_times_unix_sec']: length-E numpy array of valid
        times.
    storm_label_dict['target_names']: length-T list with names of target
        variables.
    storm_label_dict['target_matrix']: E-by-T numpy array of target values
        (integer class labels).
    """

    netcdf_dataset = netcdf_io.open_netcdf(
        netcdf_file_name=netcdf_file_name, raise_error_if_fails=True)

    # Older files store storm IDs under 'storm_ids' rather than FULL_IDS_KEY.
    try:
        full_id_strings = netCDF4.chartostring(
            netcdf_dataset.variables[FULL_IDS_KEY][:]
        )
    except KeyError:
        full_id_strings = netCDF4.chartostring(
            netcdf_dataset.variables['storm_ids'][:]
        )

    valid_times_unix_sec = numpy.array(
        netcdf_dataset.variables[VALID_TIMES_KEY][:], dtype=int
    )

    if target_names is None:
        # Exclude metadata variables, including the legacy 'storm_ids' key
        # handled above.
        target_names = [
            k for k in netcdf_dataset.variables.keys()
            if k not in [FULL_IDS_KEY, 'storm_ids', VALID_TIMES_KEY]
        ]

    error_checking.assert_is_string_list(target_names)
    error_checking.assert_is_numpy_array(
        numpy.array(target_names), num_dimensions=1
    )

    num_storm_objects = len(full_id_strings)
    target_matrix = None

    for this_target_name in target_names:
        these_target_values = numpy.array(
            netcdf_dataset.variables[this_target_name][:], dtype=int
        )
        these_target_values = numpy.reshape(
            these_target_values, (num_storm_objects, 1)
        )

        if target_matrix is None:
            target_matrix = these_target_values + 0  # "+ 0" forces a copy.
        else:
            target_matrix = numpy.concatenate(
                (target_matrix, these_target_values), axis=1
            )

    netcdf_dataset.close()

    return {
        FULL_IDS_KEY: [str(f) for f in full_id_strings],
        VALID_TIMES_KEY: valid_times_unix_sec,
        TARGET_NAMES_KEY: target_names,
        TARGET_MATRIX_KEY: target_matrix
    }
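# Hypothetical usage sketch for `read_target_values`, reading back the file
# written in the sketch above.  With `target_names=None`, every target
# variable in the file is read.
storm_label_dict = read_target_values(
    netcdf_file_name='targets/tornado_targets.nc', target_names=None)

# storm_label_dict[TARGET_MATRIX_KEY] is an E-by-T numpy array of integer
# class labels.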
def _check_input_args(
        list_of_training_matrices, training_target_values,
        list_of_validation_matrices, validation_target_values,
        predictor_names_by_matrix):
    """Error-checks input arguments for sequential selection.

    N = number of input matrices
    T = number of training examples
    V = number of validation examples
    C_q = number of channels (predictors) in the [q]th matrix

    :param list_of_training_matrices: length-N list of matrices (numpy
        arrays).  The first axis of each matrix should have length T.
    :param training_target_values: length-T numpy array of target values
        (integer class labels).
    :param list_of_validation_matrices: length-N list of numpy arrays.  The
        first axis of each matrix should have length V; otherwise,
        list_of_validation_matrices[q] should have the same dimensions as
        list_of_training_matrices[q].
    :param validation_target_values: length-V numpy array of target values
        (integer class labels).
    :param predictor_names_by_matrix: length-N list of lists.  The [q]th inner
        list should contain names of the predictor variables (channels) in
        the [q]th matrix, with length C_q.
    :raises: ValueError: if length of `list_of_training_matrices` != length of
        `list_of_validation_matrices`.
    :raises: ValueError: if length of `list_of_training_matrices` != length of
        `predictor_names_by_matrix`.
    :raises: ValueError: if any input matrix has < 3 dimensions.
    """

    error_checking.assert_is_integer_numpy_array(training_target_values)
    error_checking.assert_is_geq_numpy_array(training_target_values, 0)
    error_checking.assert_is_integer_numpy_array(validation_target_values)
    error_checking.assert_is_geq_numpy_array(validation_target_values, 0)

    num_input_matrices = len(list_of_training_matrices)

    if len(list_of_validation_matrices) != num_input_matrices:
        error_string = (
            'Number of training matrices ({0:d}) should equal number of '
            'validation matrices ({1:d}).'
        ).format(num_input_matrices, len(list_of_validation_matrices))

        raise ValueError(error_string)

    if len(predictor_names_by_matrix) != num_input_matrices:
        error_string = (
            'Number of predictor-name lists ({0:d}) should equal number of '
            'input matrices ({1:d}).'
        ).format(len(predictor_names_by_matrix), num_input_matrices)

        raise ValueError(error_string)

    num_training_examples = len(training_target_values)
    num_validation_examples = len(validation_target_values)

    for q in range(num_input_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_training_matrices[q])
        error_checking.assert_is_numpy_array_without_nan(
            list_of_validation_matrices[q])

        this_num_dimensions = len(list_of_training_matrices[q].shape)

        if this_num_dimensions < 3:
            error_string = (
                'Training matrix {0:d} has {1:d} dimensions.  Should have at '
                'least 3.'
            ).format(q + 1, this_num_dimensions)

            raise ValueError(error_string)

        this_num_dimensions = len(list_of_validation_matrices[q].shape)

        if this_num_dimensions < 3:
            error_string = (
                'Validation matrix {0:d} has {1:d} dimensions.  Should have '
                'at least 3.'
            ).format(q + 1, this_num_dimensions)

            raise ValueError(error_string)

        error_checking.assert_is_string_list(predictor_names_by_matrix[q])
        this_num_predictors = len(predictor_names_by_matrix[q])

        these_expected_dimensions = (
            (num_training_examples,) +
            list_of_training_matrices[q].shape[1:-1] +
            (this_num_predictors,)
        )
        these_expected_dimensions = numpy.array(
            these_expected_dimensions, dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_training_matrices[q],
            exact_dimensions=these_expected_dimensions)

        these_expected_dimensions = (
            (num_validation_examples,) +
            list_of_validation_matrices[q].shape[1:-1] +
            (this_num_predictors,)
        )
        these_expected_dimensions = numpy.array(
            these_expected_dimensions, dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_validation_matrices[q],
            exact_dimensions=these_expected_dimensions)
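# Shape sketch for `_check_input_args` (all sizes and predictor names made
# up): 100 training and 20 validation examples on a 32 x 32 grid, with 3
# channels in the single input matrix.  These arguments satisfy every check
# above.
import numpy

_check_input_args(
    list_of_training_matrices=[numpy.random.uniform(size=(100, 32, 32, 3))],
    training_target_values=numpy.random.randint(0, 2, size=100),
    list_of_validation_matrices=[numpy.random.uniform(size=(20, 32, 32, 3))],
    validation_target_values=numpy.random.randint(0, 2, size=20),
    predictor_names_by_matrix=[['reflectivity', 'divergence', 'vorticity']])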
def normalize_radar_images(
        radar_image_matrix, field_names, normalization_type_string,
        normalization_param_file_name, test_mode=False,
        min_normalized_value=0., max_normalized_value=1.,
        normalization_table=None):
    """Normalizes radar images.

    If normalization_type_string = "z", z-score normalization is done for each
    field independently.  Means and standard deviations are read from the
    normalization file.

    If normalization_type_string = "minmax", min-max normalization is done for
    each field independently, using the following equations.  Climatological
    minima and maxima are read from the normalization file.

    x_unscaled(i, j) = [x(i, j) - x_min] / [x_max - x_min]

    x_scaled(i, j) = x_unscaled(i, j) *
                     [max_normalized_value - min_normalized_value] +
                     min_normalized_value

    x(i, j) = original value at pixel (i, j)
    x_min = climatological minimum for field x
    x_max = climatological maximum for field x
    x_unscaled(i, j) = normalized but unscaled value at pixel (i, j)
    min_normalized_value = minimum value after scaling (from input args)
    max_normalized_value = maximum value after scaling (from input args)
    x_scaled(i, j) = normalized and scaled value at pixel (i, j)

    :param radar_image_matrix: numpy array of radar images.  Dimensions may be
        E x M x N x C or E x M x N x H_r x F_r.
    :param field_names: 1-D list with names of radar fields, in the order that
        they appear in radar_image_matrix.  If radar_image_matrix is
        4-dimensional, field_names must have length C.  If radar_image_matrix
        is 5-dimensional, field_names must have length F_r.  Each field name
        must be accepted by `radar_utils.check_field_name`.
    :param normalization_type_string: Normalization type (must be accepted by
        `_check_normalization_type`).
    :param normalization_param_file_name: Path to file with normalization
        params.  Will be read by `read_normalization_params_from_file`.
    :param test_mode: For testing only.  Leave this alone.
    :param min_normalized_value: [used only if normalization_type_string =
        "minmax"] Minimum normalized value.
    :param max_normalized_value: [used only if normalization_type_string =
        "minmax"] Maximum normalized value.
    :param normalization_table: For testing only.  Leave this alone.
    :return: radar_image_matrix: Normalized version of input, with the same
        dimensions.
""" error_checking.assert_is_boolean(test_mode) if not test_mode: normalization_table = read_normalization_params_from_file( normalization_param_file_name)[0] check_radar_images(radar_image_matrix=radar_image_matrix, min_num_dimensions=4, max_num_dimensions=5) num_fields = radar_image_matrix.shape[-1] error_checking.assert_is_string_list(field_names) error_checking.assert_is_numpy_array(numpy.array(field_names), exact_dimensions=numpy.array( [num_fields])) _check_normalization_type(normalization_type_string) if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING: error_checking.assert_is_greater(max_normalized_value, min_normalized_value) for j in range(num_fields): if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING: this_min_value = normalization_table[MIN_VALUE_COLUMN].loc[ field_names[j]] this_max_value = normalization_table[MAX_VALUE_COLUMN].loc[ field_names[j]] radar_image_matrix[..., j] = ( (radar_image_matrix[..., j] - this_min_value) / (this_max_value - this_min_value)) radar_image_matrix[..., j] = min_normalized_value + ( radar_image_matrix[..., j] * (max_normalized_value - min_normalized_value)) else: this_mean = normalization_table[MEAN_VALUE_COLUMN].loc[ field_names[j]] this_standard_deviation = normalization_table[ STANDARD_DEVIATION_COLUMN].loc[field_names[j]] radar_image_matrix[..., j] = ((radar_image_matrix[..., j] - this_mean) / this_standard_deviation) return radar_image_matrix
def find_examples(all_id_strings, desired_id_strings, allow_missing=False):
    """Finds examples with desired IDs.

    E = number of desired examples

    :param all_id_strings: 1-D list with all example IDs.
    :param desired_id_strings: length-E list of desired IDs.
    :param allow_missing: Boolean flag.  If True, will allow some desired IDs
        to be missing.  If False, will raise error if any desired ID is
        missing.
    :return: desired_indices: length-E numpy array with indices of desired
        examples.  Missing IDs are denoted by an index of -1.
    :raises: ValueError: if either list of IDs has non-unique entries.
    :raises: ValueError: if `allow_missing == False` and any desired ID is
        missing.
    """

    error_checking.assert_is_string_list(all_id_strings)
    error_checking.assert_is_string_list(desired_id_strings)
    error_checking.assert_is_boolean(allow_missing)

    all_id_strings_numpy = numpy.array(all_id_strings)
    desired_id_strings_numpy = numpy.array(desired_id_strings)

    these_unique_strings, these_counts = numpy.unique(
        all_id_strings_numpy, return_counts=True)

    if numpy.any(these_counts > 1):
        these_indices = numpy.where(these_counts > 1)[0]

        error_string = (
            '\nall_id_strings contains {0:d} repeated entries, listed below:'
            '\n{1:s}'
        ).format(len(these_indices), str(these_unique_strings[these_indices]))

        raise ValueError(error_string)

    these_unique_strings, these_counts = numpy.unique(
        desired_id_strings_numpy, return_counts=True)

    if numpy.any(these_counts > 1):
        these_indices = numpy.where(these_counts > 1)[0]

        error_string = (
            '\ndesired_id_strings contains {0:d} repeated entries, listed '
            'below:\n{1:s}'
        ).format(len(these_indices), str(these_unique_strings[these_indices]))

        raise ValueError(error_string)

    sort_indices = numpy.argsort(all_id_strings_numpy)
    desired_indices = numpy.searchsorted(
        all_id_strings_numpy[sort_indices], desired_id_strings_numpy,
        side='left'
    ).astype(int)

    # Clamp before mapping through `sort_indices`: for a desired ID that sorts
    # after every actual ID, `searchsorted` returns len(all_id_strings), which
    # would otherwise trigger an index error on the next line.
    desired_indices = numpy.maximum(desired_indices, 0)
    desired_indices = numpy.minimum(desired_indices, len(all_id_strings) - 1)
    desired_indices = sort_indices[desired_indices]

    if allow_missing:
        bad_indices = numpy.where(
            all_id_strings_numpy[desired_indices] != desired_id_strings_numpy
        )[0]
        desired_indices[bad_indices] = -1
        return desired_indices

    if numpy.array_equal(all_id_strings_numpy[desired_indices],
                         desired_id_strings_numpy):
        return desired_indices

    missing_flags = (
        all_id_strings_numpy[desired_indices] != desired_id_strings_numpy
    )

    error_string = (
        '{0:d} of {1:d} desired IDs (listed below) are missing:\n{2:s}'
    ).format(
        numpy.sum(missing_flags), len(desired_id_strings),
        str(desired_id_strings_numpy[missing_flags])
    )

    raise ValueError(error_string)
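# Usage sketch for `find_examples` with made-up IDs.  Since 'storm_x' is
# absent and `allow_missing=True`, its index comes back as -1.
import numpy

desired_indices = find_examples(
    all_id_strings=['storm_a', 'storm_b', 'storm_c'],
    desired_id_strings=['storm_c', 'storm_a', 'storm_x'],
    allow_missing=True)
# desired_indices == numpy.array([2, 0, -1])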
def normalize_soundings(
        sounding_matrix, field_names, normalization_type_string,
        normalization_param_file_name, test_mode=False,
        min_normalized_value=0., max_normalized_value=1.,
        normalization_table=None):
    """Normalizes soundings.

    This method uses the same equations as `normalize_radar_images`.

    :param sounding_matrix: numpy array (E x H_s x F_s) of soundings.
    :param field_names: list (length F_s) of field names, in the order that
        they appear in `sounding_matrix`.
    :param normalization_type_string: Normalization type (must be accepted by
        `_check_normalization_type`).
    :param normalization_param_file_name: Path to file with normalization
        params.  Will be read by `read_normalization_params_from_file`.
    :param test_mode: For testing only.  Leave this alone.
    :param min_normalized_value: [used only if normalization_type_string =
        "minmax"] Minimum normalized value.
    :param max_normalized_value: [used only if normalization_type_string =
        "minmax"] Maximum normalized value.
    :param normalization_table: For testing only.  Leave this alone.
    :return: sounding_matrix: Normalized version of input, with the same
        dimensions.
    """

    error_checking.assert_is_boolean(test_mode)

    if not test_mode:
        normalization_table = read_normalization_params_from_file(
            normalization_param_file_name)[2]

    error_checking.assert_is_string_list(field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(field_names), num_dimensions=1)

    num_fields = len(field_names)
    check_soundings(sounding_matrix=sounding_matrix, num_fields=num_fields)
    _check_normalization_type(normalization_type_string)

    if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
        error_checking.assert_is_greater(
            max_normalized_value, min_normalized_value)

    for j in range(num_fields):
        if normalization_type_string == MINMAX_NORMALIZATION_TYPE_STRING:
            this_min_value = normalization_table[MIN_VALUE_COLUMN].loc[
                field_names[j]]
            this_max_value = normalization_table[MAX_VALUE_COLUMN].loc[
                field_names[j]]

            sounding_matrix[..., j] = (
                (sounding_matrix[..., j] - this_min_value) /
                (this_max_value - this_min_value)
            )
            sounding_matrix[..., j] = min_normalized_value + (
                sounding_matrix[..., j] *
                (max_normalized_value - min_normalized_value)
            )
        else:
            this_mean = normalization_table[MEAN_VALUE_COLUMN].loc[
                field_names[j]]
            this_standard_deviation = normalization_table[
                STANDARD_DEVIATION_COLUMN].loc[field_names[j]]

            sounding_matrix[..., j] = (
                (sounding_matrix[..., j] - this_mean) /
                this_standard_deviation
            )

    return sounding_matrix
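# Test-mode z-score sketch for `normalize_soundings` (numbers and field name
# made up; `Z_NORMALIZATION_TYPE_STRING` is assumed to be this module's name
# for the "z" type).  With mean 280 K and standard deviation 10 K, a 290-K
# value normalizes to (290 - 280) / 10 = 1.
import numpy
import pandas

normalization_table = pandas.DataFrame(
    {MEAN_VALUE_COLUMN: [280.], STANDARD_DEVIATION_COLUMN: [10.]},
    index=['temperature_kelvins']
)

sounding_matrix = numpy.full((1, 49, 1), 290.)
sounding_matrix = normalize_soundings(
    sounding_matrix=sounding_matrix,
    field_names=['temperature_kelvins'],
    normalization_type_string=Z_NORMALIZATION_TYPE_STRING,
    normalization_param_file_name=None, test_mode=True,
    normalization_table=normalization_table)
# Every value in sounding_matrix is now 1.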