def get_rotations(num_rotations, max_absolute_rotation_angle_deg):
    """Creates an array of rotation angles.

    These angles are meant for use in `rotate_radar_images`.

    N = number of rotations

    :param num_rotations: Number of rotations. Image will be rotated only in
        the xy-plane (about the z-axis).
    :param max_absolute_rotation_angle_deg: Max absolute rotation angle
        (degrees). In general, the image will be rotated both clockwise and
        counterclockwise, up to this angle.
    :return: ccw_rotation_angles_deg: length-N numpy array of counterclockwise
        rotation angles (degrees).
    """

    error_checking.assert_is_integer(num_rotations)
    if num_rotations == 0:
        return numpy.array([], dtype=float)

    error_checking.assert_is_greater(num_rotations, 0)
    error_checking.assert_is_geq(
        max_absolute_rotation_angle_deg, MIN_ABSOLUTE_ROTATION_ANGLE_DEG)
    error_checking.assert_is_leq(
        max_absolute_rotation_angle_deg, MAX_ABSOLUTE_ROTATION_ANGLE_DEG)

    absolute_rotation_angles_deg = numpy.random.uniform(
        low=1., high=max_absolute_rotation_angle_deg, size=num_rotations)

    possible_signs = numpy.array([-1, 1], dtype=int)
    return absolute_rotation_angles_deg * numpy.random.choice(
        possible_signs, size=num_rotations, replace=True)
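# Usage sketch for `get_rotations` (hypothetical values). Each angle is drawn
# uniformly from [1, max] degrees and then given a random sign, so it lies in
# [-max, -1] or [1, max]:
#
#     ccw_rotation_angles_deg = get_rotations(
#         num_rotations=4, max_absolute_rotation_angle_deg=30.)
#     # e.g. array([-12.7, 25.1, -3.4, 18.9])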
def sia_for_closed_polygon(
        polygon_object,
        num_vertices_in_half_window=NUM_VERTICES_IN_HALF_WINDOW_DEFAULT,
        num_iterations=NUM_ITERATIONS_DEFAULT, check_input_args=True):
    """Implements the SIA algorithm for a closed polygon.

    This method smooths only the exterior of the polygon, ignoring the
    interior (holes).

    V = number of exterior vertices

    :param polygon_object: Instance of `shapely.geometry.Polygon`.
    :param num_vertices_in_half_window: Number of vertices in smoothing half-
        window. Number of vertices in full window =
        2 * num_vertices_in_half_window + 1.
    :param num_iterations: Number of iterations.
    :param check_input_args: Boolean flag. If True, will error-check input
        arguments. If False, will not.
    :return: vertex_x_coords_smoothed: length-V numpy array with smoothed
        x-coordinates of vertices.
    :return: vertex_y_coords_smoothed: length-V numpy array with smoothed
        y-coordinates of vertices.
    """

    num_vertices = len(polygon_object.exterior.xy[0]) - 1

    if check_input_args:
        error_checking.assert_is_geq(
            num_vertices, MIN_VERTICES_IN_POLYGON_OR_LINE)
        error_checking.assert_is_integer(num_vertices_in_half_window)
        error_checking.assert_is_geq(num_vertices_in_half_window, 1)
        error_checking.assert_is_integer(num_iterations)
        error_checking.assert_is_geq(num_iterations, 1)

    num_vertices_in_half_window = numpy.min(
        numpy.array([num_vertices_in_half_window, num_vertices - 1]))

    for i in range(num_iterations):
        if i == 0:
            this_polygon_object = copy.deepcopy(polygon_object)
        else:
            this_polygon_object = polygons.vertex_arrays_to_polygon_object(
                vertex_x_coords_smoothed, vertex_y_coords_smoothed)

        vertex_x_coords_padded, vertex_y_coords_padded = (
            shape_utils.pad_closed_polygon(
                this_polygon_object,
                num_padding_vertices=num_vertices_in_half_window,
                check_input_args=False))

        vertex_x_coords_smoothed, vertex_y_coords_smoothed = (
            _sia_one_iteration(
                vertex_x_coords_padded, vertex_y_coords_padded,
                num_vertices_in_half_window))

    vertex_x_coords_smoothed = numpy.concatenate((
        vertex_x_coords_smoothed,
        numpy.array([vertex_x_coords_smoothed[0]])))
    vertex_y_coords_smoothed = numpy.concatenate((
        vertex_y_coords_smoothed,
        numpy.array([vertex_y_coords_smoothed[0]])))

    return vertex_x_coords_smoothed, vertex_y_coords_smoothed
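# Usage sketch for `sia_for_closed_polygon` (hypothetical coordinates; assumes
# shapely is available). The output arrays are still closed, i.e., the first
# and last vertices are equal:
#
#     from shapely.geometry import Polygon
#     polygon_object = Polygon([
#         (0, 0), (2, 1), (4, 0), (5, 2), (4, 4), (2, 3), (0, 4), (-1, 2)])
#     x_smoothed, y_smoothed = sia_for_closed_polygon(polygon_object)
#     # x_smoothed[0] == x_smoothed[-1] and y_smoothed[0] == y_smoothed[-1]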
def get_noisings(num_noisings, max_standard_deviation):
    """Creates an array of standard deviations for Gaussian noising.

    These standard deviations are meant for use in `noise_radar_images`.

    N = number of noisings

    :param num_noisings: Number of times to noise the image.
    :param max_standard_deviation: Max standard deviation of Gaussian noise.
    :return: standard_deviations: length-N numpy array of standard deviations.
    """

    error_checking.assert_is_integer(num_noisings)
    if num_noisings == 0:
        return numpy.array([], dtype=float)

    error_checking.assert_is_greater(num_noisings, 0)
    error_checking.assert_is_geq(
        max_standard_deviation, MIN_NOISE_STANDARD_DEVIATION)
    error_checking.assert_is_leq(
        max_standard_deviation, MAX_NOISE_STANDARD_DEVIATION)

    return numpy.random.uniform(
        low=0., high=max_standard_deviation, size=num_noisings)
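# Usage sketch for `get_noisings` (hypothetical values). Returns one standard
# deviation per noising, drawn uniformly from [0, max]:
#
#     standard_deviations = get_noisings(
#         num_noisings=5, max_standard_deviation=0.1)
#     # e.g. array([0.021, 0.093, 0.048, 0.002, 0.075])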
def do_2d_pooling(feature_matrix, stride_length_px=2,
                  pooling_type_string=MAX_POOLING_TYPE_STRING):
    """Pools 2-D feature maps.

    m = number of rows after pooling
    n = number of columns after pooling

    :param feature_matrix: Input feature maps (numpy array). Dimensions must
        be M x N x C or 1 x M x N x C.
    :param stride_length_px: Stride length (pixels). The pooling window will
        move by this many rows or columns at a time as it slides over each
        input feature map.
    :param pooling_type_string: Pooling type (must be accepted by
        `_check_pooling_type`).
    :return: feature_matrix: Output feature maps (numpy array). Dimensions
        will be 1 x m x n x C.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 2)
    _check_pooling_type(pooling_type_string)

    if len(feature_matrix.shape) == 3:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=4)

    feature_tensor = K.pool2d(
        x=K.variable(feature_matrix), pool_mode=pooling_type_string,
        pool_size=(stride_length_px, stride_length_px),
        strides=(stride_length_px, stride_length_px), padding='valid',
        data_format='channels_last')

    return feature_tensor.eval(session=K.get_session())
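# Worked example for `do_2d_pooling` (hypothetical values). A 4 x 4 map with
# one channel, max-pooled with stride 2, becomes a 2 x 2 map in which each
# output pixel is the max over one non-overlapping 2 x 2 window:
#
#     feature_matrix = numpy.arange(16, dtype=float).reshape(4, 4, 1)
#     pooled_matrix = do_2d_pooling(feature_matrix, stride_length_px=2)
#     # pooled_matrix[0, ..., 0] == numpy.array([[5., 7.], [13., 15.]])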
def subset_by_time(example_dict, first_time_unix_sec, last_time_unix_sec):
    """Subsets examples by time.

    :param example_dict: Dictionary of examples (in the format returned by
        `example_io.read_file`).
    :param first_time_unix_sec: Earliest time to keep.
    :param last_time_unix_sec: Latest time to keep.
    :return: example_dict: Same as input but with fewer examples.
    :return: example_indices: 1-D numpy array with indices of examples kept.
    """

    error_checking.assert_is_integer(first_time_unix_sec)
    error_checking.assert_is_integer(last_time_unix_sec)
    error_checking.assert_is_geq(last_time_unix_sec, first_time_unix_sec)

    good_indices = numpy.where(numpy.logical_and(
        example_dict[VALID_TIMES_KEY] >= first_time_unix_sec,
        example_dict[VALID_TIMES_KEY] <= last_time_unix_sec
    ))[0]

    for this_key in ONE_PER_EXAMPLE_KEYS:
        if isinstance(example_dict[this_key], list):
            example_dict[this_key] = [
                example_dict[this_key][k] for k in good_indices
            ]
        else:
            example_dict[this_key] = (
                example_dict[this_key][good_indices, ...]
            )

    return example_dict, good_indices
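# Usage sketch for `subset_by_time` (hypothetical times). Keeps only examples
# valid on 1 Jan 2020 (1577836800 = 0000 UTC 1 Jan 2020):
#
#     example_dict, example_indices = subset_by_time(
#         example_dict=example_dict,
#         first_time_unix_sec=1577836800, last_time_unix_sec=1577923200)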
def find_file(directory_name, year, raise_error_if_missing=True):
    """Finds NetCDF file with RRTM data.

    :param directory_name: Name of directory where file is expected.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag. If file is missing and
        `raise_error_if_missing == True`, will throw error. If file is missing
        and `raise_error_if_missing == False`, will return *expected* file
        path.
    :return: rrtm_file_name: File path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing == True`.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    rrtm_file_name = '{0:s}/rrtm_output_{1:04d}.nc'.format(
        directory_name, year)

    if raise_error_if_missing and not os.path.isfile(rrtm_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            rrtm_file_name)
        raise ValueError(error_string)

    return rrtm_file_name
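# Usage sketch for `find_file` (hypothetical directory). The file path is
# fully determined by the directory and year:
#
#     rrtm_file_name = find_file(
#         directory_name='/data/rrtm', year=2020,
#         raise_error_if_missing=False)
#     # rrtm_file_name == '/data/rrtm/rrtm_output_2020.nc'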
def create_histogram(input_values, num_bins, min_value, max_value):
    """Creates a histogram with uniform bin-spacing.

    N = number of input values
    K = number of bins

    :param input_values: length-N numpy array of input values (to be binned).
    :param num_bins: Number of bins.
    :param min_value: Minimum value to include in histogram. Any input value
        < `min_value` will be assigned to the first bin.
    :param max_value: Maximum value to include in histogram. Any input value
        > `max_value` will be assigned to the last bin.
    :return: input_to_bin_indices: length-N numpy array of bin indices. If
        input_to_bin_indices[i] = j, the [i]th input value belongs in the
        [j]th bin.
    :return: num_examples_by_bin: length-K numpy array, where the [j]th value
        is the number of inputs assigned to the [j]th bin.
    """

    error_checking.assert_is_numpy_array_without_nan(input_values)
    error_checking.assert_is_numpy_array(input_values, num_dimensions=1)
    error_checking.assert_is_integer(num_bins)
    error_checking.assert_is_geq(num_bins, 2)
    error_checking.assert_is_greater(max_value, min_value)

    bin_cutoffs = numpy.linspace(min_value, max_value, num=num_bins + 1)
    input_to_bin_indices = numpy.digitize(
        input_values, bin_cutoffs, right=False) - 1
    input_to_bin_indices[input_to_bin_indices < 0] = 0
    input_to_bin_indices[input_to_bin_indices > num_bins - 1] = num_bins - 1

    num_examples_by_bin = numpy.full(num_bins, -1, dtype=int)
    for j in range(num_bins):
        num_examples_by_bin[j] = numpy.sum(input_to_bin_indices == j)

    return input_to_bin_indices, num_examples_by_bin
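# Worked example for `create_histogram` (hypothetical values). With 5 bins
# from 0 to 10, cutoffs are [0, 2, 4, 6, 8, 10]; values below 0 fall in the
# first bin and values above 10 in the last bin:
#
#     input_to_bin_indices, num_examples_by_bin = create_histogram(
#         input_values=numpy.array([-1., 3., 3.5, 10.5]),
#         num_bins=5, min_value=0., max_value=10.)
#     # input_to_bin_indices == numpy.array([0, 1, 1, 4])
#     # num_examples_by_bin == numpy.array([1, 2, 0, 0, 1])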
def _check_input_args(num_iterations, learning_rate, l2_weight=None,
                      radar_constraint_weight=None,
                      minmax_constraint_weight=None, ideal_activation=None):
    """Error-checks input args for backwards optimization.

    :param num_iterations: See doc for `_do_gradient_descent`.
    :param learning_rate: Same.
    :param l2_weight: Same.
    :param radar_constraint_weight: Weight used to multiply part of loss
        function with radar constraints (see doc for
        `_radar_constraints_to_loss_fn`).
    :param minmax_constraint_weight: Weight used to multiply part of loss
        function with min-max constraints (see doc for
        `_minmax_constraints_to_loss_fn`).
    :param ideal_activation: See doc for
        `optimize_input_for_neuron_activation` or
        `optimize_input_for_channel_activation`.
    """

    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)

    if l2_weight is not None:
        error_checking.assert_is_greater(l2_weight, 0.)
    if radar_constraint_weight is not None:
        error_checking.assert_is_greater(radar_constraint_weight, 0.)
    if minmax_constraint_weight is not None:
        error_checking.assert_is_greater(minmax_constraint_weight, 0.)
    if ideal_activation is not None:
        error_checking.assert_is_greater(ideal_activation, 0.)
def find_processed_file(directory_name, year, raise_error_if_missing=True):
    """Finds processed file with tornado reports.

    See `write_processed_file` for the definition of a "processed file".

    :param directory_name: Name of directory.
    :param year: Year (integer).
    :param raise_error_if_missing: Boolean flag. If file is missing and
        raise_error_if_missing = True, this method will error out.
    :return: processed_file_name: Path to file. If file is missing and
        raise_error_if_missing = False, this will be the *expected* path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_integer(year)
    error_checking.assert_is_boolean(raise_error_if_missing)

    processed_file_name = '{0:s}/tornado_reports_{1:04d}.csv'.format(
        directory_name, year)

    if raise_error_if_missing and not os.path.isfile(processed_file_name):
        error_string = (
            'Cannot find processed file with tornado reports. Expected at: '
            '{0:s}'
        ).format(processed_file_name)
        raise ValueError(error_string)

    return processed_file_name
def _check_args_one_step(predictor_matrix, permuted_flag_matrix,
                         scalar_channel_flags, shuffle_profiles_together,
                         num_bootstrap_reps):
    """Checks input args for `run_*_test_one_step`.

    :param predictor_matrix: See doc for `run_forward_test_one_step` or
        `run_backwards_test_one_step`.
    :param permuted_flag_matrix: Same.
    :param scalar_channel_flags: Same.
    :param shuffle_profiles_together: Same.
    :param num_bootstrap_reps: Same.
    :return: num_bootstrap_reps: Same as input, but with a minimum of 1.
    """

    error_checking.assert_is_numpy_array_without_nan(predictor_matrix)

    # The predictor matrix must have exactly 3 dimensions.
    num_predictor_dim = len(predictor_matrix.shape)
    error_checking.assert_is_geq(num_predictor_dim, 3)
    error_checking.assert_is_leq(num_predictor_dim, 3)

    error_checking.assert_is_boolean_numpy_array(permuted_flag_matrix)
    these_expected_dim = numpy.array(predictor_matrix.shape[1:], dtype=int)
    error_checking.assert_is_numpy_array(
        permuted_flag_matrix, exact_dimensions=these_expected_dim)

    error_checking.assert_is_boolean_numpy_array(scalar_channel_flags)
    these_expected_dim = numpy.array([predictor_matrix.shape[-1]], dtype=int)
    error_checking.assert_is_numpy_array(
        scalar_channel_flags, exact_dimensions=these_expected_dim)

    error_checking.assert_is_boolean(shuffle_profiles_together)
    error_checking.assert_is_integer(num_bootstrap_reps)

    return numpy.maximum(num_bootstrap_reps, 1)
def plot_parallels(
        basemap_object, axes_object, min_latitude_deg=None,
        max_latitude_deg=None, num_parallels=DEFAULT_NUM_PARALLELS,
        line_width=DEFAULT_GRID_LINE_WIDTH,
        line_colour=DEFAULT_GRID_LINE_COLOUR,
        z_order=DEFAULT_GRID_LINE_Z_ORDER):
    """Plots parallels (grid lines for latitude).

    If `min_latitude_deg` and `max_latitude_deg` are both None, this method
    will take plotting limits from `basemap_object`.

    :param basemap_object: See doc for `plot_countries`.
    :param axes_object: Same.
    :param min_latitude_deg: Minimum latitude for grid lines.
    :param max_latitude_deg: Max latitude for grid lines.
    :param num_parallels: Number of parallels.
    :param line_width: See doc for `plot_countries`.
    :param line_colour: Same.
    :param z_order: Same.
    """

    if min_latitude_deg is None or max_latitude_deg is None:
        min_latitude_deg = basemap_object.llcrnrlat
        max_latitude_deg = basemap_object.urcrnrlat

    error_checking.assert_is_valid_latitude(min_latitude_deg)
    error_checking.assert_is_valid_latitude(max_latitude_deg)
    error_checking.assert_is_greater(max_latitude_deg, min_latitude_deg)
    error_checking.assert_is_integer(num_parallels)
    error_checking.assert_is_geq(num_parallels, 2)

    parallel_spacing_deg = (
        (max_latitude_deg - min_latitude_deg) / (num_parallels - 1)
    )

    if parallel_spacing_deg < 1.:
        parallel_spacing_deg = number_rounding.round_to_nearest(
            parallel_spacing_deg, 0.1)
    else:
        parallel_spacing_deg = numpy.round(parallel_spacing_deg)

    min_latitude_deg = number_rounding.ceiling_to_nearest(
        min_latitude_deg, parallel_spacing_deg)
    max_latitude_deg = number_rounding.floor_to_nearest(
        max_latitude_deg, parallel_spacing_deg)
    num_parallels = 1 + int(numpy.round(
        (max_latitude_deg - min_latitude_deg) / parallel_spacing_deg
    ))
    latitudes_deg = numpy.linspace(
        min_latitude_deg, max_latitude_deg, num=num_parallels)

    basemap_object.drawparallels(
        latitudes_deg, color=colour_from_numpy_to_tuple(line_colour),
        linewidth=line_width, labels=[True, False, False, False],
        ax=axes_object, zorder=z_order)
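# Worked example of the spacing logic in `plot_parallels` (hypothetical
# values). With latitudes from 25.3 to 49.7 deg N and num_parallels = 8, raw
# spacing = 24.4 / 7 = 3.49 deg, which rounds to 3 deg. The limits are then
# snapped inward to 27 and 48 deg N, yielding 8 parallels at
# 27, 30, ..., 48 deg N.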
def check_component_metadata(
        component_type_string, target_class=None, layer_name=None,
        neuron_indices=None, channel_index=None):
    """Checks metadata for model component.

    :param component_type_string: Component type (must be accepted by
        `check_component_type`).
    :param target_class: [used only if component_type_string = "class"]
        Target class. Integer from 0...(K - 1), where K = number of classes.
    :param layer_name: [used only if component_type_string = "neuron" or
        "channel"] Name of layer containing neuron or channel.
    :param neuron_indices: [used only if component_type_string = "neuron"]
        1-D numpy array with indices of neuron.
    :param channel_index: [used only if component_type_string = "channel"]
        Index of channel.
    """

    check_component_type(component_type_string)
    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    if component_type_string in [NEURON_COMPONENT_TYPE_STRING,
                                 CHANNEL_COMPONENT_TYPE_STRING]:
        error_checking.assert_is_string(layer_name)

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)
def dimensions_to_grid(num_rows, num_columns):
    """Determines grid from dimensions.

    :param num_rows: Number of rows (unique y-coordinates of grid points).
    :param num_columns: Number of columns (unique x-coordinates of grid
        points).
    :return: grid_name: Grid name.
    :raises: ValueError: if dimensions do not match a known grid.
    """

    error_checking.assert_is_integer(num_rows)
    error_checking.assert_is_integer(num_columns)

    grid_dimensions = numpy.array([num_rows, num_columns], dtype=int)

    for this_grid_name in NARR_GRID_NAMES:
        these_dimensions = numpy.array(
            get_grid_dimensions(
                model_name=NARR_MODEL_NAME, grid_name=this_grid_name),
            dtype=int)

        if numpy.array_equal(these_dimensions, grid_dimensions):
            return this_grid_name

    for this_grid_name in RUC_GRID_NAMES:
        these_dimensions = numpy.array(
            get_grid_dimensions(
                model_name=RUC_MODEL_NAME, grid_name=this_grid_name),
            dtype=int)

        if numpy.array_equal(these_dimensions, grid_dimensions):
            return this_grid_name

    error_string = (
        'Cannot find grid with {0:d} rows and {1:d} columns.'
    ).format(num_rows, num_columns)
    raise ValueError(error_string)
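# Usage sketch for `dimensions_to_grid`. Assuming `get_grid_dimensions`
# returns (277, 349) for the NARR grid (the standard AWIPS 221 definition),
# the call below would return the NARR grid name:
#
#     grid_name = dimensions_to_grid(num_rows=277, num_columns=349)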
def close_frontal_image(ternary_image_matrix, num_iterations=1):
    """Applies binary closing to both warm and cold fronts in image.

    :param ternary_image_matrix: See doc for `_check_frontal_image`.
    :param num_iterations: Number of iterations of binary closing. The more
        iterations, the more frontal pixels will be created.
    :return: ternary_image_matrix: Same as input, but after closing.
    """

    _check_frontal_image(
        image_matrix=ternary_image_matrix, assert_binary=False)
    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)

    binary_warm_front_matrix = binary_closing(
        (ternary_image_matrix == WARM_FRONT_INTEGER_ID).astype(int),
        structure=STRUCTURE_MATRIX_FOR_BINARY_CLOSING, origin=0,
        iterations=num_iterations)
    binary_cold_front_matrix = binary_closing(
        (ternary_image_matrix == COLD_FRONT_INTEGER_ID).astype(int),
        structure=STRUCTURE_MATRIX_FOR_BINARY_CLOSING, origin=0,
        iterations=num_iterations)

    ternary_image_matrix[
        numpy.where(binary_warm_front_matrix)] = WARM_FRONT_INTEGER_ID
    ternary_image_matrix[
        numpy.where(binary_cold_front_matrix)] = COLD_FRONT_INTEGER_ID

    return ternary_image_matrix
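# Minimal sketch of the closing operation used above (hypothetical array;
# assumes `binary_closing` is `scipy.ndimage.binary_closing`). Closing fills
# small gaps between frontal pixels:
#
#     front_matrix = numpy.array([[0, 1, 1, 0, 1, 1, 0]])
#     closed_matrix = binary_closing(
#         front_matrix, structure=numpy.ones((1, 3)), iterations=1)
#     # The one-pixel gap at column 3 is filled: closed_matrix[0, 3] == True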
def do_3d_pooling(feature_matrix, stride_length_px=2,
                  pooling_type_string=MAX_POOLING_TYPE_STRING):
    """Pools 3-D feature maps.

    m = number of rows after pooling
    n = number of columns after pooling
    h = number of heights after pooling

    :param feature_matrix: Input feature maps (numpy array). Dimensions must
        be M x N x H x C or 1 x M x N x H x C.
    :param stride_length_px: See doc for `do_2d_pooling`.
    :param pooling_type_string: Pooling type (must be accepted by
        `_check_pooling_type`).
    :return: feature_matrix: Output feature maps (numpy array). Dimensions
        will be 1 x m x n x h x C.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(stride_length_px)
    error_checking.assert_is_geq(stride_length_px, 2)
    _check_pooling_type(pooling_type_string)

    if len(feature_matrix.shape) == 4:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=5)

    feature_tensor = K.pool3d(
        x=K.variable(feature_matrix), pool_mode=pooling_type_string,
        pool_size=(stride_length_px, stride_length_px, stride_length_px),
        strides=(stride_length_px, stride_length_px, stride_length_px),
        padding='valid', data_format='channels_last')

    # Evaluated via the Keras session, matching `do_2d_pooling` above.
    return feature_tensor.eval(session=K.get_session())
def _check_model_fields(field_matrix, field_name, pressure_level_pascals,
                        valid_times_unix_sec):
    """Checks model fields for errors.

    M = number of rows (unique grid-point y-coordinates)
    N = number of columns (unique grid-point x-coordinates)
    T = number of time steps

    :param field_matrix: T-by-M-by-N numpy array with values of a single field
        (atmospheric variable).
    :param field_name: Field name in GewitterGefahr format.
    :param pressure_level_pascals: Pressure level (integer Pascals).
    :param valid_times_unix_sec: length-T numpy array of valid times.
    """

    check_field_name(field_name, require_standard=False)
    error_checking.assert_is_integer(pressure_level_pascals)

    error_checking.assert_is_integer_numpy_array(valid_times_unix_sec)
    error_checking.assert_is_numpy_array(
        valid_times_unix_sec, num_dimensions=1)
    num_times = len(valid_times_unix_sec)

    num_grid_rows, num_grid_columns = nwp_model_utils.get_grid_dimensions(
        model_name=nwp_model_utils.NARR_MODEL_NAME)

    error_checking.assert_is_real_numpy_array(field_matrix)
    error_checking.assert_is_numpy_array(field_matrix, num_dimensions=3)
    error_checking.assert_is_numpy_array(
        field_matrix,
        exact_dimensions=numpy.array(
            [num_times, num_grid_rows, num_grid_columns]))
def check_metadata(component_type_string, target_class=None, layer_name=None,
                   ideal_activation=None, neuron_indices=None,
                   channel_index=None):
    """Error-checks metadata for saliency calculations.

    :param component_type_string: Component type (must be accepted by
        `model_interpretation.check_component_type`).
    :param target_class: See doc for `get_saliency_maps_for_class_activation`.
    :param layer_name: See doc for `get_saliency_maps_for_neuron_activation`
        or `get_saliency_maps_for_channel_activation`.
    :param ideal_activation: Same.
    :param neuron_indices: See doc for
        `get_saliency_maps_for_neuron_activation`.
    :param channel_index: See doc for
        `get_saliency_maps_for_channel_activation`.
    :return: metadata_dict: Dictionary with the following keys.
    metadata_dict['component_type_string']: See input doc.
    metadata_dict['target_class']: Same.
    metadata_dict['layer_name']: Same.
    metadata_dict['ideal_activation']: Same.
    metadata_dict['neuron_indices']: Same.
    metadata_dict['channel_index']: Same.
    """

    model_interpretation.check_component_type(component_type_string)

    if (component_type_string ==
            model_interpretation.CLASS_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer(target_class)
        error_checking.assert_is_geq(target_class, 0)

    if component_type_string in [
            model_interpretation.NEURON_COMPONENT_TYPE_STRING,
            model_interpretation.CHANNEL_COMPONENT_TYPE_STRING
    ]:
        error_checking.assert_is_string(layer_name)

    if ideal_activation is not None:
        error_checking.assert_is_greater(ideal_activation, 0.)

    if (component_type_string ==
            model_interpretation.NEURON_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer_numpy_array(neuron_indices)
        error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
        error_checking.assert_is_numpy_array(
            neuron_indices, num_dimensions=1)

    if (component_type_string ==
            model_interpretation.CHANNEL_COMPONENT_TYPE_STRING):
        error_checking.assert_is_integer(channel_index)
        error_checking.assert_is_geq(channel_index, 0)

    return {
        COMPONENT_TYPE_KEY: component_type_string,
        TARGET_CLASS_KEY: target_class,
        LAYER_NAME_KEY: layer_name,
        IDEAL_ACTIVATION_KEY: ideal_activation,
        NEURON_INDICES_KEY: neuron_indices,
        CHANNEL_INDEX_KEY: channel_index
    }
def do_2d_upsampling(feature_matrix, upsampling_factor=2,
                     use_linear_interp=True):
    """Upsamples 2-D feature maps.

    m = number of rows after upsampling
    n = number of columns after upsampling

    :param feature_matrix: Input feature maps (numpy array). Dimensions must
        be M x N x C or 1 x M x N x C.
    :param upsampling_factor: Upsampling factor (integer > 1).
    :param use_linear_interp: Boolean flag. If True (False), will use linear
        (nearest-neighbour) interpolation.
    :return: feature_matrix: Output feature maps (numpy array). Dimensions
        will be 1 x m x n x C.
    """

    error_checking.assert_is_numpy_array_without_nan(feature_matrix)
    error_checking.assert_is_integer(upsampling_factor)
    error_checking.assert_is_geq(upsampling_factor, 2)
    error_checking.assert_is_boolean(use_linear_interp)

    if len(feature_matrix.shape) == 3:
        feature_matrix = numpy.expand_dims(feature_matrix, axis=0)

    error_checking.assert_is_numpy_array(feature_matrix, num_dimensions=4)

    # Upsampling sketch: assumes `keras` is imported at module level.
    # `keras.layers.UpSampling2D` supports both bilinear and
    # nearest-neighbour interpolation.
    upsampling_layer = keras.layers.UpSampling2D(
        size=(upsampling_factor, upsampling_factor),
        data_format='channels_last',
        interpolation='bilinear' if use_linear_interp else 'nearest')

    feature_tensor = upsampling_layer(K.variable(feature_matrix))
    return feature_tensor.eval(session=K.get_session())
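# Worked example for `do_2d_upsampling`, given the sketch completion above
# (hypothetical values). A 2 x 2 map with one channel, upsampled by a factor
# of 2 with nearest-neighbour interpolation, repeats each input pixel over a
# 2 x 2 block:
#
#     feature_matrix = numpy.array([[1., 2.], [3., 4.]]).reshape(2, 2, 1)
#     new_matrix = do_2d_upsampling(
#         feature_matrix, upsampling_factor=2, use_linear_interp=False)
#     # new_matrix[0, ..., 0] == numpy.array([[1., 1., 2., 2.],
#     #                                       [1., 1., 2., 2.],
#     #                                       [3., 3., 4., 4.],
#     #                                       [3., 3., 4., 4.]])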
def trim_whitespace(input_file_name, output_file_name, border_width_pixels=10,
                    convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Trims whitespace around edge of image.

    :param input_file_name: Path to input file (may be in any format handled
        by ImageMagick).
    :param output_file_name: Path to output file.
    :param border_width_pixels: Desired border width (whitespace).
    :param convert_exe_name: Path to executable file for ImageMagick's
        "convert" function. If you installed ImageMagick with root access,
        this should be the default. Regardless, the pathless file name should
        be just "convert".
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(border_width_pixels)
    error_checking.assert_is_geq(border_width_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    command_string = (
        '"{0:s}" "{1:s}" -trim -bordercolor White -border {2:d} "{3:s}"'
    ).format(convert_exe_name, input_file_name, border_width_pixels,
             output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return

    raise ValueError(ERROR_STRING)
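# Usage sketch for `trim_whitespace` (hypothetical paths; assumes
# `convert_exe_name` resolves to "convert"). The Unix command run is:
#
#     "convert" "raw.png" -trim -bordercolor White -border 10 "trimmed.png"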
def _check_input_args(list_of_baseline_matrices, list_of_trial_matrices,
                      num_iterations, confidence_level):
    """Error-checks input args for Monte Carlo test.

    :param list_of_baseline_matrices: See doc for `run_monte_carlo_test`.
    :param list_of_trial_matrices: Same.
    :param num_iterations: Same.
    :param confidence_level: Same.
    :raises: ValueError: if number of baseline matrices (input tensors to
        model) != number of trial matrices.
    :raises: TypeError: if all "input matrices" are None.
    :return: num_examples_per_set: Number of examples in each set.
    """

    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_geq(num_iterations, 100)
    error_checking.assert_is_geq(confidence_level, 0.)
    error_checking.assert_is_leq(confidence_level, 1.)

    num_baseline_matrices = len(list_of_baseline_matrices)
    num_trial_matrices = len(list_of_trial_matrices)

    if num_baseline_matrices != num_trial_matrices:
        error_string = (
            'Number of baseline matrices ({0:d}) should = number of trial '
            'matrices ({1:d}).'
        ).format(num_baseline_matrices, num_trial_matrices)

        raise ValueError(error_string)

    num_matrices = num_trial_matrices
    num_examples_per_set = None

    for i in range(num_matrices):
        if (list_of_baseline_matrices[i] is None
                and list_of_trial_matrices[i] is None):
            continue

        error_checking.assert_is_numpy_array(list_of_baseline_matrices[i])

        if num_examples_per_set is None:
            num_examples_per_set = list_of_baseline_matrices[i].shape[0]

        these_expected_dim = numpy.array(
            (num_examples_per_set,) + list_of_baseline_matrices[i].shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_baseline_matrices[i],
            exact_dimensions=these_expected_dim)

        these_expected_dim = numpy.array(
            list_of_baseline_matrices[i].shape, dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_trial_matrices[i], exact_dimensions=these_expected_dim)

    if num_examples_per_set is None:
        raise TypeError('All "input matrices" are None.')

    return num_examples_per_set
def check_metadata(layer_name, neuron_indices, ideal_activation,
                   num_iterations, learning_rate, l2_weight):
    """Checks metadata for errors.

    :param layer_name: Name of layer with relevant neuron.
    :param neuron_indices: 1-D numpy array with indices of relevant neuron.
        Must have length D - 1, where D = number of dimensions in layer
        output. The first dimension is the batch dimension, which always has
        length `None` in Keras.
    :param ideal_activation: Ideal neuron activation, used to define loss
        function. The loss function will be
        (neuron_activation - ideal_activation)**2.
    :param num_iterations: Number of iterations for gradient descent.
    :param learning_rate: Learning rate for gradient descent.
    :param l2_weight: L2 weight (penalty for difference between initial and
        final predictor matrix) in loss function.
    """

    error_checking.assert_is_string(layer_name)
    error_checking.assert_is_integer_numpy_array(neuron_indices)
    error_checking.assert_is_geq_numpy_array(neuron_indices, 0)
    error_checking.assert_is_numpy_array(neuron_indices, num_dimensions=1)
    error_checking.assert_is_not_nan(ideal_activation)
    error_checking.assert_is_integer(num_iterations)
    error_checking.assert_is_greater(num_iterations, 0)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_less_than(learning_rate, 1.)
    error_checking.assert_is_geq(l2_weight, 0.)
def create_fake_heights(real_heights_m_agl, num_padding_heights):
    """Creates fake heights for padding at top of profile.

    :param real_heights_m_agl: 1-D numpy array of real heights (metres above
        ground level).
    :param num_padding_heights: Number of heights to pad at top.
    :return: heights_m_agl: 1-D numpy array with all heights (real followed by
        fake).
    """

    error_checking.assert_is_numpy_array(real_heights_m_agl, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(real_heights_m_agl, 0.)
    assert numpy.allclose(
        real_heights_m_agl, numpy.sort(real_heights_m_agl), atol=TOLERANCE)

    error_checking.assert_is_integer(num_padding_heights)
    error_checking.assert_is_geq(num_padding_heights, 0)

    if num_padding_heights == 0:
        return real_heights_m_agl

    fake_heights_m_agl = numpy.linspace(
        1, num_padding_heights, num=num_padding_heights, dtype=float)
    fake_heights_m_agl = real_heights_m_agl[-1] + 1e6 * fake_heights_m_agl

    return numpy.concatenate((real_heights_m_agl, fake_heights_m_agl), axis=0)
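# Worked example for `create_fake_heights` (hypothetical values). Fake
# heights start 1e6 metres above the highest real height and are spaced 1e6
# metres apart, so they cannot be confused with real heights:
#
#     heights_m_agl = create_fake_heights(
#         real_heights_m_agl=numpy.array([10., 50., 100.]),
#         num_padding_heights=2)
#     # heights_m_agl == numpy.array([10., 50., 100., 1000100., 2000100.])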
def find_local_raw_file(year, directory_name=None,
                        raise_error_if_missing=True):
    """Finds raw file on local machine.

    This file should contain all storm reports for one year.

    :param year: [integer] Will look for file from this year.
    :param directory_name: Name of directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag. If True and file is missing,
        this method will raise an error.
    :return: raw_file_name: File path. If raise_error_if_missing = False and
        file is missing, this will be the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    raw_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(
        directory_name, PATHLESS_RAW_FILE_PREFIX,
        _year_number_to_string(year), RAW_FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(raw_file_name):
        raise ValueError(
            'Cannot find raw file. Expected at location: ' + raw_file_name)

    return raw_file_name
def resize_image(input_file_name, output_file_name, output_size_pixels,
                 convert_exe_name=DEFAULT_CONVERT_EXE_NAME):
    """Resizes image.

    :param input_file_name: Path to input file (may be in any format handled
        by ImageMagick).
    :param output_file_name: Path to output file.
    :param output_size_pixels: Output size (total number of pixels).
    :param convert_exe_name: See doc for `trim_whitespace`.
    :raises: ValueError: if ImageMagick command (which is ultimately a Unix
        command) fails.
    """

    error_checking.assert_file_exists(input_file_name)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    error_checking.assert_is_integer(output_size_pixels)
    error_checking.assert_is_greater(output_size_pixels, 0)
    error_checking.assert_file_exists(convert_exe_name)

    # The "@" suffix makes ImageMagick treat the number as a target area
    # (total pixel count) rather than a width or height.
    command_string = '"{0:s}" "{1:s}" -resize {2:d}@ "{3:s}"'.format(
        convert_exe_name, input_file_name, output_size_pixels,
        output_file_name)

    exit_code = os.system(command_string)
    if exit_code == 0:
        return

    raise ValueError(ERROR_STRING)
def find_file(year, directory_name, raise_error_if_missing=True):
    """Finds Storm Events file.

    This file should contain all storm reports for one year.

    :param year: Year (integer).
    :param directory_name: Name of directory with Storm Events files.
    :param raise_error_if_missing: Boolean flag. If file is missing and
        raise_error_if_missing = True, this method will error out.
    :return: storm_event_file_name: Path to Storm Events file. If file is
        missing and raise_error_if_missing = False, this will be the
        *expected* path.
    :raises: ValueError: if file is missing and raise_error_if_missing = True.
    """

    error_checking.assert_is_integer(year)
    error_checking.assert_is_string(directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    storm_event_file_name = '{0:s}/{1:s}{2:s}{3:s}'.format(
        directory_name, PATHLESS_FILE_PREFIX, _year_number_to_string(year),
        FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(storm_event_file_name):
        error_string = (
            'Cannot find Storm Events file. Expected at: {0:s}'
        ).format(storm_event_file_name)
        raise ValueError(error_string)

    return storm_event_file_name
def plot_multipass_test(
        permutation_dict, axes_object=None, num_predictors_to_plot=None,
        plot_percent_increase=False,
        confidence_level=DEFAULT_CONFIDENCE_LEVEL, bar_face_colour=None):
    """Plots results of multi-pass (Lakshmanan) permutation test.

    :param permutation_dict: See doc for `plot_single_pass_test`.
    :param axes_object: Same.
    :param num_predictors_to_plot: Same.
    :param plot_percent_increase: Same.
    :param confidence_level: Same.
    :param bar_face_colour: Same.
    """

    # Check input args.
    predictor_names = permutation_dict[permutation_utils.BEST_PREDICTORS_KEY]
    if num_predictors_to_plot is None:
        num_predictors_to_plot = len(predictor_names)

    error_checking.assert_is_integer(num_predictors_to_plot)
    error_checking.assert_is_greater(num_predictors_to_plot, 0)
    num_predictors_to_plot = min(
        [num_predictors_to_plot, len(predictor_names)])

    error_checking.assert_is_boolean(plot_percent_increase)

    # Set up plotting args.
    backwards_flag = permutation_dict[permutation_utils.BACKWARDS_FLAG]
    perturbed_cost_matrix = permutation_dict[
        permutation_utils.BEST_COST_MATRIX_KEY]
    perturbed_cost_matrix = perturbed_cost_matrix[:num_predictors_to_plot, :]
    predictor_names = predictor_names[:num_predictors_to_plot]

    original_cost_array = permutation_dict[
        permutation_utils.ORIGINAL_COST_ARRAY_KEY]
    original_cost_matrix = numpy.reshape(
        original_cost_array, (1, original_cost_array.size))
    cost_matrix = numpy.concatenate(
        (original_cost_matrix, perturbed_cost_matrix), axis=0)

    # Do plotting.
    if backwards_flag:
        clean_cost_array = permutation_dict[
            permutation_utils.BEST_COST_MATRIX_KEY][-1, :]
    else:
        clean_cost_array = original_cost_array

    _plot_bars(
        cost_matrix=cost_matrix, clean_cost_array=clean_cost_array,
        predictor_names=predictor_names,
        plot_percent_increase=plot_percent_increase,
        backwards_flag=backwards_flag, multipass_flag=True,
        confidence_level=confidence_level, axes_object=axes_object,
        bar_face_colour=bar_face_colour)
def find_single_field_file(init_time_unix_sec, lead_time_hours=None,
                           model_name=None, grid_id=None,
                           grib1_field_name=None, top_directory_name=None,
                           raise_error_if_missing=True):
    """Finds file with a single field on the local machine.

    "Single field" = one variable at one time step and all grid cells.

    :param init_time_unix_sec: Model-initialization time (Unix format).
    :param lead_time_hours: Lead time (valid time minus init time). If model
        is a reanalysis, you can leave this as None (always zero).
    :param model_name: Name of model.
    :param grid_id: String ID for model grid.
    :param grib1_field_name: Field name in grib1 format.
    :param top_directory_name: Name of top-level directory with single-field
        files for the given model/grib combo.
    :param raise_error_if_missing: Boolean flag. If True and file is missing,
        will raise an error.
    :return: single_field_file_name: Path to single-field file. If file is
        missing but raise_error_if_missing = False, this will be the
        *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(grib1_field_name)
    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    nwp_model_utils.check_model_name(model_name)
    if model_name == nwp_model_utils.NARR_MODEL_NAME:
        lead_time_hours = 0

    error_checking.assert_is_integer(lead_time_hours)
    error_checking.assert_is_geq(lead_time_hours, 0)

    pathless_file_name = _get_pathless_single_field_file_name(
        init_time_unix_sec, lead_time_hours=lead_time_hours,
        model_name=model_name, grid_id=grid_id,
        grib1_field_name=grib1_field_name)

    single_field_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name,
        time_conversion.unix_sec_to_string(
            init_time_unix_sec, TIME_FORMAT_MONTH),
        pathless_file_name)

    if raise_error_if_missing and not os.path.isfile(single_field_file_name):
        raise ValueError(
            'Cannot find single-field file. Expected at: ' +
            single_field_file_name)

    return single_field_file_name
def time_to_spc_date_string(unix_time_sec):
    """Converts time in Unix format to SPC date in string format.

    :param unix_time_sec: Time in Unix format.
    :return: spc_date_string: SPC date in format "yyyymmdd".
    """

    error_checking.assert_is_integer(unix_time_sec)

    # An SPC date runs from 1200 UTC of the given calendar day to 1159 UTC of
    # the next day, so shifting the time back by half a day maps it onto the
    # correct calendar date.
    return unix_sec_to_string(
        unix_time_sec - DAYS_TO_SECONDS // 2, SPC_DATE_FORMAT)
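# Worked example for `time_to_spc_date_string` (assumes DAYS_TO_SECONDS =
# 86400). 0300 UTC 5 Jan 2018 (unix_time_sec = 1515121200) falls before
# 1200 UTC, so it belongs to the SPC date of the previous calendar day:
#
#     time_to_spc_date_string(1515121200)  # returns '20180104'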
def _get_grid_points_in_storms(storm_object_table, num_grid_rows=None,
                               num_grid_columns=None):
    """Finds grid points in all storm objects.

    N = number of storm objects
    P = number of grid points in a storm object

    :param storm_object_table: N-row pandas DataFrame in format specified by
        `storm_tracking_io.write_processed_file`.
    :param num_grid_rows: Number of rows (unique grid-point latitudes).
    :param num_grid_columns: Number of columns (unique grid-point longitudes).
    :return: grid_points_in_storms_table: P-row pandas DataFrame with the
        following columns.
    grid_points_in_storms_table.flattened_index: Flattened index (integer) of
        grid point.
    grid_points_in_storms_table.storm_id: String ID for storm cell.
    """

    error_checking.assert_is_integer(num_grid_rows)
    error_checking.assert_is_greater(num_grid_rows, 0)
    error_checking.assert_is_integer(num_grid_columns)
    error_checking.assert_is_greater(num_grid_columns, 0)

    grid_point_row_indices = numpy.array([])
    grid_point_column_indices = numpy.array([])
    grid_point_storm_ids = []

    num_storms = len(storm_object_table.index)
    for i in range(num_storms):
        grid_point_row_indices = numpy.concatenate((
            grid_point_row_indices,
            storm_object_table[tracking_io.GRID_POINT_ROW_COLUMN].values[i]))
        grid_point_column_indices = numpy.concatenate((
            grid_point_column_indices,
            storm_object_table[
                tracking_io.GRID_POINT_COLUMN_COLUMN].values[i]))

        this_num_grid_points = len(
            storm_object_table[tracking_io.GRID_POINT_ROW_COLUMN].values[i])
        this_storm_id_list = (
            [storm_object_table[tracking_io.STORM_ID_COLUMN].values[i]] *
            this_num_grid_points)
        grid_point_storm_ids += this_storm_id_list

    grid_point_flattened_indices = numpy.ravel_multi_index(
        (grid_point_row_indices.astype(int),
         grid_point_column_indices.astype(int)),
        (num_grid_rows, num_grid_columns))

    grid_points_in_storms_dict = {
        FLATTENED_INDEX_COLUMN: grid_point_flattened_indices,
        STORM_ID_COLUMN: grid_point_storm_ids
    }
    return pandas.DataFrame.from_dict(grid_points_in_storms_dict)
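# Worked example of the flattening step in `_get_grid_points_in_storms`. On
# a grid with 4 rows and 5 columns, the point at row 1, column 2 has
# flattened (row-major) index 1 * 5 + 2 = 7:
#
#     numpy.ravel_multi_index((numpy.array([1]), numpy.array([2])), (4, 5))
#     # returns array([7])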
def zero_top_heating_rate_function(heating_rate_channel_index, height_index):
    """Returns function that zeroes predicted heating rate at top of profile.

    :param heating_rate_channel_index: Channel index for heating rate.
    :param height_index: Will zero out heating rate at this height.
    :return: zeroing_function: Function handle (see below).
    """

    error_checking.assert_is_integer(heating_rate_channel_index)
    error_checking.assert_is_geq(heating_rate_channel_index, 0)
    error_checking.assert_is_integer(height_index)
    error_checking.assert_is_geq(height_index, 0)

    def zeroing_function(orig_prediction_tensor):
        """Zeroes out predicted heating rate at top of profile.

        :param orig_prediction_tensor: Keras tensor with model predictions.
        :return: new_prediction_tensor: Same as input but with top heating
            rate zeroed out.
        """

        num_heights = orig_prediction_tensor.get_shape().as_list()[-2]
        num_channels = orig_prediction_tensor.get_shape().as_list()[-1]

        # Build a zero tensor with the right shape: the comparison with 1e12
        # is effectively always false, so casting it to float yields zeros.
        zero_tensor = K.greater_equal(
            orig_prediction_tensor[
                ..., height_index, heating_rate_channel_index],
            1e12)
        zero_tensor = K.cast(zero_tensor, dtype=K.floatx())

        # Rebuild the heating-rate channel: heights below `height_index`,
        # then the zeroed height, then (if any) heights above it.
        heating_rate_tensor = K.concatenate((
            orig_prediction_tensor[..., heating_rate_channel_index][
                ..., :height_index],
            K.expand_dims(zero_tensor, axis=-1)
        ), axis=-1)

        if height_index != num_heights - 1:
            heating_rate_tensor = K.concatenate((
                heating_rate_tensor,
                orig_prediction_tensor[..., heating_rate_channel_index][
                    ..., (height_index + 1):]
            ), axis=-1)

        # Splice the modified channel back between the unmodified channels.
        new_prediction_tensor = K.concatenate((
            orig_prediction_tensor[..., :heating_rate_channel_index],
            K.expand_dims(heating_rate_tensor, axis=-1)
        ), axis=-1)

        if heating_rate_channel_index == num_channels - 1:
            return new_prediction_tensor

        return K.concatenate((
            new_prediction_tensor,
            orig_prediction_tensor[..., (heating_rate_channel_index + 1):]
        ), axis=-1)

    return zeroing_function