def test_get_translations(self):
    """Ensures correct output from get_translations."""

    (these_x_offsets_pixels, these_y_offsets_pixels
    ) = data_augmentation.get_translations(
        num_translations=NUM_TRANSLATIONS,
        max_translation_pixels=MAX_TRANSLATION_PIXELS,
        num_grid_rows=2 * MAX_TRANSLATION_PIXELS,
        num_grid_columns=2 * MAX_TRANSLATION_PIXELS)

    self.assertTrue(len(these_x_offsets_pixels) == NUM_TRANSLATIONS)
    error_checking.assert_is_geq_numpy_array(
        these_x_offsets_pixels, -MAX_TRANSLATION_PIXELS)
    error_checking.assert_is_leq_numpy_array(
        these_x_offsets_pixels, MAX_TRANSLATION_PIXELS)

    self.assertTrue(len(these_y_offsets_pixels) == NUM_TRANSLATIONS)
    error_checking.assert_is_geq_numpy_array(
        these_y_offsets_pixels, -MAX_TRANSLATION_PIXELS)
    error_checking.assert_is_leq_numpy_array(
        these_y_offsets_pixels, MAX_TRANSLATION_PIXELS)

    error_checking.assert_is_greater_numpy_array(
        numpy.absolute(these_x_offsets_pixels) +
        numpy.absolute(these_y_offsets_pixels),
        0)

def dimensions_to_grid_id(grid_dimensions):
    """Determines grid from dimensions.

    :param grid_dimensions: 1-D numpy array with [num_rows, num_columns].
    :return: grid_id: String ID for grid.
    :raises: ValueError: if dimensions do not match a known grid.
    """

    error_checking.assert_is_numpy_array(
        grid_dimensions, exact_dimensions=numpy.array([2]))
    error_checking.assert_is_integer_numpy_array(grid_dimensions)
    error_checking.assert_is_greater_numpy_array(grid_dimensions, 1)

    these_dimensions = get_grid_dimensions(NARR_MODEL_NAME)
    if numpy.array_equal(these_dimensions, grid_dimensions):
        return ID_FOR_221GRID

    for this_grid_id in RUC_GRID_IDS:
        these_dimensions = get_grid_dimensions(RUC_MODEL_NAME, this_grid_id)
        if numpy.array_equal(these_dimensions, grid_dimensions):
            return this_grid_id

    raise ValueError(
        'Dimensions (' + str(grid_dimensions[0]) + ' rows x ' +
        str(grid_dimensions[1]) + ' columns) do not match a known grid.')

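# Usage sketch (added for illustration; `_demo_dimensions_to_grid_id` is
# hypothetical and not in the original source).  It assumes the NARR 221 grid
# spans 277 rows x 349 columns.
def _demo_dimensions_to_grid_id():
    """Shows how dimensions_to_grid_id resolves a known grid."""
    grid_dimensions = numpy.array([277, 349], dtype=int)
    print(dimensions_to_grid_id(grid_dimensions))  # expected: ID_FOR_221GRID
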
def project_wind_to_thermal_gradient(
        u_matrix_grid_relative_m_s01, v_matrix_grid_relative_m_s01,
        thermal_field_matrix_kelvins, x_spacing_metres, y_spacing_metres):
    """At each grid point, projects wind to direction of thermal gradient.

    M = number of rows in grid
    N = number of columns in grid

    :param u_matrix_grid_relative_m_s01: M-by-N numpy array of grid-relative
        u-wind (in the direction of increasing column number, or towards the
        right).  Units are metres per second.
    :param v_matrix_grid_relative_m_s01: M-by-N numpy array of grid-relative
        v-wind (in the direction of increasing row number, or towards the
        bottom).
    :param thermal_field_matrix_kelvins: See doc for
        `get_thermal_front_param`.
    :param x_spacing_metres: Same.
    :param y_spacing_metres: Same.
    :return: projected_velocity_matrix_m_s01: M-by-N numpy array with wind
        velocity in direction of thermal gradient.  Positive (negative)
        values mean that the wind is blowing towards warmer (cooler) air.
    """

    error_checking.assert_is_numpy_array_without_nan(
        u_matrix_grid_relative_m_s01)
    error_checking.assert_is_numpy_array(
        u_matrix_grid_relative_m_s01, num_dimensions=2)

    error_checking.assert_is_numpy_array_without_nan(
        v_matrix_grid_relative_m_s01)
    error_checking.assert_is_numpy_array(
        v_matrix_grid_relative_m_s01,
        exact_dimensions=numpy.array(u_matrix_grid_relative_m_s01.shape))

    error_checking.assert_is_numpy_array_without_nan(
        thermal_field_matrix_kelvins)
    error_checking.assert_is_greater_numpy_array(
        thermal_field_matrix_kelvins, 0.)
    error_checking.assert_is_numpy_array(
        thermal_field_matrix_kelvins,
        exact_dimensions=numpy.array(u_matrix_grid_relative_m_s01.shape))

    x_grad_matrix_kelvins_m01, y_grad_matrix_kelvins_m01 = _get_2d_gradient(
        field_matrix=thermal_field_matrix_kelvins,
        x_spacing_metres=x_spacing_metres, y_spacing_metres=y_spacing_metres)

    grad_magnitude_matrix_kelvins_m01 = numpy.sqrt(
        x_grad_matrix_kelvins_m01 ** 2 + y_grad_matrix_kelvins_m01 ** 2)

    # Where the thermal gradient is zero, the projection is undefined (0/0);
    # set these points to zero.
    first_matrix = (
        u_matrix_grid_relative_m_s01 * x_grad_matrix_kelvins_m01 /
        grad_magnitude_matrix_kelvins_m01)
    first_matrix[numpy.isnan(first_matrix)] = 0.

    second_matrix = (
        v_matrix_grid_relative_m_s01 * y_grad_matrix_kelvins_m01 /
        grad_magnitude_matrix_kelvins_m01)
    second_matrix[numpy.isnan(second_matrix)] = 0.

    return first_matrix + second_matrix

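# Usage sketch (illustrative only; `_demo_project_wind` is not in the
# original source).  With wind blowing towards increasing x and temperature
# also increasing with x, the projected velocity should equal the full wind
# speed (+10 m/s) everywhere.
def _demo_project_wind():
    """Projects a uniform wind onto a purely zonal temperature gradient."""
    temperature_matrix_kelvins = numpy.array([
        [280., 281., 282.],
        [280., 281., 282.],
        [280., 281., 282.]
    ])
    u_matrix_m_s01 = numpy.full((3, 3), 10.)
    v_matrix_m_s01 = numpy.full((3, 3), 0.)

    print(project_wind_to_thermal_gradient(
        u_matrix_grid_relative_m_s01=u_matrix_m_s01,
        v_matrix_grid_relative_m_s01=v_matrix_m_s01,
        thermal_field_matrix_kelvins=temperature_matrix_kelvins,
        x_spacing_metres=1000., y_spacing_metres=1000.))  # ~10. everywhere
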
def test_assert_is_positive_numpy_array_true_with_nan_allowed(self):
    """Checks assert_is_greater_numpy_array; base_value = 0, inputs > 0.

    In this case, input array contains NaN's and allow_nan = True.
    """

    error_checking.assert_is_greater_numpy_array(
        POSITIVE_NUMPY_ARRAY_WITH_NANS, 0, allow_nan=True)

def pressure_to_height(pressures_pascals):
    """Converts pressures to heights.

    :param pressures_pascals: numpy array of pressures.
    :return: heights_m_asl: equivalent-size numpy array of heights (metres
        above sea level).
    """

    error_checking.assert_is_greater_numpy_array(pressures_pascals, 0.)

    original_shape = pressures_pascals.shape
    pressures_pascals = numpy.ravel(pressures_pascals)

    num_points = len(pressures_pascals)
    heights_m_asl = numpy.full(num_points, numpy.nan)

    for i in range(len(STANDARD_PRESSURES_PASCALS) + 1):
        if i == 0:
            this_bottom_index = 0
            this_top_index = 1
            this_min_pressure_pascals = STANDARD_PRESSURES_PASCALS[0]
            this_max_pressure_pascals = numpy.inf
        elif i == len(STANDARD_PRESSURES_PASCALS):
            this_bottom_index = -2
            this_top_index = -1
            this_min_pressure_pascals = 0.
            this_max_pressure_pascals = STANDARD_PRESSURES_PASCALS[-1]
        else:
            this_bottom_index = i - 1
            this_top_index = i
            this_min_pressure_pascals = STANDARD_PRESSURES_PASCALS[i]
            this_max_pressure_pascals = STANDARD_PRESSURES_PASCALS[i - 1]

        these_indices = numpy.where(numpy.logical_and(
            pressures_pascals >= this_min_pressure_pascals,
            pressures_pascals < this_max_pressure_pascals
        ))[0]

        if len(these_indices) == 0:
            continue

        this_numerator = (STANDARD_HEIGHTS_M_ASL[this_bottom_index] -
                          STANDARD_HEIGHTS_M_ASL[this_top_index])
        this_denominator = numpy.log(
            STANDARD_PRESSURES_PASCALS[this_top_index] /
            STANDARD_PRESSURES_PASCALS[this_bottom_index])
        this_e_folding_height_metres = this_numerator / this_denominator

        these_logs = numpy.log(
            pressures_pascals[these_indices] /
            STANDARD_PRESSURES_PASCALS[this_bottom_index])
        heights_m_asl[these_indices] = (
            STANDARD_HEIGHTS_M_ASL[this_bottom_index] -
            this_e_folding_height_metres * these_logs)

    return numpy.reshape(heights_m_asl, original_shape)

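# Usage sketch (illustrative; `_demo_pressure_to_height` is not in the
# original source).  Within each standard-atmosphere layer, the conversion is
# exponential: z = z_bottom - H * ln(p / p_bottom), where H is the layer's
# e-folding height.  Assuming the standard tables start at sea level,
# ~101325 Pa should map to roughly 0 m ASL.
def _demo_pressure_to_height():
    """Converts a few familiar pressures to heights."""
    pressures_pascals = numpy.array([101325., 85000., 50000., 25000.])
    print(pressure_to_height(pressures_pascals))
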
def get_thermal_front_param(
        thermal_field_matrix_kelvins, x_spacing_metres, y_spacing_metres):
    """Computes thermal front parameter (TFP) at each grid point.

    TFP is defined in Renard and Clarke (1965).

    M = number of rows in grid
    N = number of columns in grid

    :param thermal_field_matrix_kelvins: M-by-N numpy array with values of
        thermal variable.  This can be any thermal variable ([potential]
        temperature, wet-bulb [potential] temperature, equivalent [potential]
        temperature, etc.).
    :param x_spacing_metres: Spacing between grid points in adjacent columns.
    :param y_spacing_metres: Spacing between grid points in adjacent rows.
    :return: tfp_matrix_kelvins_m02: M-by-N numpy array with TFP at each grid
        point.  Units are Kelvins per m^2.
    """

    error_checking.assert_is_numpy_array_without_nan(
        thermal_field_matrix_kelvins)
    error_checking.assert_is_greater_numpy_array(
        thermal_field_matrix_kelvins, 0.)
    error_checking.assert_is_numpy_array(
        thermal_field_matrix_kelvins, num_dimensions=2)

    error_checking.assert_is_greater(x_spacing_metres, 0.)
    error_checking.assert_is_greater(y_spacing_metres, 0.)

    x_grad_matrix_kelvins_m01, y_grad_matrix_kelvins_m01 = _get_2d_gradient(
        field_matrix=thermal_field_matrix_kelvins,
        x_spacing_metres=x_spacing_metres, y_spacing_metres=y_spacing_metres)

    grad_magnitude_matrix_kelvins_m01 = numpy.sqrt(
        x_grad_matrix_kelvins_m01 ** 2 + y_grad_matrix_kelvins_m01 ** 2)

    (x_grad_grad_matrix_kelvins_m02, y_grad_grad_matrix_kelvins_m02
    ) = _get_2d_gradient(
        field_matrix=grad_magnitude_matrix_kelvins_m01,
        x_spacing_metres=x_spacing_metres, y_spacing_metres=y_spacing_metres)

    first_matrix = (
        -x_grad_grad_matrix_kelvins_m02 * x_grad_matrix_kelvins_m01 /
        grad_magnitude_matrix_kelvins_m01)
    first_matrix[numpy.isnan(first_matrix)] = 0.

    second_matrix = (
        -y_grad_grad_matrix_kelvins_m02 * y_grad_matrix_kelvins_m01 /
        grad_magnitude_matrix_kelvins_m01)
    second_matrix[numpy.isnan(second_matrix)] = 0.

    return first_matrix + second_matrix

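# For reference, the quantity implemented above is the TFP of Renard and
# Clarke (1965):
#
#     TFP = -grad(|grad(tau)|) . (grad(tau) / |grad(tau)|)
#
# where tau is the thermal variable.  `first_matrix` and `second_matrix` are
# the x- and y-components of this dot product, with 0/0 points (zero thermal
# gradient) set to zero.
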
def check_time_separation(
        unix_times_sec, early_indices=None, late_indices=None,
        time_separation_sec=DEFAULT_TIME_SEPARATION_SEC):
    """Ensures that there is a separation (buffer) between two sets of times.

    :param unix_times_sec: See documentation for _apply_time_separation.
    :param early_indices: See documentation for _apply_time_separation.
    :param late_indices: See documentation for _apply_time_separation.
    :param time_separation_sec: See documentation for _apply_time_separation.
    :raises: ValueError: if separation between sets is <
        `time_separation_sec`.
    """

    error_checking.assert_is_integer_numpy_array(unix_times_sec)
    error_checking.assert_is_numpy_array_without_nan(unix_times_sec)
    error_checking.assert_is_numpy_array(unix_times_sec, num_dimensions=1)
    num_times = len(unix_times_sec)

    error_checking.assert_is_integer_numpy_array(early_indices)
    error_checking.assert_is_numpy_array(early_indices, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(early_indices, 0)
    error_checking.assert_is_leq_numpy_array(early_indices, num_times - 1)

    error_checking.assert_is_integer_numpy_array(late_indices)
    error_checking.assert_is_numpy_array(late_indices, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(late_indices, 0)
    error_checking.assert_is_leq_numpy_array(late_indices, num_times - 1)
    error_checking.assert_is_greater_numpy_array(
        unix_times_sec[late_indices],
        numpy.max(unix_times_sec[early_indices]))

    error_checking.assert_is_integer(time_separation_sec)
    error_checking.assert_is_greater(time_separation_sec, 0)

    last_early_time_unix_sec = numpy.max(unix_times_sec[early_indices])
    first_late_time_unix_sec = numpy.min(unix_times_sec[late_indices])
    min_diff_between_sets_sec = (
        first_late_time_unix_sec - last_early_time_unix_sec)

    if min_diff_between_sets_sec < time_separation_sec:
        last_early_time_string = time_conversion.unix_sec_to_string(
            last_early_time_unix_sec, TIME_STRING_FORMAT)
        first_late_time_string = time_conversion.unix_sec_to_string(
            first_late_time_unix_sec, TIME_STRING_FORMAT)

        error_string = (
            'Last time in early set is ' + last_early_time_string +
            '.  First time in late set is ' + first_late_time_string +
            '.  This is a time separation of ' +
            str(min_diff_between_sets_sec) +
            ' seconds between sets.  Required separation is >= ' +
            str(time_separation_sec) + ' s.')
        raise ValueError(error_string)

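# Usage sketch (illustrative; `_demo_check_time_separation` is not in the
# original source).  Early times end at t = 100 and late times start at
# t = 400, so the check passes for any required separation <= 300 s and
# raises ValueError otherwise.
def _demo_check_time_separation():
    """Checks separation between two small sets of times."""
    unix_times_sec = numpy.array([0, 50, 100, 400, 450], dtype=int)

    check_time_separation(
        unix_times_sec=unix_times_sec,
        early_indices=numpy.array([0, 1, 2], dtype=int),
        late_indices=numpy.array([3, 4], dtype=int),
        time_separation_sec=300)
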
def unzip_1day_tar_file(
        tar_file_name, spc_date_string, top_target_dir_name,
        scales_to_extract_metres2):
    """Unzips tar file with segmotion output for one SPC date.

    :param tar_file_name: Path to input file.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param top_target_dir_name: Name of top-level output directory.
    :param scales_to_extract_metres2: 1-D numpy array of tracking scales to
        extract.
    :return: target_directory_name: Path to output directory.  This will be
        "<top_target_directory_name>/<yyyy>/<yyyymmdd>", where <yyyy> is the
        year and <yyyymmdd> is the SPC date.
    """

    # Verification.
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_file_exists(tar_file_name)
    error_checking.assert_is_greater_numpy_array(scales_to_extract_metres2, 0)
    error_checking.assert_is_numpy_array(
        scales_to_extract_metres2, num_dimensions=1)

    scales_to_extract_metres2 = numpy.round(
        scales_to_extract_metres2).astype(int)

    num_scales_to_extract = len(scales_to_extract_metres2)
    directory_names_to_unzip = []

    for j in range(num_scales_to_extract):
        this_relative_stats_dir_name = '{0:s}/{1:s}'.format(
            spc_date_string,
            _get_relative_stats_dir_physical_scale(
                scales_to_extract_metres2[j]))

        this_relative_polygon_dir_name = '{0:s}/{1:s}'.format(
            spc_date_string,
            _get_relative_polygon_dir_physical_scale(
                scales_to_extract_metres2[j]))

        directory_names_to_unzip.append(
            this_relative_stats_dir_name.replace(spc_date_string + '/', ''))
        directory_names_to_unzip.append(
            this_relative_polygon_dir_name.replace(spc_date_string + '/', ''))

    target_directory_name = '{0:s}/{1:s}/{2:s}'.format(
        top_target_dir_name, spc_date_string[:4], spc_date_string)

    unzipping.unzip_tar(
        tar_file_name, target_directory_name=target_directory_name,
        file_and_dir_names_to_unzip=directory_names_to_unzip)

    return target_directory_name

def classification_cutoffs_to_ranges(class_cutoffs, non_negative_only=True):
    """Converts classification cutoffs to min/max for each class.

    C = number of classes
    c = C - 1 = number of cutoffs

    :param class_cutoffs: length-c numpy array of class cutoffs.
    :param non_negative_only: Boolean flag.  If True, class cutoffs/minima/
        maxima must be non-negative.
    :return: class_cutoffs: Same as input, but containing only unique values
        and sorted in ascending order.
    :return: class_minima: length-C numpy array of class minima, sorted in
        ascending order.
    :return: class_maxima: length-C numpy array of class maxima, sorted in
        ascending order.
    """

    error_checking.assert_is_boolean(non_negative_only)
    error_checking.assert_is_numpy_array(class_cutoffs, num_dimensions=1)

    if non_negative_only:
        error_checking.assert_is_greater_numpy_array(class_cutoffs, 0.)
    else:
        error_checking.assert_is_numpy_array_without_nan(class_cutoffs)

    class_cutoffs = numpy.sort(numpy.unique(class_cutoffs))
    num_classes = len(class_cutoffs) + 1
    class_minima = numpy.full(num_classes, numpy.nan)
    class_maxima = numpy.full(num_classes, numpy.nan)

    for k in range(num_classes):
        if k == 0:
            class_maxima[k] = class_cutoffs[k]
            if non_negative_only:
                class_minima[k] = 0.
            else:
                class_minima[k] = -numpy.inf
        elif k == num_classes - 1:
            class_minima[k] = class_cutoffs[k - 1]
            class_maxima[k] = numpy.inf
        else:
            class_minima[k] = class_cutoffs[k - 1]
            class_maxima[k] = class_cutoffs[k]

    return class_cutoffs, class_minima, class_maxima

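# Usage sketch (illustrative; `_demo_classification_cutoffs` is not in the
# original source).  Cutoffs of [10, 20] with non_negative_only=True yield
# three classes: [0, 10), [10, 20), and [20, inf).
def _demo_classification_cutoffs():
    """Converts two cutoffs into three class ranges."""
    class_cutoffs = numpy.array([10., 20.])
    class_cutoffs, class_minima, class_maxima = (
        classification_cutoffs_to_ranges(
            class_cutoffs, non_negative_only=True))

    print(class_minima)  # [0. 10. 20.]
    print(class_maxima)  # [10. 20. inf]
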
def loss(target_tensor, forecast_probability_tensor):
    """Computes weighted cross-entropy.

    :param target_tensor: See docstring for the 3 possible formats.
    :param forecast_probability_tensor: Same.
    :return: loss: Weighted cross-entropy.
    """

    # `class_weights` is captured from the enclosing scope.
    error_checking.assert_is_greater_numpy_array(class_weights, 0.)
    num_dimensions = _get_num_tensor_dimensions(target_tensor)

    if num_dimensions == 1:
        error_checking.assert_is_numpy_array(
            class_weights, exact_dimensions=numpy.array([2]))
    else:
        error_checking.assert_is_numpy_array(class_weights, num_dimensions=1)

    num_classes = len(class_weights)
    class_weight_tensor = tensorflow.convert_to_tensor(
        class_weights, dtype='float32')
    class_weight_tensor = K.reshape(class_weight_tensor, (num_classes, 1))

    if num_dimensions == 1:
        example_weight_tensor = K.dot(
            keras.utils.to_categorical(target_tensor, num_classes),
            class_weight_tensor)
    else:
        example_weight_tensor = K.dot(target_tensor, class_weight_tensor)

    example_weight_tensor = K.reshape(
        example_weight_tensor, K.shape(example_weight_tensor)[:-1])

    return K.mean(example_weight_tensor * K.categorical_crossentropy(
        target_tensor, forecast_probability_tensor))

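# Hedged sketch of how `loss` is typically produced and used (the factory
# name `weighted_cross_entropy` is an assumption, not confirmed by this
# excerpt): an enclosing function binds `class_weights` and returns the
# closure, which Keras can then consume as a custom loss.
#
#     def weighted_cross_entropy(class_weights):
#         def loss(target_tensor, forecast_probability_tensor):
#             ...  # body above
#         return loss
#
#     model_object.compile(
#         loss=weighted_cross_entropy(numpy.array([1., 5.])),
#         optimizer=keras.optimizers.Adam())
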
def write_standard_file(
        pickle_file_name, denorm_predictor_matrices, cam_matrices,
        guided_cam_matrices, full_storm_id_strings, storm_times_unix_sec,
        model_file_name, target_class, target_layer_name,
        sounding_pressure_matrix_pa=None):
    """Writes class-activation maps (one per storm object) to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param denorm_predictor_matrices: See doc for
        `_check_in_and_out_matrices`.
    :param cam_matrices: Same.
    :param guided_cam_matrices: Same.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to model that created class-activation maps
        (readable by `cnn.read_model`).
    :param target_class: Target class.  `cam_matrices` and
        `guided_cam_matrices` contain activations for the [k + 1]th class,
        where k = `target_class`.
    :param target_layer_name: Name of target layer.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if the model is trained with soundings but
        without pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_integer(target_class)
    error_checking.assert_is_geq(target_class, 0)
    error_checking.assert_is_string(target_layer_name)

    error_checking.assert_is_string_list(full_storm_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(full_storm_id_strings), num_dimensions=1)

    num_examples = len(full_storm_id_strings)
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=these_expected_dim)

    _check_in_and_out_matrices(
        predictor_matrices=denorm_predictor_matrices,
        num_examples=num_examples, cam_matrices=cam_matrices,
        guided_cam_matrices=guided_cam_matrices)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples,) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa,
            exact_dimensions=these_expected_dim)

    gradcam_dict = {
        PREDICTOR_MATRICES_KEY: denorm_predictor_matrices,
        CAM_MATRICES_KEY: cam_matrices,
        GUIDED_CAM_MATRICES_KEY: guided_cam_matrices,
        MODEL_FILE_KEY: model_file_name,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        TARGET_CLASS_KEY: target_class,
        TARGET_LAYER_KEY: target_layer_name,
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(gradcam_dict, pickle_file_handle)
    pickle_file_handle.close()

def create_2d_net(
        num_input_features, first_spatial_dimensions, upsampling_factors,
        num_output_channels, l1_weight=DEFAULT_L1_WEIGHT,
        l2_weight=DEFAULT_L2_WEIGHT, use_transposed_conv=True,
        activation_function_name=None, alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU,
        use_activn_for_last_layer=False, use_batch_norm=True,
        use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 2 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-2 numpy array of dimensions in
        first main layer.  The order should be (num_rows, num_columns).
        Before it is passed to the first main layer, the feature vector will
        be reshaped into a grid with these dimensions.
    :param upsampling_factors: length-L numpy array of upsampling factors.
    :param num_output_channels: See doc for `create_3d_net`.
    :param l1_weight: Same.
    :param l2_weight: Same.
    :param use_transposed_conv: Same.
    :param activation_function_name: Same.
    :param alpha_for_elu: Same.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Same.
    :param use_batch_norm: Same.
    :param use_batch_norm_for_last_layer: Same.
    :return: model_object: Same.
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)
    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions,
        exact_dimensions=numpy.array([2], dtype=int))
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(upsampling_factors, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(upsampling_factors)
    error_checking.assert_is_geq_numpy_array(upsampling_factors, 1)

    # Set up upconvnet architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions,
        numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    num_main_layers = len(upsampling_factors)
    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            current_num_filters = num_output_channels + 0

            # layer_object = keras.layers.ZeroPadding2D(
            #     padding=((1, 0), (1, 0)), data_format='channels_last'
            # )(layer_object)
        elif upsampling_factors[i] == 1:
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (upsampling_factors[i], upsampling_factors[i])

        if use_transposed_conv:
            layer_object = keras.layers.Conv2DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except TypeError:
                    # Older Keras versions do not accept the `interpolation`
                    # argument; fall back to nearest-neighbour upsampling.
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv2D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1), padding='same', data_format='channels_last',
                dilation_rate=(1, 1), activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        use_activation_here = (
            activation_function_name is not None and
            (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        use_batch_norm_here = (
            use_batch_norm and
            (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile upconvnet.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam())

    model_object.summary()
    return model_object

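# Usage sketch (illustrative; the argument values, and the 'relu' activation
# string, are assumptions rather than values from the original source).  This
# builds an upconvnet mapping a length-1024 feature vector to a 32 x 32 x 1
# image: the vector is reshaped to 8 x 8 x 16, then upsampled by factors of
# 2, 2, and 1.
def _demo_create_2d_net():
    """Builds a small 2-D upconvnet."""
    return create_2d_net(
        num_input_features=1024,
        first_spatial_dimensions=numpy.array([8, 8], dtype=int),
        upsampling_factors=numpy.array([2, 2, 1], dtype=int),
        num_output_channels=1, activation_function_name='relu')
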
def plot_saliency_for_sounding(
        saliency_matrix, sounding_field_names, pressure_levels_mb,
        colour_map_object, max_absolute_colour_value,
        min_font_size=DEFAULT_MIN_SOUNDING_FONT_SIZE,
        max_font_size=DEFAULT_MAX_SOUNDING_FONT_SIZE):
    """Plots saliency for one sounding.

    P = number of pressure levels
    F = number of fields

    :param saliency_matrix: P-by-F numpy array of saliency values.
    :param sounding_field_names: length-F list of field names.
    :param pressure_levels_mb: length-P numpy array of pressure levels
        (millibars).
    :param colour_map_object: See doc for `plot_2d_grid`.
    :param max_absolute_colour_value: Same.
    :param min_font_size: Same.
    :param max_font_size: Same.
    """

    error_checking.assert_is_geq(max_absolute_colour_value, 0.)
    max_absolute_colour_value = max([max_absolute_colour_value, 0.001])

    error_checking.assert_is_greater_numpy_array(pressure_levels_mb, 0.)
    error_checking.assert_is_numpy_array(pressure_levels_mb, num_dimensions=1)

    error_checking.assert_is_list(sounding_field_names)
    error_checking.assert_is_numpy_array(
        numpy.array(sounding_field_names), num_dimensions=1)

    num_pressure_levels = len(pressure_levels_mb)
    num_sounding_fields = len(sounding_field_names)

    error_checking.assert_is_numpy_array_without_nan(saliency_matrix)
    error_checking.assert_is_numpy_array(
        saliency_matrix,
        exact_dimensions=numpy.array(
            [num_pressure_levels, num_sounding_fields]))

    try:
        u_wind_index = sounding_field_names.index(soundings.U_WIND_NAME)
        v_wind_index = sounding_field_names.index(soundings.V_WIND_NAME)
        plot_wind_barbs = True
    except ValueError:
        plot_wind_barbs = False

    if plot_wind_barbs:
        u_wind_saliency_values = saliency_matrix[:, u_wind_index]
        v_wind_saliency_values = saliency_matrix[:, v_wind_index]
        wind_saliency_magnitudes = numpy.sqrt(
            u_wind_saliency_values ** 2 + v_wind_saliency_values ** 2)

        colour_norm_object = pyplot.Normalize(
            vmin=0., vmax=max_absolute_colour_value)
        rgb_matrix_for_wind = colour_map_object(
            colour_norm_object(wind_saliency_magnitudes))[..., :-1]

        non_wind_flags = numpy.array(
            [f not in WIND_COMPONENT_NAMES for f in sounding_field_names],
            dtype=bool)
        non_wind_indices = numpy.where(non_wind_flags)[0]

        saliency_matrix = saliency_matrix[:, non_wind_indices]
        sounding_field_names = [
            sounding_field_names[k] for k in non_wind_indices
        ]

        sounding_field_names.append(WIND_NAME)
        num_sounding_fields = len(sounding_field_names)

    rgb_matrix, font_size_matrix = _saliency_to_colour_and_size(
        saliency_matrix=saliency_matrix, colour_map_object=colour_map_object,
        max_absolute_colour_value=max_absolute_colour_value,
        min_font_size=min_font_size, max_font_size=max_font_size)

    _, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))
    axes_object.set_facecolor(
        plotting_utils.colour_from_numpy_to_tuple(
            SOUNDING_SALIENCY_BACKGROUND_COLOUR))

    for k in range(num_sounding_fields):
        if sounding_field_names[k] == WIND_NAME:
            for j in range(num_pressure_levels):
                this_vector = numpy.array([
                    u_wind_saliency_values[j], v_wind_saliency_values[j]
                ])
                this_vector = (
                    WIND_SALIENCY_MULTIPLIER * this_vector /
                    numpy.linalg.norm(this_vector, ord=2))

                this_colour_tuple = (
                    plotting_utils.colour_from_numpy_to_tuple(
                        rgb_matrix_for_wind[j, ...]))

                axes_object.barbs(
                    k, pressure_levels_mb[j], this_vector[0], this_vector[1],
                    length=WIND_BARB_LENGTH, fill_empty=True, rounding=False,
                    sizes={'emptybarb': EMPTY_WIND_BARB_RADIUS},
                    color=this_colour_tuple)

            continue

        for j in range(num_pressure_levels):
            this_colour_tuple = plotting_utils.colour_from_numpy_to_tuple(
                rgb_matrix[j, k, ...])

            if saliency_matrix[j, k] >= 0:
                axes_object.text(
                    k, pressure_levels_mb[j], '+',
                    fontsize=font_size_matrix[j, k],
                    color=this_colour_tuple, horizontalalignment='center',
                    verticalalignment='center')
            else:
                axes_object.text(
                    k, pressure_levels_mb[j], '_',
                    fontsize=font_size_matrix[j, k],
                    color=this_colour_tuple, horizontalalignment='center',
                    verticalalignment='bottom')

    axes_object.set_xlim(-0.5, num_sounding_fields - 0.5)
    axes_object.set_ylim(100, 1000)
    axes_object.invert_yaxis()
    pyplot.yscale('log')
    pyplot.minorticks_off()

    y_tick_locations = numpy.linspace(100, 1000, num=10, dtype=int)
    y_tick_labels = ['{0:d}'.format(p) for p in y_tick_locations]
    pyplot.yticks(y_tick_locations, y_tick_labels)

    x_tick_locations = numpy.linspace(
        0, num_sounding_fields - 1, num=num_sounding_fields, dtype=float)
    x_tick_labels = [
        FIELD_NAME_TO_LATEX_DICT[f] for f in sounding_field_names
    ]
    pyplot.xticks(x_tick_locations, x_tick_labels)

    colour_bar_object = plotting_utils.plot_linear_colour_bar(
        axes_object_or_matrix=axes_object, data_matrix=saliency_matrix,
        colour_map_object=colour_map_object, min_value=0.,
        max_value=max_absolute_colour_value, orientation_string='vertical',
        extend_min=True, extend_max=True)

    colour_bar_object.set_label('Saliency (absolute value)')

def _check_args(option_dict):
    """Error-checks input arguments.

    L = number of levels in encoder = number of levels in decoder
    D = number of dense layers

    :param option_dict: Dictionary with the following keys.
    option_dict['input_dimensions']: numpy array with input dimensions
        (num_heights, num_channels).
    option_dict['num_levels']: L in the above discussion.
    option_dict['num_conv_layers_by_level']: length-(L + 1) numpy array with
        number of conv layers at each level.
    option_dict['num_channels_by_level']: length-(L + 1) numpy array with
        number of channels at each level.
    option_dict['encoder_dropout_rate_by_level']: length-(L + 1) numpy array
        with dropout rate for conv layers in encoder at each level.
    option_dict['upconv_dropout_rate_by_level']: length-L numpy array with
        dropout rate for upconv layers at each level.
    option_dict['skip_dropout_rate_by_level']: length-L numpy array with
        dropout rate for conv layer after skip connection at each level.
    option_dict['include_penultimate_conv']: Boolean flag.  If True, will put
        in extra conv layer (with 3 x 3 filter) before final pixelwise conv.
    option_dict['penultimate_conv_dropout_rate']: Dropout rate for
        penultimate conv layer.
    option_dict['dense_layer_neuron_nums']: length-D numpy array with number
        of neurons in each dense layer.
    option_dict['dense_layer_dropout_rates']: length-D numpy array with
        dropout rate for each dense layer.
    option_dict['inner_activ_function_name']: Name of activation function for
        all inner (non-output) layers.  Must be accepted by
        `architecture_utils.check_activation_function`.
    option_dict['inner_activ_function_alpha']: Alpha (slope parameter) for
        activation function for all inner layers.  Applies only to ReLU and
        eLU.
    option_dict['output_activ_function_name']: Same as
        `inner_activ_function_name` but for output layer.
    option_dict['output_activ_function_alpha']: Same as
        `inner_activ_function_alpha` but for output layer.
    option_dict['l1_weight']: Weight for L_1 regularization.
    option_dict['l2_weight']: Weight for L_2 regularization.
    option_dict['use_batch_normalization']: Boolean flag.  If True, will use
        batch normalization after each inner (non-output) conv layer.

    :return: option_dict: Same as input, except defaults may have been added.
    """

    orig_option_dict = option_dict.copy()
    option_dict = DEFAULT_ARCHITECTURE_OPTION_DICT.copy()
    option_dict.update(orig_option_dict)

    input_dimensions = option_dict[INPUT_DIMENSIONS_KEY]
    error_checking.assert_is_numpy_array(
        input_dimensions, exact_dimensions=numpy.array([2], dtype=int))
    error_checking.assert_is_integer_numpy_array(input_dimensions)
    error_checking.assert_is_greater_numpy_array(input_dimensions, 0)

    num_levels = option_dict[NUM_LEVELS_KEY]
    error_checking.assert_is_integer(num_levels)
    error_checking.assert_is_geq(num_levels, 2)

    expected_dim = numpy.array([num_levels + 1], dtype=int)

    num_conv_layers_by_level = option_dict[CONV_LAYER_COUNTS_KEY]
    error_checking.assert_is_numpy_array(
        num_conv_layers_by_level, exact_dimensions=expected_dim)
    error_checking.assert_is_integer_numpy_array(num_conv_layers_by_level)
    error_checking.assert_is_greater_numpy_array(num_conv_layers_by_level, 0)

    num_channels_by_level = option_dict[CHANNEL_COUNTS_KEY]
    error_checking.assert_is_numpy_array(
        num_channels_by_level, exact_dimensions=expected_dim)
    error_checking.assert_is_integer_numpy_array(num_channels_by_level)
    error_checking.assert_is_greater_numpy_array(num_channels_by_level, 0)

    encoder_dropout_rate_by_level = option_dict[ENCODER_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(
        encoder_dropout_rate_by_level, exact_dimensions=expected_dim)
    error_checking.assert_is_leq_numpy_array(
        encoder_dropout_rate_by_level, 1., allow_nan=True)

    expected_dim = numpy.array([num_levels], dtype=int)

    upconv_dropout_rate_by_level = option_dict[UPCONV_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(
        upconv_dropout_rate_by_level, exact_dimensions=expected_dim)
    error_checking.assert_is_leq_numpy_array(
        upconv_dropout_rate_by_level, 1., allow_nan=True)

    skip_dropout_rate_by_level = option_dict[SKIP_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(
        skip_dropout_rate_by_level, exact_dimensions=expected_dim)
    error_checking.assert_is_leq_numpy_array(
        skip_dropout_rate_by_level, 1., allow_nan=True)

    error_checking.assert_is_boolean(option_dict[INCLUDE_PENULTIMATE_KEY])
    error_checking.assert_is_leq(
        option_dict[PENULTIMATE_DROPOUT_RATE_KEY], 1., allow_nan=True)

    dense_layer_neuron_nums = option_dict[DENSE_LAYER_NEURON_NUMS_KEY]
    dense_layer_dropout_rates = option_dict[DENSE_LAYER_DROPOUT_RATES_KEY]
    has_dense_layers = not (
        dense_layer_neuron_nums is None and dense_layer_dropout_rates is None
    )

    if has_dense_layers:
        error_checking.assert_is_integer_numpy_array(dense_layer_neuron_nums)
        error_checking.assert_is_numpy_array(
            dense_layer_neuron_nums, num_dimensions=1)
        error_checking.assert_is_geq_numpy_array(dense_layer_neuron_nums, 1)

        num_dense_layers = len(dense_layer_neuron_nums)
        expected_dim = numpy.array([num_dense_layers], dtype=int)

        error_checking.assert_is_numpy_array(
            dense_layer_dropout_rates, exact_dimensions=expected_dim)
        error_checking.assert_is_leq_numpy_array(
            dense_layer_dropout_rates, 1., allow_nan=True)

    error_checking.assert_is_geq(option_dict[L1_WEIGHT_KEY], 0.)
    error_checking.assert_is_geq(option_dict[L2_WEIGHT_KEY], 0.)
    error_checking.assert_is_boolean(option_dict[USE_BATCH_NORM_KEY])

    return option_dict

def find_convective_pixels(reflectivity_matrix_dbz, grid_metadata_dict,
                           valid_time_unix_sec, option_dict):
    """Classifies pixels (horiz grid points) as convective or non-convective.

    :param reflectivity_matrix_dbz: M-by-N-by-H numpy array of reflectivity
        values.  Latitude should increase along the first axis; longitude
        should increase along the second axis; height should increase along
        the third axis.  MAKE SURE NOT TO FLIP YOUR LATITUDES.
    :param grid_metadata_dict: Dictionary with the following keys.
    grid_metadata_dict['min_grid_point_latitude_deg']: Minimum latitude
        (deg N) over all grid points.
    grid_metadata_dict['latitude_spacing_deg']: Spacing (deg N) between grid
        points in adjacent rows.
    grid_metadata_dict['min_grid_point_longitude_deg']: Minimum longitude
        (deg E) over all grid points.
    grid_metadata_dict['longitude_spacing_deg']: Spacing (deg E) between grid
        points in adjacent columns.
    grid_metadata_dict['grid_point_heights_m_asl']: length-H numpy array of
        heights (metres above sea level) at grid points.

    :param valid_time_unix_sec: Valid time.
    :param option_dict: Dictionary with the following keys.
    option_dict['peakedness_neigh_metres']: Neighbourhood radius for
        peakedness calculations (metres), used for criterion 1.
    option_dict['max_peakedness_height_m_asl']: Max height (metres above sea
        level) for peakedness calculations, used in criterion 1.
    option_dict['min_height_fraction_for_peakedness']: Minimum fraction of
        heights that exceed peakedness threshold, used in criterion 1.  At
        each horizontal location, at least this fraction of heights must
        exceed the threshold.
    option_dict['halve_resolution_for_peakedness']: Boolean flag.  If True,
        horizontal grid resolution will be halved for peakedness
        calculations.
    option_dict['min_echo_top_m_asl']: Minimum echo top (metres above sea
        level), used for criterion 3.
    option_dict['echo_top_level_dbz']: Critical reflectivity (used to compute
        echo top for criterion 3).
    option_dict['min_size_pixels']: Minimum connected-region size (for
        criterion 4).
    option_dict['min_composite_refl_criterion1_dbz']: Minimum composite
        (column-max) reflectivity for criterion 1.  This may be None.
    option_dict['min_composite_refl_criterion5_dbz']: Minimum composite
        reflectivity for criterion 5.
    option_dict['min_composite_refl_aml_dbz']: Minimum composite reflectivity
        above melting level, used for criterion 2.

    :return: convective_flag_matrix: M-by-N numpy array of Boolean flags
        (True if convective, False if not).
    :return: option_dict: Same as input, except some values may have been
        replaced by defaults.
    """

    # Error-checking.
    error_checking.assert_is_numpy_array(
        reflectivity_matrix_dbz, num_dimensions=3)

    option_dict = _check_input_args(option_dict)

    peakedness_neigh_metres = option_dict[PEAKEDNESS_NEIGH_KEY]
    max_peakedness_height_m_asl = option_dict[MAX_PEAKEDNESS_HEIGHT_KEY]
    min_height_fraction_for_peakedness = option_dict[MIN_HEIGHT_FRACTION_KEY]
    halve_resolution_for_peakedness = option_dict[HALVE_RESOLUTION_KEY]
    min_echo_top_m_asl = option_dict[MIN_ECHO_TOP_KEY]
    echo_top_level_dbz = option_dict[ECHO_TOP_LEVEL_KEY]
    min_size_pixels = option_dict[MIN_SIZE_KEY]
    min_composite_refl_criterion1_dbz = (
        option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY])
    min_composite_refl_criterion5_dbz = (
        option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY])
    min_composite_refl_aml_dbz = option_dict[MIN_COMPOSITE_REFL_AML_KEY]

    grid_point_heights_m_asl = numpy.round(
        grid_metadata_dict[HEIGHTS_KEY]).astype(int)

    error_checking.assert_is_numpy_array(
        grid_point_heights_m_asl, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(grid_point_heights_m_asl, 0)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_point_heights_m_asl), 0)

    # Compute grid-point coordinates.
    num_rows = reflectivity_matrix_dbz.shape[0]
    num_columns = reflectivity_matrix_dbz.shape[1]

    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        grids.get_latlng_grid_points(
            min_latitude_deg=grid_metadata_dict[MIN_LATITUDE_KEY],
            min_longitude_deg=grid_metadata_dict[MIN_LONGITUDE_KEY],
            lat_spacing_deg=grid_metadata_dict[LATITUDE_SPACING_KEY],
            lng_spacing_deg=grid_metadata_dict[LONGITUDE_SPACING_KEY],
            num_rows=num_rows, num_columns=num_columns))

    grid_metadata_dict[LATITUDES_KEY] = grid_point_latitudes_deg
    grid_metadata_dict[LONGITUDES_KEY] = grid_point_longitudes_deg

    reflectivity_matrix_dbz[numpy.isnan(reflectivity_matrix_dbz)] = 0.

    print('Applying criterion 1 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion1(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        peakedness_neigh_metres=peakedness_neigh_metres,
        max_peakedness_height_m_asl=max_peakedness_height_m_asl,
        min_height_fraction=min_height_fraction_for_peakedness,
        halve_resolution_for_peakedness=halve_resolution_for_peakedness,
        min_composite_refl_dbz=min_composite_refl_criterion1_dbz,
        grid_metadata_dict=grid_metadata_dict)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 2 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion2(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        convective_flag_matrix=convective_flag_matrix,
        grid_metadata_dict=grid_metadata_dict,
        valid_time_unix_sec=valid_time_unix_sec,
        min_composite_refl_aml_dbz=min_composite_refl_aml_dbz)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 3 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion3(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        convective_flag_matrix=convective_flag_matrix,
        grid_metadata_dict=grid_metadata_dict,
        min_echo_top_m_asl=min_echo_top_m_asl,
        echo_top_level_dbz=echo_top_level_dbz)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 4 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion4(
        convective_flag_matrix=convective_flag_matrix,
        min_size_pixels=min_size_pixels)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 5 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion5(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        convective_flag_matrix=convective_flag_matrix,
        min_composite_refl_dbz=min_composite_refl_criterion5_dbz)

    return convective_flag_matrix, option_dict

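# Usage sketch (illustrative; the grid values are assumptions, and it is
# assumed that _check_input_args supplies defaults when option_dict is
# empty).  This classifies a tiny synthetic 3-D reflectivity grid.
def _demo_find_convective_pixels():
    """Runs the convective classification on a synthetic grid."""
    reflectivity_matrix_dbz = numpy.random.uniform(
        low=0., high=60., size=(50, 50, 10))

    grid_metadata_dict = {
        MIN_LATITUDE_KEY: 35.,
        LATITUDE_SPACING_KEY: 0.01,
        MIN_LONGITUDE_KEY: 262.,
        LONGITUDE_SPACING_KEY: 0.01,
        HEIGHTS_KEY: numpy.linspace(1000., 10000., num=10)
    }

    return find_convective_pixels(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        grid_metadata_dict=grid_metadata_dict,
        valid_time_unix_sec=0, option_dict=dict())
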
def write_standard_file(
        pickle_file_name, denorm_predictor_matrices, saliency_matrices,
        full_storm_id_strings, storm_times_unix_sec, model_file_name,
        metadata_dict, sounding_pressure_matrix_pa=None):
    """Writes saliency maps (one per storm object) to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param denorm_predictor_matrices: See doc for
        `_check_in_and_out_matrices`.
    :param saliency_matrices: Same.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to model that created saliency maps
        (readable by `cnn.read_model`).
    :param metadata_dict: Dictionary created by `check_metadata`.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if the model is trained with soundings but
        without pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string_list(full_storm_id_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(full_storm_id_strings), num_dimensions=1)

    num_examples = len(full_storm_id_strings)
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(
        storm_times_unix_sec, exact_dimensions=these_expected_dim)

    _check_in_and_out_matrices(
        predictor_matrices=denorm_predictor_matrices,
        num_examples=num_examples, saliency_matrices=saliency_matrices)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples,) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa,
            exact_dimensions=these_expected_dim)

    saliency_dict = {
        PREDICTOR_MATRICES_KEY: denorm_predictor_matrices,
        SALIENCY_MATRICES_KEY: saliency_matrices,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_KEY: model_file_name,
        COMPONENT_TYPE_KEY: metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(saliency_dict, pickle_file_handle)
    pickle_file_handle.close()

def write_standard_file(
        pickle_file_name, denorm_input_matrices, denorm_output_matrices,
        initial_activations, final_activations, model_file_name,
        metadata_dict, full_storm_id_strings=None, storm_times_unix_sec=None,
        sounding_pressure_matrix_pa=None):
    """Writes backwards-optimized examples to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    If input matrices do not come from real examples,
    `full_storm_id_strings` and `storm_times_unix_sec` can be None.

    :param pickle_file_name: Path to output file.
    :param denorm_input_matrices: See doc for `_check_in_and_out_matrices`.
    :param denorm_output_matrices: Same.
    :param initial_activations: length-E numpy array of initial model
        activations (before backwards optimization).
    :param final_activations: length-E numpy array of final model activations
        (after backwards optimization).
    :param model_file_name: Path to model used for backwards optimization
        (readable by `cnn.read_model`).
    :param metadata_dict: Dictionary created by `check_metadata`.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if `denorm_input_matrices` contains soundings
        from real examples but without pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)

    used_real_examples = not (
        full_storm_id_strings is None and storm_times_unix_sec is None
    )

    if used_real_examples:
        error_checking.assert_is_string_list(full_storm_id_strings)
        error_checking.assert_is_numpy_array(
            numpy.array(full_storm_id_strings), num_dimensions=1)

        num_examples = len(full_storm_id_strings)
        these_expected_dim = numpy.array([num_examples], dtype=int)

        error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
        error_checking.assert_is_numpy_array(
            storm_times_unix_sec, exact_dimensions=these_expected_dim)
    else:
        num_examples = denorm_input_matrices[0].shape[0]
        sounding_pressure_matrix_pa = None

    _check_in_and_out_matrices(
        input_matrices=denorm_input_matrices, num_examples=num_examples,
        output_matrices=denorm_output_matrices)

    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_numpy_array_without_nan(initial_activations)
    error_checking.assert_is_numpy_array(
        initial_activations, exact_dimensions=these_expected_dim)

    error_checking.assert_is_numpy_array_without_nan(final_activations)
    error_checking.assert_is_numpy_array(
        final_activations, exact_dimensions=these_expected_dim)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples,) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa,
            exact_dimensions=these_expected_dim)

    bwo_dictionary = {
        INPUT_MATRICES_KEY: denorm_input_matrices,
        OUTPUT_MATRICES_KEY: denorm_output_matrices,
        INITIAL_ACTIVATIONS_KEY: initial_activations,
        FINAL_ACTIVATIONS_KEY: final_activations,
        MODEL_FILE_KEY: model_file_name,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        NUM_ITERATIONS_KEY: metadata_dict[NUM_ITERATIONS_KEY],
        LEARNING_RATE_KEY: metadata_dict[LEARNING_RATE_KEY],
        L2_WEIGHT_KEY: metadata_dict[L2_WEIGHT_KEY],
        RADAR_CONSTRAINT_WEIGHT_KEY:
            metadata_dict[RADAR_CONSTRAINT_WEIGHT_KEY],
        MINMAX_CONSTRAINT_WEIGHT_KEY:
            metadata_dict[MINMAX_CONSTRAINT_WEIGHT_KEY],
        COMPONENT_TYPE_KEY: metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(bwo_dictionary, pickle_file_handle)
    pickle_file_handle.close()

def train_neural_net(
        training_table, feature_names, target_name, replace_missing,
        standardize, transform_via_svd,
        replacement_method=feature_trans.MEAN_VALUE_REPLACEMENT_METHOD,
        fraction_of_explained_variance_for_svd=
        DEFAULT_EXP_VARIANCE_FRACTION_FOR_SVD,
        hidden_layer_sizes=DEFAULT_HIDDEN_LAYER_SIZES_FOR_NN,
        hidden_layer_activation_function=DEFAULT_ACTIVATION_FUNCTION_FOR_NN,
        solver=DEFAULT_SOLVER_FOR_NN, l2_weight=DEFAULT_L2_WEIGHT_FOR_NN,
        num_examples_per_batch=DEFAULT_BATCH_SIZE_FOR_NN,
        learning_rate=DEFAULT_LEARNING_RATE_FOR_NN,
        max_num_epochs=DEFAULT_MAX_NUM_EPOCHS_FOR_NN,
        convergence_tolerance=DEFAULT_CONVERGENCE_TOLERANCE_FOR_NN,
        allow_early_stopping=True,
        early_stopping_fraction=DEFAULT_EARLY_STOPPING_FRACTION_FOR_NN):
    """Trains a neural net for binary classification.

    H = number of hidden layers

    :param training_table: See documentation for _check_training_data.
    :param feature_names: See doc for _check_training_data.
    :param target_name: See doc for _check_training_data.
    :param replace_missing: See documentation for
        _preprocess_data_for_learning.
    :param standardize: See doc for _preprocess_data_for_learning.
    :param transform_via_svd: See doc for _preprocess_data_for_learning.
    :param replacement_method: See doc for _preprocess_data_for_learning.
    :param fraction_of_explained_variance_for_svd: See doc for
        _preprocess_data_for_learning.
    :param hidden_layer_sizes: length-H numpy array, where the [i]th element
        is the number of nodes in the [i]th hidden layer.
    :param hidden_layer_activation_function: Activation function for hidden
        layers.  See `sklearn.neural_network.MLPClassifier` documentation for
        valid options.
    :param solver: Solver.  Valid options are "sgd" and "adam".
    :param l2_weight: Weight for L2 penalty.
    :param num_examples_per_batch: Number of examples per training batch.
    :param learning_rate: Learning rate.
    :param max_num_epochs: Max number of training epochs (passes over
        training data).
    :param convergence_tolerance: Stopping criterion.  Training will stop
        when loss has improved by < `convergence_tolerance` for each of two
        consecutive epochs.
    :param allow_early_stopping: Boolean flag.  If True, some training data
        will be set aside as "validation data" to check for early stopping.
        In this case, training will stop when the validation score has
        improved by < `convergence_tolerance` for each of two consecutive
        epochs.
    :param early_stopping_fraction: Fraction of training examples to use when
        checking early-stopping criterion.
    :return: model_object: Trained model (instance of
        `sklearn.neural_network.MLPClassifier`).
    :return: replacement_dict: See doc for _preprocess_data_for_learning.
    :return: standardization_dict: See doc for
        _preprocess_data_for_learning.
    :return: svd_dictionary: See doc for _preprocess_data_for_learning.
    :raises: ValueError: if `solver not in VALID_SOLVERS_FOR_NN`.
    """

    _check_input_data_for_learning(
        input_table=training_table, feature_names=feature_names,
        target_name=target_name)

    (preprocessed_training_table, preprocessed_feature_names,
     replacement_dict, standardization_dict, svd_dictionary
    ) = _preprocess_data_for_learning(
        input_table=training_table, feature_names=feature_names,
        learning_phase=TRAINING_PHASE, replace_missing=replace_missing,
        standardize=standardize, transform_via_svd=transform_via_svd,
        replacement_method=replacement_method,
        fraction_of_explained_variance_for_svd=
        fraction_of_explained_variance_for_svd)

    error_checking.assert_is_integer_numpy_array(hidden_layer_sizes)
    error_checking.assert_is_numpy_array(hidden_layer_sizes, num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(hidden_layer_sizes, 0)

    error_checking.assert_is_string(solver)
    if solver not in VALID_SOLVERS_FOR_NN:
        error_string = (
            '\n\n{0:s}\n\nValid solvers (listed above) do not include '
            '"{1:s}".'
        ).format(str(VALID_SOLVERS_FOR_NN), solver)
        raise ValueError(error_string)

    error_checking.assert_is_integer(num_examples_per_batch)
    error_checking.assert_is_geq(num_examples_per_batch, 2)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_leq(learning_rate, 1.)
    error_checking.assert_is_integer(max_num_epochs)
    error_checking.assert_is_greater(max_num_epochs, 0)
    error_checking.assert_is_greater(convergence_tolerance, 0.)
    error_checking.assert_is_boolean(allow_early_stopping)

    if allow_early_stopping:
        error_checking.assert_is_greater(early_stopping_fraction, 0.)
        error_checking.assert_is_less_than(early_stopping_fraction, 0.5)

    model_object = sklearn.neural_network.MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=hidden_layer_activation_function, solver=solver,
        alpha=l2_weight, batch_size=num_examples_per_batch,
        learning_rate_init=learning_rate, max_iter=max_num_epochs,
        tol=convergence_tolerance, verbose=3,
        early_stopping=allow_early_stopping,
        validation_fraction=early_stopping_fraction)

    # `DataFrame.as_matrix` was removed in pandas 1.0; select the feature
    # columns and convert them with `to_numpy` instead.
    model_object.fit(
        preprocessed_training_table[preprocessed_feature_names].to_numpy(),
        preprocessed_training_table[target_name].values)

    return (model_object, replacement_dict, standardization_dict,
            svd_dictionary)

def write_file(
        netcdf_file_name, scalar_target_matrix, vector_target_matrix,
        scalar_prediction_matrix, vector_prediction_matrix, heights_m_agl,
        example_id_strings, model_file_name, isotonic_model_file_name=None):
    """Writes predictions to NetCDF file.

    E = number of examples
    H = number of heights
    T_s = number of scalar targets
    T_v = number of vector targets

    :param netcdf_file_name: Path to output file.
    :param scalar_target_matrix: numpy array (E x T_s) with actual values of
        scalar targets.
    :param vector_target_matrix: numpy array (E x H x T_v) with actual values
        of vector targets.
    :param scalar_prediction_matrix: Same as `scalar_target_matrix` but with
        predicted values.
    :param vector_prediction_matrix: Same as `vector_target_matrix` but with
        predicted values.
    :param heights_m_agl: length-H numpy array of heights (metres above
        ground level).
    :param example_id_strings: length-E list of IDs created by
        `example_utils.create_example_ids`.
    :param model_file_name: Path to file with trained model (readable by
        `neural_net.read_model`).
    :param isotonic_model_file_name: Path to file with trained isotonic-
        regression models (readable by `isotonic_regression.read_file`) used
        to make predictions.  If isotonic regression was not used, leave this
        as None.
    """

    # Check input args.
    error_checking.assert_is_numpy_array_without_nan(scalar_target_matrix)
    error_checking.assert_is_numpy_array(
        scalar_target_matrix, num_dimensions=2)

    error_checking.assert_is_numpy_array_without_nan(scalar_prediction_matrix)
    error_checking.assert_is_numpy_array(
        scalar_prediction_matrix,
        exact_dimensions=numpy.array(scalar_target_matrix.shape, dtype=int))

    error_checking.assert_is_numpy_array_without_nan(vector_target_matrix)
    error_checking.assert_is_numpy_array(
        vector_target_matrix, num_dimensions=3)

    num_examples = scalar_target_matrix.shape[0]
    expected_dim = numpy.array(
        (num_examples,) + vector_target_matrix.shape[1:], dtype=int)
    error_checking.assert_is_numpy_array(
        vector_target_matrix, exact_dimensions=expected_dim)

    error_checking.assert_is_numpy_array_without_nan(vector_prediction_matrix)
    error_checking.assert_is_numpy_array(
        vector_prediction_matrix,
        exact_dimensions=numpy.array(vector_target_matrix.shape, dtype=int))

    num_heights = vector_target_matrix.shape[1]
    error_checking.assert_is_greater_numpy_array(heights_m_agl, 0.)
    error_checking.assert_is_numpy_array(
        heights_m_agl,
        exact_dimensions=numpy.array([num_heights], dtype=int))

    error_checking.assert_is_numpy_array(
        numpy.array(example_id_strings),
        exact_dimensions=numpy.array([num_examples], dtype=int))
    example_utils.parse_example_ids(example_id_strings)

    error_checking.assert_is_string(model_file_name)
    if isotonic_model_file_name is None:
        isotonic_model_file_name = ''
    error_checking.assert_is_string(isotonic_model_file_name)

    # Write to NetCDF file.
    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(
        netcdf_file_name, 'w', format='NETCDF3_64BIT_OFFSET')

    dataset_object.setncattr(MODEL_FILE_KEY, model_file_name)
    dataset_object.setncattr(ISOTONIC_MODEL_FILE_KEY, isotonic_model_file_name)

    num_examples = vector_target_matrix.shape[0]
    dataset_object.createDimension(EXAMPLE_DIMENSION_KEY, num_examples)
    dataset_object.createDimension(
        HEIGHT_DIMENSION_KEY, vector_target_matrix.shape[1])
    dataset_object.createDimension(
        VECTOR_TARGET_DIMENSION_KEY, vector_target_matrix.shape[2])

    num_scalar_targets = scalar_target_matrix.shape[1]
    if num_scalar_targets > 0:
        dataset_object.createDimension(
            SCALAR_TARGET_DIMENSION_KEY, scalar_target_matrix.shape[1])

    if num_examples == 0:
        num_id_characters = 1
    else:
        num_id_characters = numpy.max(numpy.array(
            [len(s) for s in example_id_strings]
        ))

    dataset_object.createDimension(EXAMPLE_ID_CHAR_DIM_KEY, num_id_characters)

    this_string_format = 'S{0:d}'.format(num_id_characters)
    example_ids_char_array = netCDF4.stringtochar(
        numpy.array(example_id_strings, dtype=this_string_format))

    dataset_object.createVariable(
        EXAMPLE_IDS_KEY, datatype='S1',
        dimensions=(EXAMPLE_DIMENSION_KEY, EXAMPLE_ID_CHAR_DIM_KEY))
    dataset_object.variables[EXAMPLE_IDS_KEY][:] = numpy.array(
        example_ids_char_array)

    dataset_object.createVariable(
        HEIGHTS_KEY, datatype=numpy.float32, dimensions=HEIGHT_DIMENSION_KEY)
    dataset_object.variables[HEIGHTS_KEY][:] = heights_m_agl

    if num_scalar_targets > 0:
        dataset_object.createVariable(
            SCALAR_TARGETS_KEY, datatype=numpy.float32,
            dimensions=(EXAMPLE_DIMENSION_KEY, SCALAR_TARGET_DIMENSION_KEY))
        dataset_object.variables[SCALAR_TARGETS_KEY][:] = scalar_target_matrix

        dataset_object.createVariable(
            SCALAR_PREDICTIONS_KEY, datatype=numpy.float32,
            dimensions=(EXAMPLE_DIMENSION_KEY, SCALAR_TARGET_DIMENSION_KEY))
        dataset_object.variables[SCALAR_PREDICTIONS_KEY][:] = (
            scalar_prediction_matrix)

    these_dimensions = (
        EXAMPLE_DIMENSION_KEY, HEIGHT_DIMENSION_KEY,
        VECTOR_TARGET_DIMENSION_KEY)

    dataset_object.createVariable(
        VECTOR_TARGETS_KEY, datatype=numpy.float32,
        dimensions=these_dimensions)
    dataset_object.variables[VECTOR_TARGETS_KEY][:] = vector_target_matrix

    dataset_object.createVariable(
        VECTOR_PREDICTIONS_KEY, datatype=numpy.float32,
        dimensions=these_dimensions)
    dataset_object.variables[VECTOR_PREDICTIONS_KEY][:] = (
        vector_prediction_matrix)

    dataset_object.close()

def _run(example_file_name, num_examples, choose_max_heating_rate,
         max_noise_k_day01, pressure_cutoffs_pa, pressure_spacings_pa,
         first_interp_method_name, second_interp_method_name, interp_fluxes,
         output_dir_name):
    """Runs interpolation experiment.

    This is effectively the main method.

    :param example_file_name: See documentation at top of file.
    :param num_examples: Same.
    :param choose_max_heating_rate: Same.
    :param max_noise_k_day01: Same.
    :param pressure_cutoffs_pa: Same.
    :param pressure_spacings_pa: Same.
    :param first_interp_method_name: Same.
    :param second_interp_method_name: Same.
    :param interp_fluxes: Same.
    :param output_dir_name: Same.
    """

    if interp_fluxes:
        max_noise_k_day01 = 0.

    error_checking.assert_is_greater(num_examples, 0)
    error_checking.assert_is_geq(max_noise_k_day01, 0.)
    error_checking.assert_is_geq_numpy_array(pressure_cutoffs_pa, 0.)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(pressure_cutoffs_pa), 0.)
    error_checking.assert_is_greater_numpy_array(pressure_spacings_pa, 0.)

    num_spacings = len(pressure_spacings_pa)
    expected_dim = numpy.array([num_spacings + 1], dtype=int)
    error_checking.assert_is_numpy_array(pressure_cutoffs_pa,
                                         exact_dimensions=expected_dim)

    high_res_pressures_pa = numpy.array([], dtype=float)

    for i in range(num_spacings):
        this_num_pressures = int(numpy.ceil(
            1 +
            (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
            pressure_spacings_pa[i]
        ))
        these_pressures_pa = numpy.linspace(
            pressure_cutoffs_pa[i], pressure_cutoffs_pa[i + 1],
            num=this_num_pressures, dtype=float)

        if i != num_spacings - 1:
            these_pressures_pa = these_pressures_pa[:-1]

        high_res_pressures_pa = numpy.concatenate(
            (high_res_pressures_pa, these_pressures_pa))

    print('Number of levels in high-resolution grid = {0:d}'.format(
        len(high_res_pressures_pa)))

    if high_res_pressures_pa[0] < TOLERANCE:
        high_res_pressures_pa[0] = 0.5 * high_res_pressures_pa[1]

    high_res_pressures_pa = high_res_pressures_pa[::-1]
    high_res_heights_m_asl = standard_atmo.pressure_to_height(
        high_res_pressures_pa)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = example_io.read_file(example_file_name)

    heating_rate_matrix_k_day01 = example_utils.get_field_from_dict(
        example_dict=example_dict,
        field_name=example_utils.SHORTWAVE_HEATING_RATE_NAME)

    if choose_max_heating_rate:
        hr_criterion_by_example = numpy.max(heating_rate_matrix_k_day01,
                                            axis=1)
    else:
        abs_diff_matrix = numpy.absolute(numpy.diff(
            heating_rate_matrix_k_day01[:, :-1], axis=1))
        hr_criterion_by_example = numpy.max(abs_diff_matrix, axis=1)

    good_indices = numpy.argsort(-1 * hr_criterion_by_example)
    good_indices = good_indices[:num_examples]

    example_dict = example_utils.subset_by_index(
        example_dict=example_dict, desired_indices=good_indices)
    num_examples = len(good_indices)

    max_differences_k_day01 = numpy.full(num_examples, numpy.nan)

    for i in range(num_examples):
        max_differences_k_day01[i] = _run_experiment_one_example(
            example_dict=example_dict, example_index=i,
            max_noise_k_day01=max_noise_k_day01,
            high_res_pressures_pa=high_res_pressures_pa,
            high_res_heights_m_asl=high_res_heights_m_asl,
            first_interp_method_name=first_interp_method_name,
            second_interp_method_name=second_interp_method_name,
            interp_fluxes=interp_fluxes, output_dir_name=output_dir_name)

    print('Average max difference = {0:.4f} K day^-1'.format(
        numpy.mean(max_differences_k_day01)))
    print('Median max difference = {0:.4f} K day^-1'.format(
        numpy.median(max_differences_k_day01)))
    print('Max max difference = {0:.4f} K day^-1'.format(
        numpy.max(max_differences_k_day01)))
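# The piecewise-uniform grid construction in `_run` is easier to see with
# concrete numbers.  Below is a minimal standalone sketch; the cutoffs and
# spacings are illustrative, not the defaults used in this experiment.

def _demo_build_pressure_grid():
    """Builds a two-segment pressure grid (values in Pa): 0-500 hPa at
    100-hPa spacing, then 500-1000 hPa at 50-hPa spacing."""

    import numpy

    pressure_cutoffs_pa = numpy.array([0., 50000., 100000.])
    pressure_spacings_pa = numpy.array([10000., 5000.])

    grid_pa = numpy.array([], dtype=float)

    for i in range(len(pressure_spacings_pa)):
        num_pressures = int(numpy.ceil(
            1 +
            (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
            pressure_spacings_pa[i]
        ))
        segment_pa = numpy.linspace(
            pressure_cutoffs_pa[i], pressure_cutoffs_pa[i + 1],
            num=num_pressures, dtype=float)

        # Drop the last point of all but the final segment, so that cutoff
        # values are not duplicated where segments meet.
        if i != len(pressure_spacings_pa) - 1:
            segment_pa = segment_pa[:-1]

        grid_pa = numpy.concatenate((grid_pa, segment_pa))

    # 5 points from the first segment + 11 from the second = 16 levels.
    assert len(grid_pa) == 16
    return grid_pa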
def test_assert_is_positive_numpy_array_true(self):
    """Checks assert_is_greater_numpy_array; base_value = 0, inputs > 0."""

    error_checking.assert_is_greater_numpy_array(POSITIVE_NUMPY_ARRAY, 0)
def _run(saliency_file_names, monte_carlo_file_names, composite_names,
         colour_map_name, max_colour_values, half_num_contours,
         smoothing_radius_grid_cells, output_dir_name):
    """Makes figure with sanity checks for MYRORSS saliency maps.

    This is effectively the main method.

    :param saliency_file_names: See documentation at top of file.
    :param monte_carlo_file_names: Same.
    :param composite_names: Same.
    :param colour_map_name: Same.
    :param max_colour_values: Same.
    :param half_num_contours: Same.
    :param smoothing_radius_grid_cells: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if smoothing_radius_grid_cells <= 0:
        smoothing_radius_grid_cells = None

    colour_map_object = pyplot.cm.get_cmap(colour_map_name)
    error_checking.assert_is_geq(half_num_contours, 5)

    num_composites = len(saliency_file_names)
    expected_dim = numpy.array([num_composites], dtype=int)

    error_checking.assert_is_numpy_array(numpy.array(composite_names),
                                         exact_dimensions=expected_dim)
    error_checking.assert_is_numpy_array(numpy.array(monte_carlo_file_names),
                                         exact_dimensions=expected_dim)
    monte_carlo_file_names = [
        None if f in NONE_STRINGS else f for f in monte_carlo_file_names
    ]

    error_checking.assert_is_greater_numpy_array(max_colour_values, 0.)
    error_checking.assert_is_numpy_array(max_colour_values,
                                         exact_dimensions=expected_dim)

    composite_names_abbrev = [
        n.replace('_', '-').lower() for n in composite_names
    ]
    composite_names_verbose = [
        '({0:s}) {1:s}'.format(chr(ord('a') + i),
                               composite_names[i].replace('_', ' '))
        for i in range(num_composites)
    ]

    panel_file_names = [None] * num_composites

    for i in range(num_composites):
        panel_file_names[i] = _plot_one_composite(
            saliency_file_name=saliency_file_names[i],
            monte_carlo_file_name=monte_carlo_file_names[i],
            composite_name_abbrev=composite_names_abbrev[i],
            composite_name_verbose=composite_names_verbose[i],
            colour_map_object=colour_map_object,
            max_colour_value=max_colour_values[i],
            half_num_contours=half_num_contours,
            smoothing_radius_grid_cells=smoothing_radius_grid_cells,
            output_dir_name=output_dir_name)

        _add_colour_bar(
            figure_file_name=panel_file_names[i],
            colour_map_object=colour_map_object,
            max_colour_value=max_colour_values[i],
            temporary_dir_name=output_dir_name)

        print('\n')

    figure_file_name = '{0:s}/saliency_concat.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(figure_file_name))

    num_panel_rows = int(numpy.floor(numpy.sqrt(num_composites)))
    num_panel_columns = int(numpy.ceil(float(num_composites) / num_panel_rows))

    imagemagick_utils.concatenate_images(
        input_file_names=panel_file_names, output_file_name=figure_file_name,
        border_width_pixels=100, num_panel_rows=num_panel_rows,
        num_panel_columns=num_panel_columns)
    imagemagick_utils.trim_whitespace(
        input_file_name=figure_file_name, output_file_name=figure_file_name,
        border_width_pixels=10)
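# The panel layout above aims for a near-square grid: rows = floor(sqrt(N))
# and columns = ceil(N / rows), which always provides at least N slots.
# A standalone sketch (the function name is illustrative):

def _demo_panel_layout(num_panels):
    """Returns (num_rows, num_columns) for a near-square panel layout."""

    import numpy

    num_rows = int(numpy.floor(numpy.sqrt(num_panels)))
    num_columns = int(numpy.ceil(float(num_panels) / num_rows))
    return num_rows, num_columns

# e.g., _demo_panel_layout(5) == (2, 3) and _demo_panel_layout(9) == (3, 3).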
def test_assert_is_positive_numpy_array_non_positive(self):
    """Checks assert_is_greater_numpy_array; base_value = 0, inputs <= 0."""

    with self.assertRaises(ValueError):
        error_checking.assert_is_greater_numpy_array(
            NON_POSITIVE_NUMPY_ARRAY, 0)
def write_standard_file(
        pickle_file_name, list_of_input_matrices, list_of_saliency_matrices,
        storm_ids, storm_times_unix_sec, model_file_name,
        saliency_metadata_dict, sounding_pressure_matrix_pascals=None):
    """Writes saliency maps (one per example) to Pickle file.

    T = number of input tensors to the model
    E = number of examples (storm objects)
    H = number of height levels per sounding

    :param pickle_file_name: Path to output file.
    :param list_of_input_matrices: length-T list of numpy arrays, containing
        predictors (inputs to the model).  The first dimension of each array
        must have length E.
    :param list_of_saliency_matrices: length-T list of numpy arrays,
        containing saliency values.  list_of_saliency_matrices[i] must have
        the same dimensions as list_of_input_matrices[i].
    :param storm_ids: length-E list of storm IDs (strings).
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model (readable by
        `cnn.read_model`).
    :param saliency_metadata_dict: Dictionary created by `check_metadata`.
    :param sounding_pressure_matrix_pascals: E-by-H numpy array of pressure
        levels in soundings.  Useful only when the model input contains
        soundings without pressure, since pressure is needed to plot
        soundings.
    :raises: ValueError: if `list_of_input_matrices` and
        `list_of_saliency_matrices` have different lengths.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(numpy.array(storm_ids),
                                         num_dimensions=1)

    num_storm_objects = len(storm_ids)
    these_expected_dim = numpy.array([num_storm_objects], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_list(list_of_input_matrices)
    error_checking.assert_is_list(list_of_saliency_matrices)

    num_input_matrices = len(list_of_input_matrices)
    num_saliency_matrices = len(list_of_saliency_matrices)

    if num_input_matrices != num_saliency_matrices:
        error_string = (
            'Number of input matrices ({0:d}) should equal number of '
            'saliency matrices ({1:d}).'
        ).format(num_input_matrices, num_saliency_matrices)

        raise ValueError(error_string)

    for i in range(num_input_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_input_matrices[i])
        error_checking.assert_is_numpy_array_without_nan(
            list_of_saliency_matrices[i])

        these_expected_dim = numpy.array(
            (num_storm_objects,) + list_of_input_matrices[i].shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_input_matrices[i], exact_dimensions=these_expected_dim)

        these_expected_dim = numpy.array(list_of_input_matrices[i].shape,
                                         dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_saliency_matrices[i],
            exact_dimensions=these_expected_dim)

    if sounding_pressure_matrix_pascals is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pascals)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pascals, 0.)
        error_checking.assert_is_numpy_array(sounding_pressure_matrix_pascals,
                                             num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_storm_objects,) + sounding_pressure_matrix_pascals.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pascals,
            exact_dimensions=these_expected_dim)

    saliency_dict = {
        INPUT_MATRICES_KEY: list_of_input_matrices,
        SALIENCY_MATRICES_KEY: list_of_saliency_matrices,
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: saliency_metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: saliency_metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: saliency_metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: saliency_metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: saliency_metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: saliency_metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)

    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(saliency_dict, pickle_file_handle)
    pickle_file_handle.close()
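# The file written by `write_standard_file` is a plain pickled dictionary,
# so it round-trips with the standard library alone.  A minimal sketch
# (dictionary contents and file handling are illustrative, not this
# module's actual keys):

def _demo_pickle_round_trip():
    """Writes a small dictionary to a Pickle file and reads it back."""

    import pickle
    import tempfile

    saliency_dict = {'storm_ids': ['a', 'b'], 'model_file_name': 'model.h5'}

    with tempfile.NamedTemporaryFile(suffix='.p', delete=False) as f:
        pickle.dump(saliency_dict, f)
        pickle_file_name = f.name

    with open(pickle_file_name, 'rb') as f:
        assert pickle.load(f) == saliency_dict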
def test_assert_is_positive_numpy_array_mixed_sign(self):
    """Checks assert_is_greater_numpy_array; base_value = 0, mixed signs."""

    with self.assertRaises(ValueError):
        error_checking.assert_is_greater_numpy_array(
            MIXED_SIGN_NUMPY_ARRAY, 0)
def find_events_in_grid_cell(event_x_coords_metres, event_y_coords_metres,
                             grid_edge_x_coords_metres,
                             grid_edge_y_coords_metres, row_index,
                             column_index, verbose):
    """Finds events in a certain grid cell.

    E = number of events
    M = number of rows in grid
    N = number of columns in grid

    :param event_x_coords_metres: length-E numpy array of x-coordinates.
    :param event_y_coords_metres: length-E numpy array of y-coordinates.
    :param grid_edge_x_coords_metres: length-(N + 1) numpy array with
        x-coordinates at edges of grid cells.
    :param grid_edge_y_coords_metres: length-(M + 1) numpy array with
        y-coordinates at edges of grid cells.
    :param row_index: Will find events in [i]th row of grid, where
        i = `row_index`.
    :param column_index: Will find events in [j]th column of grid, where
        j = `column_index`.
    :param verbose: Boolean flag.  If True, messages will be printed to
        command window.
    :return: desired_indices: 1-D numpy array with indices of events in
        desired grid cell.
    """

    error_checking.assert_is_numpy_array_without_nan(event_x_coords_metres)
    error_checking.assert_is_numpy_array(event_x_coords_metres,
                                         num_dimensions=1)

    num_events = len(event_x_coords_metres)
    these_expected_dim = numpy.array([num_events], dtype=int)

    error_checking.assert_is_numpy_array_without_nan(event_y_coords_metres)
    error_checking.assert_is_numpy_array(event_y_coords_metres,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_numpy_array(grid_edge_x_coords_metres,
                                         num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_edge_x_coords_metres), 0)

    error_checking.assert_is_numpy_array(grid_edge_y_coords_metres,
                                         num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_edge_y_coords_metres), 0)

    error_checking.assert_is_integer(row_index)
    error_checking.assert_is_geq(row_index, 0)
    error_checking.assert_is_integer(column_index)
    error_checking.assert_is_geq(column_index, 0)
    error_checking.assert_is_boolean(verbose)

    x_min_metres = grid_edge_x_coords_metres[column_index]
    x_max_metres = grid_edge_x_coords_metres[column_index + 1]
    y_min_metres = grid_edge_y_coords_metres[row_index]
    y_max_metres = grid_edge_y_coords_metres[row_index + 1]

    # Widen the last row and column slightly, so that events on the outer
    # edge of the grid are not lost by the half-open intervals below.
    if row_index == len(grid_edge_y_coords_metres) - 2:
        y_max_metres += TOLERANCE
    if column_index == len(grid_edge_x_coords_metres) - 2:
        x_max_metres += TOLERANCE

    # TODO(thunderhoser): If need be, I could speed this up by computing
    # `row_flags` only once per row and `column_flags` only once per column.
    row_flags = numpy.logical_and(event_y_coords_metres >= y_min_metres,
                                  event_y_coords_metres < y_max_metres)

    if not numpy.any(row_flags):
        if verbose:
            print('0 of {0:d} events are in grid cell ({1:d}, {2:d})!'.format(
                num_events, row_index, column_index))

        return numpy.array([], dtype=int)

    column_flags = numpy.logical_and(event_x_coords_metres >= x_min_metres,
                                     event_x_coords_metres < x_max_metres)

    if not numpy.any(column_flags):
        if verbose:
            print('0 of {0:d} events are in grid cell ({1:d}, {2:d})!'.format(
                num_events, row_index, column_index))

        return numpy.array([], dtype=int)

    desired_indices = numpy.where(
        numpy.logical_and(row_flags, column_flags))[0]

    if verbose:
        print('{0:d} of {1:d} events are in grid cell ({2:d}, {3:d})!'.format(
            len(desired_indices), num_events, row_index, column_index))

    return desired_indices
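# The event search above uses half-open intervals [min, max) on each axis.
# Below is a minimal standalone sketch of the same masking logic on a tiny
# 2 x 2 grid; the data values are illustrative.

def _demo_events_in_cell():
    """Finds events in cell (row 0, column 1) of a 2 x 2 grid."""

    import numpy

    event_x_metres = numpy.array([0.5, 1.5, 1.5])
    event_y_metres = numpy.array([0.5, 0.5, 1.5])
    grid_edge_x_metres = numpy.array([0., 1., 2.])
    grid_edge_y_metres = numpy.array([0., 1., 2.])

    row_index, column_index = 0, 1
    x_min = grid_edge_x_metres[column_index]
    x_max = grid_edge_x_metres[column_index + 1]
    y_min = grid_edge_y_metres[row_index]
    y_max = grid_edge_y_metres[row_index + 1]

    good_flags = numpy.logical_and(
        numpy.logical_and(event_x_metres >= x_min, event_x_metres < x_max),
        numpy.logical_and(event_y_metres >= y_min, event_y_metres < y_max)
    )
    desired_indices = numpy.where(good_flags)[0]

    # Only the second event (x = 1.5, y = 0.5) falls in this cell.
    assert desired_indices.tolist() == [1]
    return desired_indices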
def create_3d_net(
        num_input_features, first_spatial_dimensions,
        rowcol_upsampling_factors, height_upsampling_factors,
        num_output_channels, l1_weight=DEFAULT_L1_WEIGHT,
        l2_weight=DEFAULT_L2_WEIGHT, use_transposed_conv=True,
        activation_function_name=None, alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU,
        use_activn_for_last_layer=False, use_batch_norm=True,
        use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 3 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-3 numpy array of dimensions in
        first main layer.  The order should be (num_rows, num_columns,
        num_heights).  Before it is passed to the first main layer, the
        feature vector will be reshaped into a grid with these dimensions.
    :param rowcol_upsampling_factors: length-L numpy array of upsampling
        factors for horizontal dimensions.
    :param height_upsampling_factors: length-L numpy array of upsampling
        factors for vertical dimension.
    :param num_output_channels: Number of channels in output image.
    :param l1_weight: Weight of L1 regularization for conv and
        transposed-conv layers.
    :param l2_weight: Same but for L2 regularization.
    :param use_transposed_conv: Boolean flag.  If True, each upsampling will
        be done with a transposed-conv layer.  If False, each upsampling will
        be done with an upsampling layer followed by a normal conv layer.
    :param activation_function_name: Activation function.  If you do not want
        activation, make this None.  Otherwise, must be accepted by
        `architecture_utils.check_activation_function`.
    :param alpha_for_elu: See doc for
        `architecture_utils.check_activation_function`.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Boolean flag.  If True, will apply
        activation function to output image.
    :param use_batch_norm: Boolean flag.  If True, will apply batch
        normalization to conv and transposed-conv layers.
    :param use_batch_norm_for_last_layer: Boolean flag.  If True, will apply
        batch normalization to output image.
    :return: model_object: Untrained model (instance of
        `keras.models.Model`).
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)
    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions,
        exact_dimensions=numpy.array([3], dtype=int)
    )
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(rowcol_upsampling_factors,
                                         num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(rowcol_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(rowcol_upsampling_factors, 1)

    num_main_layers = len(rowcol_upsampling_factors)
    these_expected_dim = numpy.array([num_main_layers], dtype=int)

    error_checking.assert_is_numpy_array(
        height_upsampling_factors, exact_dimensions=these_expected_dim)
    error_checking.assert_is_integer_numpy_array(height_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(height_upsampling_factors, 1)

    # Set up CNN architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions,
        numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            current_num_filters = num_output_channels + 0
        elif rowcol_upsampling_factors[i] == 1:
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (
            rowcol_upsampling_factors[i], rowcol_upsampling_factors[i],
            height_upsampling_factors[i]
        )

        if use_transposed_conv:
            layer_object = keras.layers.Conv3DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if rowcol_upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except TypeError:
                    # Some Keras versions do not support the `interpolation`
                    # argument for UpSampling3D.
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv3D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1, 1), padding='same',
                data_format='channels_last', dilation_rate=(1, 1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        use_activation_here = (
            activation_function_name is not None
            and (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        use_batch_norm_here = (
            use_batch_norm
            and (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile CNN.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam()
    )

    model_object.summary()
    return model_object
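# A minimal usage sketch for `create_3d_net`; all sizes here are
# hypothetical.  Note that `num_input_features` must be divisible by the
# product of `first_spatial_dimensions`, so that the initial Reshape layer
# is valid (4096 features = 8 x 8 x 4 grid points x 16 filters below).

def _demo_create_3d_net():
    """Builds a small untrained upconvnet: 4096 features -> 32 x 32 x 16
    grid with 4 output channels (8 x 8 x 4 start, two 2x upsamplings)."""

    import numpy

    return create_3d_net(
        num_input_features=4096,
        first_spatial_dimensions=numpy.array([8, 8, 4], dtype=int),
        rowcol_upsampling_factors=numpy.array([2, 2], dtype=int),
        height_upsampling_factors=numpy.array([2, 2], dtype=int),
        num_output_channels=4,
        activation_function_name='relu')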