    def test_get_translations(self):
        """Ensures correct output from get_translations."""

        (these_x_offsets_pixels, these_y_offsets_pixels
        ) = data_augmentation.get_translations(
            num_translations=NUM_TRANSLATIONS,
            max_translation_pixels=MAX_TRANSLATION_PIXELS,
            num_grid_rows=2 * MAX_TRANSLATION_PIXELS,
            num_grid_columns=2 * MAX_TRANSLATION_PIXELS)

        self.assertTrue(len(these_x_offsets_pixels) == NUM_TRANSLATIONS)
        error_checking.assert_is_geq_numpy_array(
            these_x_offsets_pixels, -MAX_TRANSLATION_PIXELS)
        error_checking.assert_is_leq_numpy_array(
            these_x_offsets_pixels, MAX_TRANSLATION_PIXELS)

        self.assertTrue(len(these_y_offsets_pixels) == NUM_TRANSLATIONS)
        error_checking.assert_is_geq_numpy_array(
            these_y_offsets_pixels, -MAX_TRANSLATION_PIXELS)
        error_checking.assert_is_leq_numpy_array(
            these_y_offsets_pixels, MAX_TRANSLATION_PIXELS)

        error_checking.assert_is_greater_numpy_array(
            numpy.absolute(these_x_offsets_pixels) +
            numpy.absolute(these_y_offsets_pixels), 0)
Example #2
def dimensions_to_grid_id(grid_dimensions):
    """Determines grid from dimensions.

    :param grid_dimensions: 1-D numpy array with [num_rows, num_columns].
    :return: grid_id: String ID for grid.
    :raises: ValueError: if dimensions do not match a known grid.
    """

    error_checking.assert_is_numpy_array(grid_dimensions,
                                         exact_dimensions=numpy.array([2]))
    error_checking.assert_is_integer_numpy_array(grid_dimensions)
    error_checking.assert_is_greater_numpy_array(grid_dimensions, 1)

    these_dimensions = get_grid_dimensions(NARR_MODEL_NAME)
    if numpy.array_equal(these_dimensions, grid_dimensions):
        return ID_FOR_221GRID

    for this_grid_id in RUC_GRID_IDS:
        these_dimensions = get_grid_dimensions(RUC_MODEL_NAME, this_grid_id)
        if numpy.array_equal(these_dimensions, grid_dimensions):
            return this_grid_id

    raise ValueError('Dimensions (' + str(grid_dimensions[0]) + ' rows x ' +
                     str(grid_dimensions[1]) +
                     ' columns) do not match a known grid.')
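A minimal usage sketch (not from the original source; the 277-by-349 dimensions are assumed to be those of the NARR 221 grid):

narr_dimensions = numpy.array([277, 349], dtype=int)
this_grid_id = dimensions_to_grid_id(narr_dimensions)
# Returns ID_FOR_221GRID; any unrecognized dimensions raise the ValueError
# defined above.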
Example #3
def project_wind_to_thermal_gradient(u_matrix_grid_relative_m_s01,
                                     v_matrix_grid_relative_m_s01,
                                     thermal_field_matrix_kelvins,
                                     x_spacing_metres, y_spacing_metres):
    """At each grid point, projects wind to direction of thermal gradient.

    M = number of rows in grid
    N = number of columns in grid

    :param u_matrix_grid_relative_m_s01: M-by-N numpy array of grid-relative
        u-wind (in the direction of increasing column number, or towards the
        right).  Units are metres per second.
    :param v_matrix_grid_relative_m_s01: M-by-N numpy array of grid-relative
        v-wind (in the direction of increasing row number, or towards the
        bottom).
    :param thermal_field_matrix_kelvins: See doc for `get_thermal_front_param`.
    :param x_spacing_metres: Same.
    :param y_spacing_metres: Same.
    :return: projected_velocity_matrix_m_s01: M-by-N numpy array with wind
        velocity in direction of thermal gradient.  Positive (negative) values
        mean that the wind is blowing towards warmer (cooler) air.
    """

    error_checking.assert_is_numpy_array_without_nan(
        u_matrix_grid_relative_m_s01)
    error_checking.assert_is_numpy_array(u_matrix_grid_relative_m_s01,
                                         num_dimensions=2)

    error_checking.assert_is_numpy_array_without_nan(
        v_matrix_grid_relative_m_s01)
    error_checking.assert_is_numpy_array(
        v_matrix_grid_relative_m_s01,
        exact_dimensions=numpy.array(u_matrix_grid_relative_m_s01.shape))

    error_checking.assert_is_numpy_array_without_nan(
        thermal_field_matrix_kelvins)
    error_checking.assert_is_greater_numpy_array(thermal_field_matrix_kelvins,
                                                 0.)
    error_checking.assert_is_numpy_array(
        thermal_field_matrix_kelvins,
        exact_dimensions=numpy.array(u_matrix_grid_relative_m_s01.shape))

    x_grad_matrix_kelvins_m01, y_grad_matrix_kelvins_m01 = _get_2d_gradient(
        field_matrix=thermal_field_matrix_kelvins,
        x_spacing_metres=x_spacing_metres,
        y_spacing_metres=y_spacing_metres)
    grad_magnitude_matrix_kelvins_m01 = numpy.sqrt(
        x_grad_matrix_kelvins_m01**2 + y_grad_matrix_kelvins_m01**2)

    first_matrix = (u_matrix_grid_relative_m_s01 * x_grad_matrix_kelvins_m01 /
                    grad_magnitude_matrix_kelvins_m01)
    first_matrix[numpy.isnan(first_matrix)] = 0.

    second_matrix = (v_matrix_grid_relative_m_s01 * y_grad_matrix_kelvins_m01 /
                     grad_magnitude_matrix_kelvins_m01)
    second_matrix[numpy.isnan(second_matrix)] = 0.

    return first_matrix + second_matrix
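A toy sanity check (a sketch, not from the original tests; assumes `_get_2d_gradient` returns finite-difference gradients divided by the grid spacing): wind blowing straight towards warmer air should project positively.

toy_thermal_matrix_kelvins = numpy.array([[280., 281., 282.],
                                          [280., 281., 282.],
                                          [280., 281., 282.]])
toy_u_matrix_m_s01 = numpy.full((3, 3), 5.)  # blowing towards warmer air
toy_v_matrix_m_s01 = numpy.zeros((3, 3))

toy_projection_m_s01 = project_wind_to_thermal_gradient(
    u_matrix_grid_relative_m_s01=toy_u_matrix_m_s01,
    v_matrix_grid_relative_m_s01=toy_v_matrix_m_s01,
    thermal_field_matrix_kelvins=toy_thermal_matrix_kelvins,
    x_spacing_metres=1000., y_spacing_metres=1000.)
# Every element of toy_projection_m_s01 should be +5 m s^-1.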
Example #4
    def test_assert_is_positive_numpy_array_true_with_nan_allowed(self):
        """Checks assert_is_greater_numpy_array; base_value = 0, inputs > 0.

        In this case, input array contains NaN's and allow_nan = True.
        """

        error_checking.assert_is_greater_numpy_array(
            POSITIVE_NUMPY_ARRAY_WITH_NANS, 0, allow_nan=True)
Example #5
def pressure_to_height(pressures_pascals):
    """Converts pressures to heights.

    :param pressures_pascals: numpy array of pressures.
    :return: heights_m_asl: equivalent-size numpy array of heights (metres above
        sea level).
    """

    error_checking.assert_is_greater_numpy_array(pressures_pascals, 0.)

    original_shape = pressures_pascals.shape
    pressures_pascals = numpy.ravel(pressures_pascals)

    num_points = len(pressures_pascals)
    heights_m_asl = numpy.full(num_points, numpy.nan)

    for i in range(len(STANDARD_PRESSURES_PASCALS) + 1):
        if i == 0:
            this_bottom_index = 0
            this_top_index = 1
            this_min_pressure_pascals = STANDARD_PRESSURES_PASCALS[0]
            this_max_pressure_pascals = numpy.inf
        elif i == len(STANDARD_PRESSURES_PASCALS):
            this_bottom_index = -2
            this_top_index = -1
            this_min_pressure_pascals = 0.
            this_max_pressure_pascals = STANDARD_PRESSURES_PASCALS[-1]
        else:
            this_bottom_index = i - 1
            this_top_index = i
            this_min_pressure_pascals = STANDARD_PRESSURES_PASCALS[i]
            this_max_pressure_pascals = STANDARD_PRESSURES_PASCALS[i - 1]

        these_indices = numpy.where(
            numpy.logical_and(
                pressures_pascals >= this_min_pressure_pascals,
                pressures_pascals < this_max_pressure_pascals))[0]

        if len(these_indices) == 0:
            continue

        this_numerator = (STANDARD_HEIGHTS_M_ASL[this_bottom_index] -
                          STANDARD_HEIGHTS_M_ASL[this_top_index])
        this_denominator = (numpy.log(
            STANDARD_PRESSURES_PASCALS[this_top_index] /
            STANDARD_PRESSURES_PASCALS[this_bottom_index]))
        this_e_folding_height_metres = this_numerator / this_denominator

        these_logs = numpy.log(pressures_pascals[these_indices] /
                               STANDARD_PRESSURES_PASCALS[this_bottom_index])
        heights_m_asl[these_indices] = (
            STANDARD_HEIGHTS_M_ASL[this_bottom_index] -
            this_e_folding_height_metres * these_logs)

    return numpy.reshape(heights_m_asl, original_shape)
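A short usage sketch (values invented; assumes the module-level STANDARD_PRESSURES_PASCALS and STANDARD_HEIGHTS_M_ASL tables span the troposphere):

example_pressures_pascals = numpy.array([[101325., 85000.],
                                         [70000., 50000.]])
example_heights_m_asl = pressure_to_height(example_pressures_pascals)
# Output keeps the (2, 2) shape of the input, with height increasing as
# pressure decreases.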
Example #6
def get_thermal_front_param(thermal_field_matrix_kelvins, x_spacing_metres,
                            y_spacing_metres):
    """Computes thermal front parameter (TFP) at each grid point.

    TFP is defined in Renard and Clarke (1965).

    M = number of rows in grid
    N = number of columns in grid

    :param thermal_field_matrix_kelvins: M-by-N numpy array with values of
        thermal variable.  This can be any thermal variable ([potential]
        temperature, wet-bulb [potential] temperature, equivalent [potential]
        temperature, etc.).
    :param x_spacing_metres: Spacing between grid points in adjacent columns.
    :param y_spacing_metres: Spacing between grid points in adjacent rows.
    :return: tfp_matrix_kelvins_m02: M-by-N numpy array with TFP at each grid
        point. Units are Kelvins per m^2.
    """

    error_checking.assert_is_numpy_array_without_nan(
        thermal_field_matrix_kelvins)
    error_checking.assert_is_greater_numpy_array(thermal_field_matrix_kelvins,
                                                 0.)
    error_checking.assert_is_numpy_array(thermal_field_matrix_kelvins,
                                         num_dimensions=2)

    error_checking.assert_is_greater(x_spacing_metres, 0.)
    error_checking.assert_is_greater(y_spacing_metres, 0.)

    x_grad_matrix_kelvins_m01, y_grad_matrix_kelvins_m01 = _get_2d_gradient(
        field_matrix=thermal_field_matrix_kelvins,
        x_spacing_metres=x_spacing_metres,
        y_spacing_metres=y_spacing_metres)

    grad_magnitude_matrix_kelvins_m01 = numpy.sqrt(
        x_grad_matrix_kelvins_m01**2 + y_grad_matrix_kelvins_m01**2)
    (x_grad_grad_matrix_kelvins_m02,
     y_grad_grad_matrix_kelvins_m02) = _get_2d_gradient(
         field_matrix=grad_magnitude_matrix_kelvins_m01,
         x_spacing_metres=x_spacing_metres,
         y_spacing_metres=y_spacing_metres)

    first_matrix = (-x_grad_grad_matrix_kelvins_m02 *
                    x_grad_matrix_kelvins_m01 /
                    grad_magnitude_matrix_kelvins_m01)
    first_matrix[numpy.isnan(first_matrix)] = 0.

    second_matrix = (-y_grad_grad_matrix_kelvins_m02 *
                     y_grad_matrix_kelvins_m01 /
                     grad_magnitude_matrix_kelvins_m01)
    second_matrix[numpy.isnan(second_matrix)] = 0.

    return first_matrix + second_matrix
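For reference, the two terms above implement the thermal front parameter of Renard and Clarke (1965); a LaTeX sketch of that definition, with tau denoting the thermal field:

\mathrm{TFP} = -\nabla \left| \nabla \tau \right| \cdot \frac{\nabla \tau}{\left| \nabla \tau \right|}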
Example #7
def check_time_separation(unix_times_sec,
                          early_indices=None,
                          late_indices=None,
                          time_separation_sec=DEFAULT_TIME_SEPARATION_SEC):
    """Ensures that there is a separation (buffer) between two sets of times.

    :param unix_times_sec: See documentation for _apply_time_separation.
    :param early_indices: See documentation for _apply_time_separation.
    :param late_indices: See documentation for _apply_time_separation.
    :param time_separation_sec: See documentation for _apply_time_separation.
    :raises: ValueError: if separation between sets is < `time_separation_sec`.
    """

    error_checking.assert_is_integer_numpy_array(unix_times_sec)
    error_checking.assert_is_numpy_array_without_nan(unix_times_sec)
    error_checking.assert_is_numpy_array(unix_times_sec, num_dimensions=1)

    num_times = len(unix_times_sec)

    error_checking.assert_is_integer_numpy_array(early_indices)
    error_checking.assert_is_numpy_array(early_indices, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(early_indices, 0)
    error_checking.assert_is_leq_numpy_array(early_indices, num_times - 1)

    error_checking.assert_is_integer_numpy_array(late_indices)
    error_checking.assert_is_numpy_array(late_indices, num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(late_indices, 0)
    error_checking.assert_is_leq_numpy_array(late_indices, num_times - 1)
    error_checking.assert_is_greater_numpy_array(
        unix_times_sec[late_indices], numpy.max(unix_times_sec[early_indices]))

    error_checking.assert_is_integer(time_separation_sec)
    error_checking.assert_is_greater(time_separation_sec, 0)

    last_early_time_unix_sec = numpy.max(unix_times_sec[early_indices])
    first_late_time_unix_sec = numpy.min(unix_times_sec[late_indices])
    min_diff_between_sets_sec = (first_late_time_unix_sec -
                                 last_early_time_unix_sec)
    if min_diff_between_sets_sec < time_separation_sec:
        last_early_time_string = time_conversion.unix_sec_to_string(
            last_early_time_unix_sec, TIME_STRING_FORMAT)
        first_late_time_string = time_conversion.unix_sec_to_string(
            first_late_time_unix_sec, TIME_STRING_FORMAT)

        error_string = ('Last time in early set is ' + last_early_time_string +
                        '.  First time in late set is ' +
                        first_late_time_string +
                        '.  This is a time separation of ' +
                        str(min_diff_between_sets_sec) +
                        ' seconds between sets.  Required separation is >= ' +
                        str(time_separation_sec) + ' s.')
        raise ValueError(error_string)
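A usage sketch with hypothetical times: the early and late sets are separated by exactly 86400 seconds, so the call below should pass for any `time_separation_sec` up to 86400.

these_times_unix_sec = numpy.array([0, 3600, 7200, 93600, 97200], dtype=int)

check_time_separation(
    unix_times_sec=these_times_unix_sec,
    early_indices=numpy.array([0, 1, 2], dtype=int),
    late_indices=numpy.array([3, 4], dtype=int),
    time_separation_sec=86400)
# Last early time = 7200 s, first late time = 93600 s, so the separation is
# exactly 86400 s and no ValueError is raised.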
Example #8
def unzip_1day_tar_file(tar_file_name, spc_date_string, top_target_dir_name,
                        scales_to_extract_metres2):
    """Unzips tar file with segmotion output for one SPC date.

    :param tar_file_name: Path to input file.
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param top_target_dir_name: Name of top-level output directory.
    :param scales_to_extract_metres2: 1-D numpy array of tracking scales to
        extract.
    :return: target_directory_name: Path to output directory.  This will be
        "<top_target_directory_name>/<yyyymmdd>", where <yyyymmdd> is the SPC
        date.
    """

    # Verification.
    _ = time_conversion.spc_date_string_to_unix_sec(spc_date_string)
    error_checking.assert_file_exists(tar_file_name)
    error_checking.assert_is_greater_numpy_array(scales_to_extract_metres2, 0)
    error_checking.assert_is_numpy_array(scales_to_extract_metres2,
                                         num_dimensions=1)

    scales_to_extract_metres2 = numpy.round(scales_to_extract_metres2).astype(
        int)

    num_scales_to_extract = len(scales_to_extract_metres2)
    directory_names_to_unzip = []

    for j in range(num_scales_to_extract):
        this_relative_stats_dir_name = '{0:s}/{1:s}'.format(
            spc_date_string,
            _get_relative_stats_dir_physical_scale(
                scales_to_extract_metres2[j]))

        this_relative_polygon_dir_name = '{0:s}/{1:s}'.format(
            spc_date_string,
            _get_relative_polygon_dir_physical_scale(
                scales_to_extract_metres2[j]))

        directory_names_to_unzip.append(
            this_relative_stats_dir_name.replace(spc_date_string + '/', ''))
        directory_names_to_unzip.append(
            this_relative_polygon_dir_name.replace(spc_date_string + '/', ''))

    target_directory_name = '{0:s}/{1:s}/{2:s}'.format(top_target_dir_name,
                                                       spc_date_string[:4],
                                                       spc_date_string)

    unzipping.unzip_tar(tar_file_name,
                        target_directory_name=target_directory_name,
                        file_and_dir_names_to_unzip=directory_names_to_unzip)

    return target_directory_name
Example #9
def classification_cutoffs_to_ranges(class_cutoffs, non_negative_only=True):
    """Converts classification cutoffs to min/max for each class.

    C = number of classes
    c = C - 1 = number of cutoffs

    :param class_cutoffs: length-c numpy array of class cutoffs.
    :param non_negative_only: Boolean flag.  If True, class cutoffs/minima/
        maxima must be non-negative.
    :return: class_cutoffs: Same as input, but containing only unique values and
        sorted in ascending order.
    :return: class_minima: length-C numpy array of class minima, sorted in
        ascending order.
    :return: class_maxima: length-C numpy array of class maxima, sorted in
        ascending order.
    """

    error_checking.assert_is_boolean(non_negative_only)
    error_checking.assert_is_numpy_array(class_cutoffs, num_dimensions=1)
    if non_negative_only:
        error_checking.assert_is_greater_numpy_array(class_cutoffs, 0.)
    else:
        error_checking.assert_is_numpy_array_without_nan(class_cutoffs)

    class_cutoffs = numpy.sort(numpy.unique(class_cutoffs))
    num_classes = len(class_cutoffs) + 1
    class_minima = numpy.full(num_classes, numpy.nan)
    class_maxima = numpy.full(num_classes, numpy.nan)

    for k in range(num_classes):
        if k == 0:
            class_maxima[k] = class_cutoffs[k]
            if non_negative_only:
                class_minima[k] = 0.
            else:
                class_minima[k] = -numpy.inf

        elif k == num_classes - 1:
            class_minima[k] = class_cutoffs[k - 1]
            class_maxima[k] = numpy.inf
        else:
            class_minima[k] = class_cutoffs[k - 1]
            class_maxima[k] = class_cutoffs[k]

    return class_cutoffs, class_minima, class_maxima
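A worked example (not from the original tests): two cutoffs produce three classes, with the lowest class bounded below by 0 because `non_negative_only=True`.

these_cutoffs, these_minima, these_maxima = classification_cutoffs_to_ranges(
    numpy.array([20., 10.]), non_negative_only=True)

# these_cutoffs -> array([10., 20.])
# these_minima  -> array([ 0., 10., 20.])
# these_maxima  -> array([10., 20., inf])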
Example #10
    def loss(target_tensor, forecast_probability_tensor):
        """Computes weighted cross-entropy.

        :param target_tensor: See docstring for the 3 possible formats.
        :param forecast_probability_tensor: Same.
        :return: loss: Weighted cross-entropy.
        """

        error_checking.assert_is_greater_numpy_array(class_weights, 0.)

        num_dimensions = _get_num_tensor_dimensions(target_tensor)
        if num_dimensions == 1:
            error_checking.assert_is_numpy_array(class_weights,
                                                 exact_dimensions=numpy.array(
                                                     [2]))
        else:
            error_checking.assert_is_numpy_array(class_weights,
                                                 num_dimensions=1)

        num_classes = len(class_weights)
        class_weight_tensor = tensorflow.convert_to_tensor(class_weights,
                                                           dtype='float32')
        class_weight_tensor = K.reshape(class_weight_tensor, (num_classes, 1))

        if num_dimensions == 1:
            example_weight_tensor = K.dot(
                keras.utils.to_categorical(target_tensor, num_classes),
                class_weight_tensor)
        else:
            example_weight_tensor = K.dot(target_tensor, class_weight_tensor)

        example_weight_tensor = K.reshape(example_weight_tensor,
                                          K.shape(example_weight_tensor)[:-1])

        return K.mean(example_weight_tensor * K.categorical_crossentropy(
            target_tensor, forecast_probability_tensor))
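The `loss` function above closes over `class_weights`, so it is presumably returned by a factory along the following lines (the factory name and signature are assumptions, not taken from the original module):

def make_weighted_cross_entropy(class_weights):
    """Returns a Keras-compatible loss weighted by per-class weights."""

    def loss(target_tensor, forecast_probability_tensor):
        ...  # body as shown above

    return loss


# Hypothetical usage when compiling a model:
# model_object.compile(
#     loss=make_weighted_cross_entropy(numpy.array([1., 5.])),
#     optimizer=keras.optimizers.Adam())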
Example #11
def write_standard_file(pickle_file_name,
                        denorm_predictor_matrices,
                        cam_matrices,
                        guided_cam_matrices,
                        full_storm_id_strings,
                        storm_times_unix_sec,
                        model_file_name,
                        target_class,
                        target_layer_name,
                        sounding_pressure_matrix_pa=None):
    """Writes class-activation maps (one per storm object) to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param denorm_predictor_matrices: See doc for `_check_in_and_out_matrices`.
    :param cam_matrices: Same.
    :param guided_cam_matrices: Same.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to model that created the class-activation
        maps (readable by `cnn.read_model`).
    :param target_class: Target class.  `cam_matrices` and `guided_cam_matrices`
        contain activations for the [k + 1]th class, where k = `target_class`.
    :param target_layer_name: Name of target layer.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if the model is trained with soundings but without
        pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_integer(target_class)
    error_checking.assert_is_geq(target_class, 0)
    error_checking.assert_is_string(target_layer_name)

    error_checking.assert_is_string_list(full_storm_id_strings)
    error_checking.assert_is_numpy_array(numpy.array(full_storm_id_strings),
                                         num_dimensions=1)

    num_examples = len(full_storm_id_strings)
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    _check_in_and_out_matrices(predictor_matrices=denorm_predictor_matrices,
                               num_examples=num_examples,
                               cam_matrices=cam_matrices,
                               guided_cam_matrices=guided_cam_matrices)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(sounding_pressure_matrix_pa,
                                             num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples, ) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, exact_dimensions=these_expected_dim)

    gradcam_dict = {
        PREDICTOR_MATRICES_KEY: denorm_predictor_matrices,
        CAM_MATRICES_KEY: cam_matrices,
        GUIDED_CAM_MATRICES_KEY: guided_cam_matrices,
        MODEL_FILE_KEY: model_file_name,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        TARGET_CLASS_KEY: target_class,
        TARGET_LAYER_KEY: target_layer_name,
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(gradcam_dict, pickle_file_handle)
    pickle_file_handle.close()
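Reading the file back is just a matter of unpickling the dictionary written above (a sketch; the module presumably provides a dedicated reader as well):

pickle_file_name = 'gradcam_output.p'  # hypothetical path

with open(pickle_file_name, 'rb') as pickle_file_handle:
    gradcam_dict = pickle.load(pickle_file_handle)

cam_matrices = gradcam_dict[CAM_MATRICES_KEY]
full_storm_id_strings = gradcam_dict[FULL_STORM_IDS_KEY]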
Example #12
def create_2d_net(
        num_input_features, first_spatial_dimensions, upsampling_factors,
        num_output_channels,
        l1_weight=DEFAULT_L1_WEIGHT, l2_weight=DEFAULT_L2_WEIGHT,
        use_transposed_conv=True, activation_function_name=None,
        alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU,
        use_activn_for_last_layer=False,
        use_batch_norm=True, use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 2 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-2 numpy array of dimensions in first
        main layer.  The order should be (num_rows, num_columns).  Before it is
        passed to the first main layer, the feature vector will be reshaped into
        a grid with these dimensions.
    :param upsampling_factors: length-L numpy array of upsampling factors.
    :param num_output_channels: See doc for `create_3d_net`.
    :param l1_weight: Same.
    :param l2_weight: Same.
    :param use_transposed_conv: Same.
    :param activation_function_name: Same.
    :param alpha_for_elu: Same.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Same.
    :param use_batch_norm: Same.
    :param use_batch_norm_for_last_layer: Same.
    :return: model_object: Same.
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)

    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions, exact_dimensions=numpy.array([2], dtype=int)
    )
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(upsampling_factors, num_dimensions=1)
    error_checking.assert_is_integer_numpy_array(upsampling_factors)
    error_checking.assert_is_geq_numpy_array(upsampling_factors, 1)

    # Set up CNN architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions, numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    num_main_layers = len(upsampling_factors)
    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            current_num_filters = num_output_channels + 0

            # layer_object = keras.layers.ZeroPadding2D(
            #     padding=((1, 0), (1, 0)), data_format='channels_last'
            # )(layer_object)

        elif upsampling_factors[i] == 1:
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (upsampling_factors[i], upsampling_factors[i])

        if use_transposed_conv:
            layer_object = keras.layers.Conv2DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except:
                    layer_object = keras.layers.UpSampling2D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv2D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1), padding='same', data_format='channels_last',
                dilation_rate=(1, 1), activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        use_activation_here = (
            activation_function_name is not None and
            (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        use_batch_norm_here = (
            use_batch_norm and
            (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile CNN.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error, optimizer=keras.optimizers.Adam()
    )

    model_object.summary()
    return model_object
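A usage sketch with invented numbers: a 256-feature vector is reshaped to 4 x 4 x 16 and upsampled three times by a factor of 2, yielding a 32 x 32 grid with 4 output channels.

upconvnet_model_object = create_2d_net(
    num_input_features=256,
    first_spatial_dimensions=numpy.array([4, 4], dtype=int),
    upsampling_factors=numpy.array([2, 2, 2], dtype=int),
    num_output_channels=4)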
Example #13
def plot_saliency_for_sounding(saliency_matrix,
                               sounding_field_names,
                               pressure_levels_mb,
                               colour_map_object,
                               max_absolute_colour_value,
                               min_font_size=DEFAULT_MIN_SOUNDING_FONT_SIZE,
                               max_font_size=DEFAULT_MAX_SOUNDING_FONT_SIZE):
    """Plots saliency for one sounding.

    P = number of pressure levels
    F = number of fields

    :param saliency_matrix: P-by-F numpy array of saliency values.
    :param sounding_field_names: length-F list of field names.
    :param pressure_levels_mb: length-P list of pressure levels (millibars).
    :param colour_map_object: See doc for `plot_2d_grid`.
    :param max_absolute_colour_value: Same.
    :param min_font_size: Same.
    :param max_font_size: Same.
    """

    error_checking.assert_is_geq(max_absolute_colour_value, 0.)
    max_absolute_colour_value = max([max_absolute_colour_value, 0.001])

    error_checking.assert_is_greater_numpy_array(pressure_levels_mb, 0.)
    error_checking.assert_is_numpy_array(pressure_levels_mb, num_dimensions=1)

    error_checking.assert_is_list(sounding_field_names)
    error_checking.assert_is_numpy_array(numpy.array(sounding_field_names),
                                         num_dimensions=1)

    num_pressure_levels = len(pressure_levels_mb)
    num_sounding_fields = len(sounding_field_names)

    error_checking.assert_is_numpy_array_without_nan(saliency_matrix)
    error_checking.assert_is_numpy_array(saliency_matrix,
                                         exact_dimensions=numpy.array([
                                             num_pressure_levels,
                                             num_sounding_fields
                                         ]))

    try:
        u_wind_index = sounding_field_names.index(soundings.U_WIND_NAME)
        v_wind_index = sounding_field_names.index(soundings.V_WIND_NAME)
        plot_wind_barbs = True
    except ValueError:
        plot_wind_barbs = False

    if plot_wind_barbs:
        u_wind_saliency_values = saliency_matrix[:, u_wind_index]
        v_wind_saliency_values = saliency_matrix[:, v_wind_index]
        wind_saliency_magnitudes = numpy.sqrt(u_wind_saliency_values**2 +
                                              v_wind_saliency_values**2)

        colour_norm_object = pyplot.Normalize(vmin=0.,
                                              vmax=max_absolute_colour_value)

        rgb_matrix_for_wind = colour_map_object(
            colour_norm_object(wind_saliency_magnitudes))[..., :-1]

        non_wind_flags = numpy.array(
            [f not in WIND_COMPONENT_NAMES for f in sounding_field_names],
            dtype=bool)

        non_wind_indices = numpy.where(non_wind_flags)[0]
        saliency_matrix = saliency_matrix[:, non_wind_indices]
        sounding_field_names = [
            sounding_field_names[k] for k in non_wind_indices
        ]

        sounding_field_names.append(WIND_NAME)
        num_sounding_fields = len(sounding_field_names)

    rgb_matrix, font_size_matrix = _saliency_to_colour_and_size(
        saliency_matrix=saliency_matrix,
        colour_map_object=colour_map_object,
        max_absolute_colour_value=max_absolute_colour_value,
        min_font_size=min_font_size,
        max_font_size=max_font_size)

    _, axes_object = pyplot.subplots(1,
                                     1,
                                     figsize=(FIGURE_WIDTH_INCHES,
                                              FIGURE_HEIGHT_INCHES))

    axes_object.set_facecolor(
        plotting_utils.colour_from_numpy_to_tuple(
            SOUNDING_SALIENCY_BACKGROUND_COLOUR))

    for k in range(num_sounding_fields):
        if sounding_field_names[k] == WIND_NAME:
            for j in range(num_pressure_levels):
                this_vector = numpy.array(
                    [u_wind_saliency_values[j], v_wind_saliency_values[j]])

                this_vector = (WIND_SALIENCY_MULTIPLIER * this_vector /
                               numpy.linalg.norm(this_vector, ord=2))

                this_colour_tuple = plotting_utils.colour_from_numpy_to_tuple(
                    rgb_matrix_for_wind[j, ...])

                axes_object.barbs(k,
                                  pressure_levels_mb[j],
                                  this_vector[0],
                                  this_vector[1],
                                  length=WIND_BARB_LENGTH,
                                  fill_empty=True,
                                  rounding=False,
                                  sizes={'emptybarb': EMPTY_WIND_BARB_RADIUS},
                                  color=this_colour_tuple)

            continue

        for j in range(num_pressure_levels):
            this_colour_tuple = plotting_utils.colour_from_numpy_to_tuple(
                rgb_matrix[j, k, ...])

            if saliency_matrix[j, k] >= 0:
                axes_object.text(k,
                                 pressure_levels_mb[j],
                                 '+',
                                 fontsize=font_size_matrix[j, k],
                                 color=this_colour_tuple,
                                 horizontalalignment='center',
                                 verticalalignment='center')
            else:
                axes_object.text(k,
                                 pressure_levels_mb[j],
                                 '_',
                                 fontsize=font_size_matrix[j, k],
                                 color=this_colour_tuple,
                                 horizontalalignment='center',
                                 verticalalignment='bottom')

    axes_object.set_xlim(-0.5, num_sounding_fields - 0.5)
    axes_object.set_ylim(100, 1000)
    axes_object.invert_yaxis()
    pyplot.yscale('log')
    pyplot.minorticks_off()

    y_tick_locations = numpy.linspace(100, 1000, num=10, dtype=int)
    y_tick_labels = ['{0:d}'.format(p) for p in y_tick_locations]
    pyplot.yticks(y_tick_locations, y_tick_labels)

    x_tick_locations = numpy.linspace(0,
                                      num_sounding_fields - 1,
                                      num=num_sounding_fields,
                                      dtype=float)
    x_tick_labels = [FIELD_NAME_TO_LATEX_DICT[f] for f in sounding_field_names]
    pyplot.xticks(x_tick_locations, x_tick_labels)

    colour_bar_object = plotting_utils.plot_linear_colour_bar(
        axes_object_or_matrix=axes_object,
        data_matrix=saliency_matrix,
        colour_map_object=colour_map_object,
        min_value=0.,
        max_value=max_absolute_colour_value,
        orientation_string='vertical',
        extend_min=True,
        extend_max=True)

    colour_bar_object.set_label('Saliency (absolute value)')
Example #14
def _check_args(option_dict):
    """Error-checks input arguments.

    L = number of levels in encoder = number of levels in decoder
    D = number of dense layers

    :param option_dict: Dictionary with the following keys.
    option_dict['input_dimensions']: numpy array with input dimensions
        (num_heights, num_channels).
    option_dict['num_levels']: L in the above discussion.
    option_dict['num_conv_layers_by_level']: length-(L + 1) numpy array with
        number of conv layers at each level.
    option_dict['num_channels_by_level']: length-(L + 1) numpy array with number
        of channels at each level.
    option_dict['encoder_dropout_rate_by_level']: length-(L + 1) numpy array
        with dropout rate for conv layers in encoder at each level.
    option_dict['upconv_dropout_rate_by_level']: length-L numpy array
        with dropout rate for upconv layers at each level.
    option_dict['skip_dropout_rate_by_level']: length-L numpy array with dropout
        rate for conv layer after skip connection at each level.
    option_dict['include_penultimate_conv']: Boolean flag.  If True, will put in
        extra conv layer (with 3 x 3 filter) before final pixelwise conv.
    option_dict['penultimate_conv_dropout_rate']: Dropout rate for penultimate
        conv layer.
    option_dict['dense_layer_neuron_nums']: length-D numpy array with number of
        neurons in each dense layer.
    option_dict['dense_layer_dropout_rates']: length-D numpy array with dropout
        rate for each dense layer.
    option_dict['inner_activ_function_name']: Name of activation function for
        all inner (non-output) layers.  Must be accepted by
        `architecture_utils.check_activation_function`.
    option_dict['inner_activ_function_alpha']: Alpha (slope parameter) for
        activation function for all inner layers.  Applies only to ReLU and eLU.
    option_dict['output_activ_function_name']: Same as
        `inner_activ_function_name` but for output layer.
    option_dict['output_activ_function_alpha']: Same as
        `inner_activ_function_alpha` but for output layer.
    option_dict['l1_weight']: Weight for L_1 regularization.
    option_dict['l2_weight']: Weight for L_2 regularization.
    option_dict['use_batch_normalization']: Boolean flag.  If True, will use
        batch normalization after each inner (non-output) conv layer.

    :return: option_dict: Same as input, except defaults may have been added.
    """

    orig_option_dict = option_dict.copy()
    option_dict = DEFAULT_ARCHITECTURE_OPTION_DICT.copy()
    option_dict.update(orig_option_dict)

    input_dimensions = option_dict[INPUT_DIMENSIONS_KEY]
    error_checking.assert_is_numpy_array(input_dimensions,
                                         exact_dimensions=numpy.array(
                                             [2], dtype=int))
    error_checking.assert_is_integer_numpy_array(input_dimensions)
    error_checking.assert_is_greater_numpy_array(input_dimensions, 0)

    num_levels = option_dict[NUM_LEVELS_KEY]
    error_checking.assert_is_integer(num_levels)
    error_checking.assert_is_geq(num_levels, 2)

    expected_dim = numpy.array([num_levels + 1], dtype=int)

    num_conv_layers_by_level = option_dict[CONV_LAYER_COUNTS_KEY]
    error_checking.assert_is_numpy_array(num_conv_layers_by_level,
                                         exact_dimensions=expected_dim)
    error_checking.assert_is_integer_numpy_array(num_conv_layers_by_level)
    error_checking.assert_is_greater_numpy_array(num_conv_layers_by_level, 0)

    num_channels_by_level = option_dict[CHANNEL_COUNTS_KEY]
    error_checking.assert_is_numpy_array(num_channels_by_level,
                                         exact_dimensions=expected_dim)
    error_checking.assert_is_integer_numpy_array(num_channels_by_level)
    error_checking.assert_is_greater_numpy_array(num_channels_by_level, 0)

    encoder_dropout_rate_by_level = option_dict[ENCODER_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(encoder_dropout_rate_by_level,
                                         exact_dimensions=expected_dim)
    error_checking.assert_is_leq_numpy_array(encoder_dropout_rate_by_level,
                                             1.,
                                             allow_nan=True)

    expected_dim = numpy.array([num_levels], dtype=int)

    upconv_dropout_rate_by_level = option_dict[UPCONV_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(upconv_dropout_rate_by_level,
                                         exact_dimensions=expected_dim)
    error_checking.assert_is_leq_numpy_array(upconv_dropout_rate_by_level,
                                             1.,
                                             allow_nan=True)

    skip_dropout_rate_by_level = option_dict[SKIP_DROPOUT_RATES_KEY]
    error_checking.assert_is_numpy_array(skip_dropout_rate_by_level,
                                         exact_dimensions=expected_dim)
    error_checking.assert_is_leq_numpy_array(skip_dropout_rate_by_level,
                                             1.,
                                             allow_nan=True)

    error_checking.assert_is_boolean(option_dict[INCLUDE_PENULTIMATE_KEY])
    error_checking.assert_is_leq(option_dict[PENULTIMATE_DROPOUT_RATE_KEY],
                                 1.,
                                 allow_nan=True)

    dense_layer_neuron_nums = option_dict[DENSE_LAYER_NEURON_NUMS_KEY]
    dense_layer_dropout_rates = option_dict[DENSE_LAYER_DROPOUT_RATES_KEY]
    has_dense_layers = not (dense_layer_neuron_nums is None
                            and dense_layer_dropout_rates is None)

    if has_dense_layers:
        error_checking.assert_is_integer_numpy_array(dense_layer_neuron_nums)
        error_checking.assert_is_numpy_array(dense_layer_neuron_nums,
                                             num_dimensions=1)
        error_checking.assert_is_geq_numpy_array(dense_layer_neuron_nums, 1)

        num_dense_layers = len(dense_layer_neuron_nums)
        expected_dim = numpy.array([num_dense_layers], dtype=int)

        error_checking.assert_is_numpy_array(dense_layer_dropout_rates,
                                             exact_dimensions=expected_dim)
        error_checking.assert_is_leq_numpy_array(dense_layer_dropout_rates,
                                                 1.,
                                                 allow_nan=True)

    error_checking.assert_is_geq(option_dict[L1_WEIGHT_KEY], 0.)
    error_checking.assert_is_geq(option_dict[L2_WEIGHT_KEY], 0.)
    error_checking.assert_is_boolean(option_dict[USE_BATCH_NORM_KEY])

    return option_dict
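A hypothetical `option_dict` showing the required array lengths for a 3-level network (the string keys follow the docstring above and are assumed to match the module-level *_KEY constants):

example_option_dict = {
    'input_dimensions': numpy.array([64, 8], dtype=int),
    'num_levels': 3,
    'num_conv_layers_by_level': numpy.full(4, 2, dtype=int),    # length L + 1
    'num_channels_by_level': numpy.array([16, 32, 64, 128], dtype=int),
    'encoder_dropout_rate_by_level': numpy.full(4, numpy.nan),  # length L + 1
    'upconv_dropout_rate_by_level': numpy.full(3, numpy.nan),   # length L
    'skip_dropout_rate_by_level': numpy.full(3, numpy.nan),     # length L
    'dense_layer_neuron_nums': None,
    'dense_layer_dropout_rates': None
}

example_option_dict = _check_args(example_option_dict)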
Example #15
def find_convective_pixels(reflectivity_matrix_dbz, grid_metadata_dict,
                           valid_time_unix_sec, option_dict):
    """Classifies pixels (horiz grid points) as convective or non-convective.

    :param reflectivity_matrix_dbz: M-by-N-by-H numpy array of reflectivity
        values.  Latitude should increase along the first axis; longitude should
        increase along the second axis; height should increase along the third
        axis.  MAKE SURE NOT TO FLIP YOUR LATITUDES.

    :param grid_metadata_dict: Dictionary with the following keys.
    grid_metadata_dict['min_grid_point_latitude_deg']: Minimum latitude (deg N)
        over all grid points.
    grid_metadata_dict['latitude_spacing_deg']: Spacing (deg N) between grid
        points in adjacent rows.
    grid_metadata_dict['min_grid_point_longitude_deg']: Minimum longitude
        (deg E) over all grid points.
    grid_metadata_dict['longitude_spacing_deg']: Spacing (deg E) between grid
        points in adjacent columns.
    grid_metadata_dict['grid_point_heights_m_asl']: length-H numpy array of
        heights (metres above sea level) at grid points.

    :param valid_time_unix_sec: Valid time.

    :param option_dict: Dictionary with the following keys.
    option_dict['peakedness_neigh_metres']: Neighbourhood radius for
        peakedness calculations (metres), used for criterion 1.
    option_dict['max_peakedness_height_m_asl']: Max height (metres above sea
        level) for peakedness calculations, used in criterion 1.
    option_dict['min_height_fraction_for_peakedness']: Minimum fraction of
        heights that exceed peakedness threshold, used in criterion 1.  At each
        horizontal location, at least this fraction of heights must exceed the
        threshold.
    option_dict['halve_resolution_for_peakedness']: Boolean flag.  If True,
        horizontal grid resolution will be halved for peakedness calculations.
    option_dict['min_echo_top_m_asl']: Minimum echo top (metres above sea
        level), used for criterion 3.
    option_dict['echo_top_level_dbz']: Critical reflectivity (used to compute
        echo top for criterion 3).
    option_dict['min_size_pixels']: Minimum connected-region size (for
        criterion 4).
    option_dict['min_composite_refl_criterion1_dbz']: Minimum composite
        (column-max) reflectivity for criterion 1.  This may be None.
    option_dict['min_composite_refl_criterion5_dbz']: Minimum composite
        reflectivity for criterion 5.
    option_dict['min_composite_refl_aml_dbz']: Minimum composite reflectivity
        above melting level, used for criterion 2.

    :return: convective_flag_matrix: M-by-N numpy array of Boolean flags (True
        if convective, False if not).
    :return: option_dict: Same as input, except some values may have been
        replaced by defaults.
    """

    # Error-checking.
    error_checking.assert_is_numpy_array(reflectivity_matrix_dbz,
                                         num_dimensions=3)

    option_dict = _check_input_args(option_dict)

    peakedness_neigh_metres = option_dict[PEAKEDNESS_NEIGH_KEY]
    max_peakedness_height_m_asl = option_dict[MAX_PEAKEDNESS_HEIGHT_KEY]
    min_height_fraction_for_peakedness = option_dict[MIN_HEIGHT_FRACTION_KEY]
    halve_resolution_for_peakedness = option_dict[HALVE_RESOLUTION_KEY]
    min_echo_top_m_asl = option_dict[MIN_ECHO_TOP_KEY]
    echo_top_level_dbz = option_dict[ECHO_TOP_LEVEL_KEY]
    min_size_pixels = option_dict[MIN_SIZE_KEY]
    min_composite_refl_criterion1_dbz = (
        option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY])
    min_composite_refl_criterion5_dbz = (
        option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY])
    min_composite_refl_aml_dbz = option_dict[MIN_COMPOSITE_REFL_AML_KEY]

    grid_point_heights_m_asl = numpy.round(
        grid_metadata_dict[HEIGHTS_KEY]).astype(int)

    error_checking.assert_is_numpy_array(grid_point_heights_m_asl,
                                         num_dimensions=1)
    error_checking.assert_is_geq_numpy_array(grid_point_heights_m_asl, 0)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_point_heights_m_asl), 0)

    # Compute grid-point coordinates.
    num_rows = reflectivity_matrix_dbz.shape[0]
    num_columns = reflectivity_matrix_dbz.shape[1]

    grid_point_latitudes_deg, grid_point_longitudes_deg = (
        grids.get_latlng_grid_points(
            min_latitude_deg=grid_metadata_dict[MIN_LATITUDE_KEY],
            min_longitude_deg=grid_metadata_dict[MIN_LONGITUDE_KEY],
            lat_spacing_deg=grid_metadata_dict[LATITUDE_SPACING_KEY],
            lng_spacing_deg=grid_metadata_dict[LONGITUDE_SPACING_KEY],
            num_rows=num_rows,
            num_columns=num_columns))

    grid_metadata_dict[LATITUDES_KEY] = grid_point_latitudes_deg
    grid_metadata_dict[LONGITUDES_KEY] = grid_point_longitudes_deg
    reflectivity_matrix_dbz[numpy.isnan(reflectivity_matrix_dbz)] = 0.

    print('Applying criterion 1 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion1(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        peakedness_neigh_metres=peakedness_neigh_metres,
        max_peakedness_height_m_asl=max_peakedness_height_m_asl,
        min_height_fraction=min_height_fraction_for_peakedness,
        halve_resolution_for_peakedness=halve_resolution_for_peakedness,
        min_composite_refl_dbz=min_composite_refl_criterion1_dbz,
        grid_metadata_dict=grid_metadata_dict)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 2 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion2(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        convective_flag_matrix=convective_flag_matrix,
        grid_metadata_dict=grid_metadata_dict,
        valid_time_unix_sec=valid_time_unix_sec,
        min_composite_refl_aml_dbz=min_composite_refl_aml_dbz)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 3 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion3(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        convective_flag_matrix=convective_flag_matrix,
        grid_metadata_dict=grid_metadata_dict,
        min_echo_top_m_asl=min_echo_top_m_asl,
        echo_top_level_dbz=echo_top_level_dbz)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 4 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion4(
        convective_flag_matrix=convective_flag_matrix,
        min_size_pixels=min_size_pixels)

    print('Number of convective pixels = {0:d}'.format(
        numpy.sum(convective_flag_matrix)))

    print('Applying criterion 5 for convective classification...')
    convective_flag_matrix = _apply_convective_criterion5(
        reflectivity_matrix_dbz=reflectivity_matrix_dbz,
        convective_flag_matrix=convective_flag_matrix,
        min_composite_refl_dbz=min_composite_refl_criterion5_dbz)

    return convective_flag_matrix, option_dict
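A hypothetical call (all values invented; the dictionary keys follow the docstring above, and most `option_dict` entries are presumably filled in by `_check_input_args` defaults):

reflectivity_matrix_dbz = numpy.random.uniform(
    low=0., high=60., size=(100, 100, 30))

example_grid_metadata_dict = {
    'min_grid_point_latitude_deg': 35.,
    'latitude_spacing_deg': 0.01,
    'min_grid_point_longitude_deg': 262.,
    'longitude_spacing_deg': 0.01,
    'grid_point_heights_m_asl': numpy.arange(500., 15001., 500.)
}

convective_flag_matrix, example_option_dict = find_convective_pixels(
    reflectivity_matrix_dbz=reflectivity_matrix_dbz,
    grid_metadata_dict=example_grid_metadata_dict,
    valid_time_unix_sec=1559347200,  # arbitrary valid time
    option_dict={})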
Example #16
def write_standard_file(pickle_file_name,
                        denorm_predictor_matrices,
                        saliency_matrices,
                        full_storm_id_strings,
                        storm_times_unix_sec,
                        model_file_name,
                        metadata_dict,
                        sounding_pressure_matrix_pa=None):
    """Writes saliency maps (one per storm object) to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    :param pickle_file_name: Path to output file.
    :param denorm_predictor_matrices: See doc for `_check_in_and_out_matrices`.
    :param saliency_matrices: Same.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to model that created saliency maps (readable
        by `cnn.read_model`).
    :param metadata_dict: Dictionary created by `check_metadata`.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if the model is trained with soundings but without
        pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string_list(full_storm_id_strings)
    error_checking.assert_is_numpy_array(numpy.array(full_storm_id_strings),
                                         num_dimensions=1)

    num_examples = len(full_storm_id_strings)
    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    _check_in_and_out_matrices(predictor_matrices=denorm_predictor_matrices,
                               num_examples=num_examples,
                               saliency_matrices=saliency_matrices)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(sounding_pressure_matrix_pa,
                                             num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples, ) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, exact_dimensions=these_expected_dim)

    saliency_dict = {
        PREDICTOR_MATRICES_KEY: denorm_predictor_matrices,
        SALIENCY_MATRICES_KEY: saliency_matrices,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_KEY: model_file_name,
        COMPONENT_TYPE_KEY: metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(saliency_dict, pickle_file_handle)
    pickle_file_handle.close()
Example #17
def write_standard_file(pickle_file_name,
                        denorm_input_matrices,
                        denorm_output_matrices,
                        initial_activations,
                        final_activations,
                        model_file_name,
                        metadata_dict,
                        full_storm_id_strings=None,
                        storm_times_unix_sec=None,
                        sounding_pressure_matrix_pa=None):
    """Writes backwards-optimized examples to Pickle file.

    E = number of examples (storm objects)
    H = number of sounding heights

    If input matrices do not come from real examples, `full_storm_id_strings`
    and `storm_times_unix_sec` can be None.

    :param pickle_file_name: Path to output file.
    :param denorm_input_matrices: See doc for `_check_in_and_out_matrices`.
    :param denorm_output_matrices: Same.
    :param initial_activations: length-E numpy array of initial model
        activations (before backwards optimization).
    :param final_activations: length-E numpy array of final model activations
        (after backwards optimization).
    :param model_file_name: Path to model used for backwards optimization
        (readable by `cnn.read_model`).
    :param metadata_dict: Dictionary created by `check_metadata`.
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param sounding_pressure_matrix_pa: E-by-H numpy array of pressure
        levels.  Needed only if `denorm_input_matrices` contains soundings from
        real examples but without pressure as a predictor.
    """

    error_checking.assert_is_string(model_file_name)
    used_real_examples = not (full_storm_id_strings is None
                              and storm_times_unix_sec is None)

    if used_real_examples:
        error_checking.assert_is_string_list(full_storm_id_strings)
        error_checking.assert_is_numpy_array(
            numpy.array(full_storm_id_strings), num_dimensions=1)

        num_examples = len(full_storm_id_strings)
        these_expected_dim = numpy.array([num_examples], dtype=int)

        error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
        error_checking.assert_is_numpy_array(
            storm_times_unix_sec, exact_dimensions=these_expected_dim)
    else:
        num_examples = denorm_input_matrices[0].shape[0]
        sounding_pressure_matrix_pa = None

    _check_in_and_out_matrices(input_matrices=denorm_input_matrices,
                               num_examples=num_examples,
                               output_matrices=denorm_output_matrices)

    these_expected_dim = numpy.array([num_examples], dtype=int)

    error_checking.assert_is_numpy_array_without_nan(initial_activations)
    error_checking.assert_is_numpy_array(initial_activations,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_numpy_array_without_nan(final_activations)
    error_checking.assert_is_numpy_array(final_activations,
                                         exact_dimensions=these_expected_dim)

    if sounding_pressure_matrix_pa is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pa)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pa, 0.)
        error_checking.assert_is_numpy_array(sounding_pressure_matrix_pa,
                                             num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_examples, ) + sounding_pressure_matrix_pa.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pa, exact_dimensions=these_expected_dim)

    bwo_dictionary = {
        INPUT_MATRICES_KEY: denorm_input_matrices,
        OUTPUT_MATRICES_KEY: denorm_output_matrices,
        INITIAL_ACTIVATIONS_KEY: initial_activations,
        FINAL_ACTIVATIONS_KEY: final_activations,
        MODEL_FILE_KEY: model_file_name,
        FULL_STORM_IDS_KEY: full_storm_id_strings,
        STORM_TIMES_KEY: storm_times_unix_sec,
        NUM_ITERATIONS_KEY: metadata_dict[NUM_ITERATIONS_KEY],
        LEARNING_RATE_KEY: metadata_dict[LEARNING_RATE_KEY],
        L2_WEIGHT_KEY: metadata_dict[L2_WEIGHT_KEY],
        RADAR_CONSTRAINT_WEIGHT_KEY:
        metadata_dict[RADAR_CONSTRAINT_WEIGHT_KEY],
        MINMAX_CONSTRAINT_WEIGHT_KEY:
        metadata_dict[MINMAX_CONSTRAINT_WEIGHT_KEY],
        COMPONENT_TYPE_KEY: metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pa
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(bwo_dictionary, pickle_file_handle)
    pickle_file_handle.close()
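
# A minimal read-back sketch (not part of the original module).  The file name
# is hypothetical; the *_KEY constants are the module-level keys used to build
# `bwo_dictionary` above.
import pickle

with open('backwards_optimization.p', 'rb') as pickle_file_handle:
    bwo_dictionary = pickle.load(pickle_file_handle)

print(sorted(bwo_dictionary.keys()))
print(bwo_dictionary[MODEL_FILE_KEY])         # path to the trained CNN
print(bwo_dictionary[FINAL_ACTIVATIONS_KEY])  # length-E numpy array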
Example #18
def train_neural_net(
        training_table, feature_names, target_name, replace_missing,
        standardize, transform_via_svd,
        replacement_method=feature_trans.MEAN_VALUE_REPLACEMENT_METHOD,
        fraction_of_explained_variance_for_svd=
        DEFAULT_EXP_VARIANCE_FRACTION_FOR_SVD,
        hidden_layer_sizes=DEFAULT_HIDDEN_LAYER_SIZES_FOR_NN,
        hidden_layer_activation_function=DEFAULT_ACTIVATION_FUNCTION_FOR_NN,
        solver=DEFAULT_SOLVER_FOR_NN, l2_weight=DEFAULT_L2_WEIGHT_FOR_NN,
        num_examples_per_batch=DEFAULT_BATCH_SIZE_FOR_NN,
        learning_rate=DEFAULT_LEARNING_RATE_FOR_NN,
        max_num_epochs=DEFAULT_MAX_NUM_EPOCHS_FOR_NN,
        convergence_tolerance=DEFAULT_CONVERGENCE_TOLERANCE_FOR_NN,
        allow_early_stopping=True,
        early_stopping_fraction=DEFAULT_EARLY_STOPPING_FRACTION_FOR_NN):
    """Trains a neural net for binary classification.

    H = number of hidden layers

    :param training_table: See documentation for
        _check_input_data_for_learning.
    :param feature_names: See doc for _check_input_data_for_learning.
    :param target_name: See doc for _check_input_data_for_learning.
    :param replace_missing: See documentation for _preprocess_data_for_learning.
    :param standardize: See doc for _preprocess_data_for_learning.
    :param transform_via_svd: See doc for _preprocess_data_for_learning.
    :param replacement_method: See doc for _preprocess_data_for_learning.
    :param fraction_of_explained_variance_for_svd: See doc for
        _preprocess_data_for_learning.
    :param hidden_layer_sizes: length-H numpy array, where the [i]th element is
        the number of nodes in the [i]th hidden layer.
    :param hidden_layer_activation_function: Activation function for hidden
        layers.  See `sklearn.neural_network.MLPClassifier` documentation for
        valid options.
    :param solver:  Solver.  Valid options are "sgd" and "adam".
    :param l2_weight: Weight for L2 penalty.
    :param num_examples_per_batch: Number of examples per training batch.
    :param learning_rate: Learning rate.
    :param max_num_epochs: Max number of training epochs (passes over training
        data).
    :param convergence_tolerance: Stopping criterion.  Training will stop when
        loss has improved by < `convergence_tolerance` for each of two
        consecutive epochs.
    :param allow_early_stopping: Boolean flag.  If True, some training data will
        be set aside as "validation data" to check for early stopping.  In this
        case, training will stop when validation loss has improved by <
        `convergence_tolerance` for each of two consecutive epochs.
    :param early_stopping_fraction: Fraction of training examples to use when
        checking early-stopping criterion.
    :return: model_object: Trained model (instance of
        `sklearn.neural_network.MLPClassifier`).
    :return: replacement_dict: See doc for _preprocess_data_for_learning.
    :return: standardization_dict: See doc for _preprocess_data_for_learning.
    :return: svd_dictionary: See doc for _preprocess_data_for_learning.
    :raises: ValueError: if `solver not in VALID_SOLVERS_FOR_NN`.
    """

    _check_input_data_for_learning(
        input_table=training_table, feature_names=feature_names,
        target_name=target_name)

    (preprocessed_training_table, preprocessed_feature_names, replacement_dict,
     standardization_dict, svd_dictionary) = _preprocess_data_for_learning(
         input_table=training_table, feature_names=feature_names,
         learning_phase=TRAINING_PHASE, replace_missing=replace_missing,
         standardize=standardize, transform_via_svd=transform_via_svd,
         replacement_method=replacement_method,
         fraction_of_explained_variance_for_svd=
         fraction_of_explained_variance_for_svd)

    error_checking.assert_is_integer_numpy_array(hidden_layer_sizes)
    error_checking.assert_is_numpy_array(hidden_layer_sizes, num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(hidden_layer_sizes, 0)

    error_checking.assert_is_string(solver)
    if solver not in VALID_SOLVERS_FOR_NN:
        error_string = (
            '\n\n{0:s}\n\nValid solvers (listed above) do not include "{1:s}".'
        ).format(str(VALID_SOLVERS_FOR_NN), solver)

        raise ValueError(error_string)

    error_checking.assert_is_integer(num_examples_per_batch)
    error_checking.assert_is_geq(num_examples_per_batch, 2)
    error_checking.assert_is_greater(learning_rate, 0.)
    error_checking.assert_is_leq(learning_rate, 1.)
    error_checking.assert_is_integer(max_num_epochs)
    error_checking.assert_is_greater(max_num_epochs, 0)
    error_checking.assert_is_greater(convergence_tolerance, 0.)
    error_checking.assert_is_boolean(allow_early_stopping)

    if allow_early_stopping:
        error_checking.assert_is_greater(early_stopping_fraction, 0.)
        error_checking.assert_is_less_than(early_stopping_fraction, 0.5)

    model_object = sklearn.neural_network.MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=hidden_layer_activation_function, solver=solver,
        alpha=l2_weight, batch_size=num_examples_per_batch,
        learning_rate_init=learning_rate, max_iter=max_num_epochs,
        tol=convergence_tolerance, verbose=3,
        early_stopping=allow_early_stopping,
        validation_fraction=early_stopping_fraction)

    # `DataFrame.as_matrix` has been removed from pandas, so select the feature
    # columns explicitly and convert to a numpy array.
    model_object.fit(
        preprocessed_training_table[preprocessed_feature_names].values,
        preprocessed_training_table[target_name].values)
    return model_object, replacement_dict, standardization_dict, svd_dictionary
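
# Stand-alone illustration of the scikit-learn configuration used above.  The
# hyperparameter values and toy data here are placeholders, not the module's
# DEFAULT_* constants.
import numpy
import sklearn.neural_network

toy_feature_matrix = numpy.random.rand(200, 5)
toy_target_values = (toy_feature_matrix[:, 0] > 0.5).astype(int)

toy_model_object = sklearn.neural_network.MLPClassifier(
    hidden_layer_sizes=(20, 10), activation='relu', solver='adam',
    alpha=0.001, batch_size=32, learning_rate_init=0.005, max_iter=100,
    tol=1e-4, early_stopping=True, validation_fraction=0.2)

toy_model_object.fit(toy_feature_matrix, toy_target_values)
print(toy_model_object.predict_proba(toy_feature_matrix[:5, :]))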
Example #19
def write_file(netcdf_file_name,
               scalar_target_matrix,
               vector_target_matrix,
               scalar_prediction_matrix,
               vector_prediction_matrix,
               heights_m_agl,
               example_id_strings,
               model_file_name,
               isotonic_model_file_name=None):
    """Writes predictions to NetCDF file.

    E = number of examples
    H = number of heights
    T_s = number of scalar targets
    T_v = number of vector targets

    :param netcdf_file_name: Path to output file.
    :param scalar_target_matrix: numpy array (E x T_s) with actual values of
        scalar targets.
    :param vector_target_matrix: numpy array (E x H x T_v) with actual values of
        vector targets.
    :param scalar_prediction_matrix: Same as `scalar_target_matrix` but with
        predicted values.
    :param vector_prediction_matrix: Same as `vector_target_matrix` but with
        predicted values.
    :param heights_m_agl: length-H numpy array of heights (metres above ground
        level).
    :param example_id_strings: length-E list of IDs created by
        `example_utils.create_example_ids`.
    :param model_file_name: Path to file with trained model (readable by
        `neural_net.read_model`).
    :param isotonic_model_file_name: Path to file with trained isotonic-
        regression models (readable by `isotonic_regression.read_file`) used to
        make predictions.  If isotonic regression was not used, leave this as
        None.
    """

    # Check input args.
    error_checking.assert_is_numpy_array_without_nan(scalar_target_matrix)
    error_checking.assert_is_numpy_array(scalar_target_matrix,
                                         num_dimensions=2)

    error_checking.assert_is_numpy_array_without_nan(scalar_prediction_matrix)
    error_checking.assert_is_numpy_array(scalar_prediction_matrix,
                                         exact_dimensions=numpy.array(
                                             scalar_target_matrix.shape,
                                             dtype=int))

    error_checking.assert_is_numpy_array_without_nan(vector_target_matrix)
    error_checking.assert_is_numpy_array(vector_target_matrix,
                                         num_dimensions=3)

    num_examples = scalar_target_matrix.shape[0]
    expected_dim = numpy.array(
        (num_examples, ) + vector_target_matrix.shape[1:], dtype=int)
    error_checking.assert_is_numpy_array(vector_target_matrix,
                                         exact_dimensions=expected_dim)

    error_checking.assert_is_numpy_array_without_nan(vector_prediction_matrix)
    error_checking.assert_is_numpy_array(vector_prediction_matrix,
                                         exact_dimensions=numpy.array(
                                             vector_target_matrix.shape,
                                             dtype=int))

    num_heights = vector_target_matrix.shape[1]
    error_checking.assert_is_greater_numpy_array(heights_m_agl, 0.)
    error_checking.assert_is_numpy_array(heights_m_agl,
                                         exact_dimensions=numpy.array(
                                             [num_heights], dtype=int))

    error_checking.assert_is_numpy_array(numpy.array(example_id_strings),
                                         exact_dimensions=numpy.array(
                                             [num_examples], dtype=int))
    example_utils.parse_example_ids(example_id_strings)

    error_checking.assert_is_string(model_file_name)
    if isotonic_model_file_name is None:
        isotonic_model_file_name = ''
    error_checking.assert_is_string(isotonic_model_file_name)

    # Write to NetCDF file.
    file_system_utils.mkdir_recursive_if_necessary(file_name=netcdf_file_name)
    dataset_object = netCDF4.Dataset(netcdf_file_name,
                                     'w',
                                     format='NETCDF3_64BIT_OFFSET')

    dataset_object.setncattr(MODEL_FILE_KEY, model_file_name)
    dataset_object.setncattr(ISOTONIC_MODEL_FILE_KEY, isotonic_model_file_name)

    num_examples = vector_target_matrix.shape[0]
    dataset_object.createDimension(EXAMPLE_DIMENSION_KEY, num_examples)
    dataset_object.createDimension(HEIGHT_DIMENSION_KEY,
                                   vector_target_matrix.shape[1])
    dataset_object.createDimension(VECTOR_TARGET_DIMENSION_KEY,
                                   vector_target_matrix.shape[2])

    num_scalar_targets = scalar_target_matrix.shape[1]
    if num_scalar_targets > 0:
        dataset_object.createDimension(SCALAR_TARGET_DIMENSION_KEY,
                                       scalar_target_matrix.shape[1])

    if num_examples == 0:
        num_id_characters = 1
    else:
        num_id_characters = numpy.max(
            numpy.array([len(s) for s in example_id_strings]))

    dataset_object.createDimension(EXAMPLE_ID_CHAR_DIM_KEY, num_id_characters)

    this_string_format = 'S{0:d}'.format(num_id_characters)
    example_ids_char_array = netCDF4.stringtochar(
        numpy.array(example_id_strings, dtype=this_string_format))

    dataset_object.createVariable(EXAMPLE_IDS_KEY,
                                  datatype='S1',
                                  dimensions=(EXAMPLE_DIMENSION_KEY,
                                              EXAMPLE_ID_CHAR_DIM_KEY))
    dataset_object.variables[EXAMPLE_IDS_KEY][:] = numpy.array(
        example_ids_char_array)

    dataset_object.createVariable(HEIGHTS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=HEIGHT_DIMENSION_KEY)
    dataset_object.variables[HEIGHTS_KEY][:] = heights_m_agl

    if num_scalar_targets > 0:
        dataset_object.createVariable(SCALAR_TARGETS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=(EXAMPLE_DIMENSION_KEY,
                                                  SCALAR_TARGET_DIMENSION_KEY))
        dataset_object.variables[SCALAR_TARGETS_KEY][:] = scalar_target_matrix

        dataset_object.createVariable(SCALAR_PREDICTIONS_KEY,
                                      datatype=numpy.float32,
                                      dimensions=(EXAMPLE_DIMENSION_KEY,
                                                  SCALAR_TARGET_DIMENSION_KEY))
        dataset_object.variables[SCALAR_PREDICTIONS_KEY][:] = (
            scalar_prediction_matrix)

    these_dimensions = (EXAMPLE_DIMENSION_KEY, HEIGHT_DIMENSION_KEY,
                        VECTOR_TARGET_DIMENSION_KEY)

    dataset_object.createVariable(VECTOR_TARGETS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=these_dimensions)
    dataset_object.variables[VECTOR_TARGETS_KEY][:] = vector_target_matrix

    dataset_object.createVariable(VECTOR_PREDICTIONS_KEY,
                                  datatype=numpy.float32,
                                  dimensions=these_dimensions)
    dataset_object.variables[VECTOR_PREDICTIONS_KEY][:] = (
        vector_prediction_matrix)

    dataset_object.close()
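
# A minimal read-back sketch (not part of the original module).  The file name
# is hypothetical; the *_KEY constants are the module-level names used by the
# writer above.
import netCDF4
import numpy

dataset_object = netCDF4.Dataset('predictions.nc')

model_file_name = str(dataset_object.getncattr(MODEL_FILE_KEY))
example_id_strings = [
    str(s) for s in
    netCDF4.chartostring(dataset_object.variables[EXAMPLE_IDS_KEY][:])
]
vector_prediction_matrix = numpy.array(
    dataset_object.variables[VECTOR_PREDICTIONS_KEY][:])

dataset_object.close()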
Example #20
def _run(example_file_name, num_examples, choose_max_heating_rate,
         max_noise_k_day01, pressure_cutoffs_pa, pressure_spacings_pa,
         first_interp_method_name, second_interp_method_name, interp_fluxes,
         output_dir_name):
    """Runs interpolation experiment.

    This is effectively the main method.

    :param example_file_name: See documentation at top of file.
    :param num_examples: Same.
    :param choose_max_heating_rate: Same.
    :param max_noise_k_day01: Same.
    :param pressure_cutoffs_pa: Same.
    :param pressure_spacings_pa: Same.
    :param first_interp_method_name: Same.
    :param second_interp_method_name: Same.
    :param interp_fluxes: Same.
    :param output_dir_name: Same.
    """

    if interp_fluxes:
        max_noise_k_day01 = 0.

    error_checking.assert_is_greater(num_examples, 0)
    error_checking.assert_is_geq(max_noise_k_day01, 0.)

    error_checking.assert_is_geq_numpy_array(pressure_cutoffs_pa, 0.)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(pressure_cutoffs_pa), 0.)
    error_checking.assert_is_greater_numpy_array(pressure_spacings_pa, 0.)

    num_spacings = len(pressure_spacings_pa)
    expected_dim = numpy.array([num_spacings + 1], dtype=int)
    error_checking.assert_is_numpy_array(pressure_cutoffs_pa,
                                         exact_dimensions=expected_dim)

    high_res_pressures_pa = numpy.array([], dtype=float)

    for i in range(num_spacings):
        this_num_pressures = int(
            numpy.ceil(1 +
                       (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
                       pressure_spacings_pa[i]))
        these_pressures_pa = numpy.linspace(pressure_cutoffs_pa[i],
                                            pressure_cutoffs_pa[i + 1],
                                            num=this_num_pressures,
                                            dtype=float)

        if i != num_spacings - 1:
            these_pressures_pa = these_pressures_pa[:-1]

        high_res_pressures_pa = numpy.concatenate(
            (high_res_pressures_pa, these_pressures_pa))

    print('Number of levels in high-resolution grid = {0:d}'.format(
        len(high_res_pressures_pa)))

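    # Avoid a pressure of exactly zero at the top of the profile, which cannot
    # be converted to a finite height.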
    if high_res_pressures_pa[0] < TOLERANCE:
        high_res_pressures_pa[0] = 0.5 * high_res_pressures_pa[1]

    high_res_pressures_pa = high_res_pressures_pa[::-1]
    high_res_heights_m_asl = standard_atmo.pressure_to_height(
        high_res_pressures_pa)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    example_dict = example_io.read_file(example_file_name)

    heating_rate_matrix_k_day01 = example_utils.get_field_from_dict(
        example_dict=example_dict,
        field_name=example_utils.SHORTWAVE_HEATING_RATE_NAME)

    if choose_max_heating_rate:
        hr_criterion_by_example = numpy.max(heating_rate_matrix_k_day01,
                                            axis=1)
    else:
        abs_diff_matrix = numpy.absolute(
            numpy.diff(heating_rate_matrix_k_day01[:, :-1], axis=1))
        hr_criterion_by_example = numpy.max(abs_diff_matrix, axis=1)

    good_indices = numpy.argsort(-1 * hr_criterion_by_example)
    good_indices = good_indices[:num_examples]
    example_dict = example_utils.subset_by_index(example_dict=example_dict,
                                                 desired_indices=good_indices)

    num_examples = len(good_indices)
    max_differences_k_day01 = numpy.full(num_examples, numpy.nan)

    for i in range(num_examples):
        max_differences_k_day01[i] = _run_experiment_one_example(
            example_dict=example_dict,
            example_index=i,
            max_noise_k_day01=max_noise_k_day01,
            high_res_pressures_pa=high_res_pressures_pa,
            high_res_heights_m_asl=high_res_heights_m_asl,
            first_interp_method_name=first_interp_method_name,
            second_interp_method_name=second_interp_method_name,
            interp_fluxes=interp_fluxes,
            output_dir_name=output_dir_name)

    print('Average max difference = {0:.4f} K day^-1'.format(
        numpy.mean(max_differences_k_day01)))
    print('Median max difference = {0:.4f} K day^-1'.format(
        numpy.median(max_differences_k_day01)))
    print('Max max difference = {0:.4f} K day^-1'.format(
        numpy.max(max_differences_k_day01)))
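
# Stand-alone sketch of the piecewise-uniform grid construction above, with
# hypothetical cutoffs and spacings (illustrative values only).  Each segment is
# built with `numpy.linspace`, so the actual spacing is the closest value that
# makes the segment end exactly at the next cutoff.
import numpy

pressure_cutoffs_pa = numpy.array([50., 20000., 105000.])
pressure_spacings_pa = numpy.array([1000., 100.])

high_res_pressures_pa = numpy.array([], dtype=float)

for i in range(len(pressure_spacings_pa)):
    this_num_pressures = int(numpy.ceil(
        1 + (pressure_cutoffs_pa[i + 1] - pressure_cutoffs_pa[i]) /
        pressure_spacings_pa[i]
    ))
    these_pressures_pa = numpy.linspace(
        pressure_cutoffs_pa[i], pressure_cutoffs_pa[i + 1],
        num=this_num_pressures, dtype=float)

    # Drop the last point of all but the final segment, so that cutoff values
    # are not duplicated.
    if i != len(pressure_spacings_pa) - 1:
        these_pressures_pa = these_pressures_pa[:-1]

    high_res_pressures_pa = numpy.concatenate(
        (high_res_pressures_pa, these_pressures_pa))

print(len(high_res_pressures_pa))  # 871 levels for these example values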
Example #21
    def test_assert_is_positive_numpy_array_true(self):
        """Checks assert_is_greater_numpy_array; base_value = 0, inputs > 0."""

        error_checking.assert_is_greater_numpy_array(POSITIVE_NUMPY_ARRAY, 0)
Example #22
def _run(saliency_file_names, monte_carlo_file_names, composite_names,
         colour_map_name, max_colour_values, half_num_contours,
         smoothing_radius_grid_cells, output_dir_name):
    """Makes figure with sanity checks for MYRORSS saliency maps.

    This is effectively the main method.

    :param saliency_file_names: See documentation at top of file.
    :param monte_carlo_file_names: Same.
    :param composite_names: Same.
    :param colour_map_name: Same.
    :param max_colour_values: Same.
    :param half_num_contours: Same.
    :param smoothing_radius_grid_cells: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    if smoothing_radius_grid_cells <= 0:
        smoothing_radius_grid_cells = None

    colour_map_object = pyplot.cm.get_cmap(colour_map_name)
    error_checking.assert_is_geq(half_num_contours, 5)

    num_composites = len(saliency_file_names)
    expected_dim = numpy.array([num_composites], dtype=int)
    error_checking.assert_is_numpy_array(numpy.array(composite_names),
                                         exact_dimensions=expected_dim)
    error_checking.assert_is_numpy_array(numpy.array(monte_carlo_file_names),
                                         exact_dimensions=expected_dim)

    monte_carlo_file_names = [
        None if f in NONE_STRINGS else f for f in monte_carlo_file_names
    ]

    error_checking.assert_is_greater_numpy_array(max_colour_values, 0.)
    error_checking.assert_is_numpy_array(max_colour_values,
                                         exact_dimensions=expected_dim)

    composite_names_abbrev = [
        n.replace('_', '-').lower() for n in composite_names
    ]
    composite_names_verbose = [
        '({0:s}) {1:s}'.format(chr(ord('a') + i),
                               composite_names[i].replace('_', ' '))
        for i in range(num_composites)
    ]

    panel_file_names = [None] * num_composites

    for i in range(num_composites):
        panel_file_names[i] = _plot_one_composite(
            saliency_file_name=saliency_file_names[i],
            monte_carlo_file_name=monte_carlo_file_names[i],
            composite_name_abbrev=composite_names_abbrev[i],
            composite_name_verbose=composite_names_verbose[i],
            colour_map_object=colour_map_object,
            max_colour_value=max_colour_values[i],
            half_num_contours=half_num_contours,
            smoothing_radius_grid_cells=smoothing_radius_grid_cells,
            output_dir_name=output_dir_name)

        _add_colour_bar(figure_file_name=panel_file_names[i],
                        colour_map_object=colour_map_object,
                        max_colour_value=max_colour_values[i],
                        temporary_dir_name=output_dir_name)

        print('\n')

    figure_file_name = '{0:s}/saliency_concat.jpg'.format(output_dir_name)
    print('Concatenating panels to: "{0:s}"...'.format(figure_file_name))

    num_panel_rows = int(numpy.floor(numpy.sqrt(num_composites)))
    num_panel_columns = int(numpy.ceil(float(num_composites) / num_panel_rows))

    imagemagick_utils.concatenate_images(input_file_names=panel_file_names,
                                         output_file_name=figure_file_name,
                                         border_width_pixels=100,
                                         num_panel_rows=num_panel_rows,
                                         num_panel_columns=num_panel_columns)
    imagemagick_utils.trim_whitespace(input_file_name=figure_file_name,
                                      output_file_name=figure_file_name,
                                      border_width_pixels=10)
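
# Quick stand-alone sketch of the panel-layout rule used above: the number of
# rows is the floor of sqrt(number of panels), and columns fill the remainder.
import numpy

for this_num_composites in [3, 4, 7, 12]:
    this_num_rows = int(numpy.floor(numpy.sqrt(this_num_composites)))
    this_num_columns = int(numpy.ceil(
        float(this_num_composites) / this_num_rows))
    print(this_num_composites, this_num_rows, this_num_columns)

# Output: 3 1 3 / 4 2 2 / 7 2 4 / 12 3 4.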
Example #23
    def test_assert_is_positive_numpy_array_non_positive(self):
        """Checks assert_is_greater_numpy_array; base_value = 0, inputs <= 0."""

        with self.assertRaises(ValueError):
            error_checking.assert_is_greater_numpy_array(
                NON_POSITIVE_NUMPY_ARRAY, 0)
Example #24
def write_standard_file(pickle_file_name,
                        list_of_input_matrices,
                        list_of_saliency_matrices,
                        storm_ids,
                        storm_times_unix_sec,
                        model_file_name,
                        saliency_metadata_dict,
                        sounding_pressure_matrix_pascals=None):
    """Writes saliency maps (one per example) to Pickle file.

    T = number of input tensors to the model
    E = number of examples (storm objects)
    H = number of height levels per sounding

    :param pickle_file_name: Path to output file.
    :param list_of_input_matrices: length-T list of numpy arrays, containing
        predictors (inputs to the model).  The first dimension of each array
        must have length E.
    :param list_of_saliency_matrices: length-T list of numpy arrays, containing
        saliency values.  list_of_saliency_matrices[i] must have the same
        dimensions as list_of_input_matrices[i].
    :param storm_ids: length-E list of storm IDs (strings).
    :param storm_times_unix_sec: length-E numpy array of storm times.
    :param model_file_name: Path to file with trained model (readable by
        `cnn.read_model`).
    :param saliency_metadata_dict: Dictionary created by `check_metadata`.
    :param sounding_pressure_matrix_pascals: E-by-H numpy array of pressure
        levels in soundings.  Needed only when the model's sounding input does
        not include pressure as a predictor, since pressures are required to
        plot soundings.
    :raises: ValueError: if `list_of_input_matrices` and
        `list_of_saliency_matrices` have different lengths.
    """

    error_checking.assert_is_string(model_file_name)
    error_checking.assert_is_string_list(storm_ids)
    error_checking.assert_is_numpy_array(numpy.array(storm_ids),
                                         num_dimensions=1)

    num_storm_objects = len(storm_ids)
    these_expected_dim = numpy.array([num_storm_objects], dtype=int)

    error_checking.assert_is_integer_numpy_array(storm_times_unix_sec)
    error_checking.assert_is_numpy_array(storm_times_unix_sec,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_list(list_of_input_matrices)
    error_checking.assert_is_list(list_of_saliency_matrices)
    num_input_matrices = len(list_of_input_matrices)
    num_saliency_matrices = len(list_of_saliency_matrices)

    if num_input_matrices != num_saliency_matrices:
        error_string = (
            'Number of input matrices ({0:d}) should equal number of saliency '
            'matrices ({1:d}).').format(num_input_matrices,
                                        num_saliency_matrices)

        raise ValueError(error_string)

    for i in range(num_input_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_input_matrices[i])
        error_checking.assert_is_numpy_array_without_nan(
            list_of_saliency_matrices[i])

        these_expected_dim = numpy.array(
            (num_storm_objects, ) + list_of_input_matrices[i].shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_input_matrices[i], exact_dimensions=these_expected_dim)

        these_expected_dim = numpy.array(list_of_input_matrices[i].shape,
                                         dtype=int)
        error_checking.assert_is_numpy_array(
            list_of_saliency_matrices[i], exact_dimensions=these_expected_dim)

    if sounding_pressure_matrix_pascals is not None:
        error_checking.assert_is_numpy_array_without_nan(
            sounding_pressure_matrix_pascals)
        error_checking.assert_is_greater_numpy_array(
            sounding_pressure_matrix_pascals, 0.)
        error_checking.assert_is_numpy_array(sounding_pressure_matrix_pascals,
                                             num_dimensions=2)

        these_expected_dim = numpy.array(
            (num_storm_objects, ) + sounding_pressure_matrix_pascals.shape[1:],
            dtype=int)
        error_checking.assert_is_numpy_array(
            sounding_pressure_matrix_pascals,
            exact_dimensions=these_expected_dim)

    saliency_dict = {
        INPUT_MATRICES_KEY: list_of_input_matrices,
        SALIENCY_MATRICES_KEY: list_of_saliency_matrices,
        STORM_IDS_KEY: storm_ids,
        STORM_TIMES_KEY: storm_times_unix_sec,
        MODEL_FILE_NAME_KEY: model_file_name,
        COMPONENT_TYPE_KEY: saliency_metadata_dict[COMPONENT_TYPE_KEY],
        TARGET_CLASS_KEY: saliency_metadata_dict[TARGET_CLASS_KEY],
        LAYER_NAME_KEY: saliency_metadata_dict[LAYER_NAME_KEY],
        IDEAL_ACTIVATION_KEY: saliency_metadata_dict[IDEAL_ACTIVATION_KEY],
        NEURON_INDICES_KEY: saliency_metadata_dict[NEURON_INDICES_KEY],
        CHANNEL_INDEX_KEY: saliency_metadata_dict[CHANNEL_INDEX_KEY],
        SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
    }

    file_system_utils.mkdir_recursive_if_necessary(file_name=pickle_file_name)
    pickle_file_handle = open(pickle_file_name, 'wb')
    pickle.dump(saliency_dict, pickle_file_handle)
    pickle_file_handle.close()
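
# Toy illustration (synthetic data) of the shape rule enforced above: each
# saliency matrix must have exactly the same shape as its input matrix, and
# every matrix must have E examples along the first axis.
import numpy

num_storm_objects = 8
list_of_input_matrices = [
    numpy.random.rand(num_storm_objects, 32, 32, 4),
    numpy.random.rand(num_storm_objects, 49, 5)
]
list_of_saliency_matrices = [0.1 * m for m in list_of_input_matrices]

assert all(
    s.shape == x.shape and x.shape[0] == num_storm_objects
    for x, s in zip(list_of_input_matrices, list_of_saliency_matrices)
)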
Example #25
    def test_assert_is_positive_numpy_array_mixed_sign(self):
        """assert_is_greater_numpy_array; base_value = 0, inputs mixed sign."""

        with self.assertRaises(ValueError):
            error_checking.assert_is_greater_numpy_array(
                MIXED_SIGN_NUMPY_ARRAY, 0)
Example #26
def find_events_in_grid_cell(event_x_coords_metres, event_y_coords_metres,
                             grid_edge_x_coords_metres,
                             grid_edge_y_coords_metres, row_index,
                             column_index, verbose):
    """Finds events in a certain grid cell.

    E = number of events
    M = number of rows in grid
    N = number of columns in grid

    :param event_x_coords_metres: length-E numpy array of x-coordinates.
    :param event_y_coords_metres: length-E numpy array of y-coordinates.
    :param grid_edge_x_coords_metres: length-(N + 1) numpy array with
        x-coordinates at edges of grid cells.
    :param grid_edge_y_coords_metres: length-(M + 1) numpy array with
        y-coordinates at edges of grid cells.
    :param row_index: Will find events in [i]th row of grid, where
        i = `row_index`.
    :param column_index: Will find events in [j]th column of grid, where
        j = `column_index`.
    :param verbose: Boolean flag.  If True, messages will be printed to command
        window.
    :return: desired_indices: 1-D numpy array with indices of events in desired
        grid cell.
    """

    error_checking.assert_is_numpy_array_without_nan(event_x_coords_metres)
    error_checking.assert_is_numpy_array(event_x_coords_metres,
                                         num_dimensions=1)

    num_events = len(event_x_coords_metres)
    these_expected_dim = numpy.array([num_events], dtype=int)

    error_checking.assert_is_numpy_array_without_nan(event_y_coords_metres)
    error_checking.assert_is_numpy_array(event_y_coords_metres,
                                         exact_dimensions=these_expected_dim)

    error_checking.assert_is_numpy_array(grid_edge_x_coords_metres,
                                         num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_edge_x_coords_metres), 0)

    error_checking.assert_is_numpy_array(grid_edge_y_coords_metres,
                                         num_dimensions=1)
    error_checking.assert_is_greater_numpy_array(
        numpy.diff(grid_edge_y_coords_metres), 0)

    error_checking.assert_is_integer(row_index)
    error_checking.assert_is_geq(row_index, 0)
    error_checking.assert_is_integer(column_index)
    error_checking.assert_is_geq(column_index, 0)
    error_checking.assert_is_boolean(verbose)

    x_min_metres = grid_edge_x_coords_metres[column_index]
    x_max_metres = grid_edge_x_coords_metres[column_index + 1]
    y_min_metres = grid_edge_y_coords_metres[row_index]
    y_max_metres = grid_edge_y_coords_metres[row_index + 1]

    if row_index == len(grid_edge_y_coords_metres) - 2:
        y_max_metres += TOLERANCE
    if column_index == len(grid_edge_x_coords_metres) - 2:
        x_max_metres += TOLERANCE

    # TODO(thunderhoser): If need be, I could speed this up by computing
    # `row_flags` only once per row and `column_flags` only once per column.
    row_flags = numpy.logical_and(event_y_coords_metres >= y_min_metres,
                                  event_y_coords_metres < y_max_metres)

    if not numpy.any(row_flags):
        if verbose:
            print('0 of {0:d} events are in grid cell ({1:d}, {2:d})!'.format(
                num_events, row_index, column_index))

        return numpy.array([], dtype=int)

    column_flags = numpy.logical_and(event_x_coords_metres >= x_min_metres,
                                     event_x_coords_metres < x_max_metres)

    if not numpy.any(column_flags):
        if verbose:
            print('0 of {0:d} events are in grid cell ({1:d}, {2:d})!'.format(
                num_events, row_index, column_index))

        return numpy.array([], dtype=int)

    desired_indices = numpy.where(numpy.logical_and(row_flags,
                                                    column_flags))[0]

    if verbose:
        print('{0:d} of {1:d} events are in grid cell ({2:d}, {3:d})!'.format(
            len(desired_indices), num_events, row_index, column_index))

    return desired_indices
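
# Self-contained toy example of the membership test above: which events fall in
# grid cell (row 0, column 1) of a 2 x 2 grid with 1000-metre cells?
import numpy

event_x_coords_metres = numpy.array([250., 1250., 1750., 2500.])
event_y_coords_metres = numpy.array([500., 500., 1500., 500.])
grid_edge_x_coords_metres = numpy.array([0., 1000., 2000.])
grid_edge_y_coords_metres = numpy.array([0., 1000., 2000.])

row_flags = numpy.logical_and(
    event_y_coords_metres >= grid_edge_y_coords_metres[0],
    event_y_coords_metres < grid_edge_y_coords_metres[1])
column_flags = numpy.logical_and(
    event_x_coords_metres >= grid_edge_x_coords_metres[1],
    event_x_coords_metres < grid_edge_x_coords_metres[2])

print(numpy.where(numpy.logical_and(row_flags, column_flags))[0])  # prints [1]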
Example #27
def create_3d_net(
        num_input_features, first_spatial_dimensions, rowcol_upsampling_factors,
        height_upsampling_factors, num_output_channels,
        l1_weight=DEFAULT_L1_WEIGHT, l2_weight=DEFAULT_L2_WEIGHT,
        use_transposed_conv=True, activation_function_name=None,
        alpha_for_elu=DEFAULT_ALPHA_FOR_ELU,
        alpha_for_relu=DEFAULT_ALPHA_FOR_RELU,
        use_activn_for_last_layer=False,
        use_batch_norm=True, use_batch_norm_for_last_layer=True):
    """Creates (but does not train) upconvnet with 3 spatial dimensions.

    L = number of main (transposed-conv or upsampling) layers

    :param num_input_features: Length of input feature vector.
    :param first_spatial_dimensions: length-3 numpy array of dimensions in first
        main layer.  The order should be (num_rows, num_columns, num_heights).
        Before it is passed to the first main layer, the feature vector will be
        reshaped into a grid with these dimensions.
    :param rowcol_upsampling_factors: length-L numpy array of upsampling factors
        for horizontal dimensions.
    :param height_upsampling_factors: length-L numpy array of upsampling factors
        for vertical dimension.
    :param num_output_channels: Number of channels in output image.
    :param l1_weight: Weight of L1 regularization for conv and transposed-conv
        layers.
    :param l2_weight: Same but for L2 regularization.
    :param use_transposed_conv: Boolean flag.  If True, each upsampling will be
        done with a transposed-conv layer.  If False, each upsampling will be
        done with an upsampling layer followed by a normal conv layer.
    :param activation_function_name: Activation function.  If you do not want
        activation, make this None.  Otherwise, must be accepted by
        `architecture_utils.check_activation_function`.
    :param alpha_for_elu: See doc for
        `architecture_utils.check_activation_function`.
    :param alpha_for_relu: Same.
    :param use_activn_for_last_layer: Boolean flag.  If True, will apply
        activation function to output image.
    :param use_batch_norm: Boolean flag.  If True, will apply batch
        normalization to conv and transposed-conv layers.
    :param use_batch_norm_for_last_layer: Boolean flag.  If True, will apply
        batch normalization to output image.
    :return: model_object: Untrained model (instance of `keras.models.Model`).
    """

    # TODO(thunderhoser): This method assumes that the original CNN does
    # edge-padding.

    # Check input args.
    error_checking.assert_is_integer(num_input_features)
    error_checking.assert_is_greater(num_input_features, 0)
    error_checking.assert_is_integer(num_output_channels)
    error_checking.assert_is_greater(num_output_channels, 0)
    error_checking.assert_is_geq(l1_weight, 0.)
    error_checking.assert_is_geq(l2_weight, 0.)

    error_checking.assert_is_boolean(use_transposed_conv)
    error_checking.assert_is_boolean(use_activn_for_last_layer)
    error_checking.assert_is_boolean(use_batch_norm)
    error_checking.assert_is_boolean(use_batch_norm_for_last_layer)

    error_checking.assert_is_numpy_array(
        first_spatial_dimensions, exact_dimensions=numpy.array([3], dtype=int)
    )
    error_checking.assert_is_integer_numpy_array(first_spatial_dimensions)
    error_checking.assert_is_greater_numpy_array(first_spatial_dimensions, 0)

    error_checking.assert_is_numpy_array(
        rowcol_upsampling_factors, num_dimensions=1
    )
    error_checking.assert_is_integer_numpy_array(rowcol_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(rowcol_upsampling_factors, 1)

    num_main_layers = len(rowcol_upsampling_factors)
    these_expected_dim = numpy.array([num_main_layers], dtype=int)

    error_checking.assert_is_numpy_array(
        height_upsampling_factors, exact_dimensions=these_expected_dim
    )
    error_checking.assert_is_integer_numpy_array(height_upsampling_factors)
    error_checking.assert_is_geq_numpy_array(height_upsampling_factors, 1)

    # Set up CNN architecture.
    regularizer_object = keras.regularizers.l1_l2(l1=l1_weight, l2=l2_weight)
    input_layer_object = keras.layers.Input(shape=(num_input_features,))

    current_num_filters = int(numpy.round(
        num_input_features / numpy.prod(first_spatial_dimensions)
    ))
    first_dimensions = numpy.concatenate((
        first_spatial_dimensions, numpy.array([current_num_filters], dtype=int)
    ))
    layer_object = keras.layers.Reshape(
        target_shape=first_dimensions
    )(input_layer_object)

    kernel_size_tuple = (CONV_FILTER_SIZE, CONV_FILTER_SIZE, CONV_FILTER_SIZE)

    for i in range(num_main_layers):
        if i == num_main_layers - 1:
            current_num_filters = num_output_channels + 0
        elif rowcol_upsampling_factors[i] == 1:
            current_num_filters = int(numpy.round(current_num_filters / 2))

        this_stride_tuple = (
            rowcol_upsampling_factors[i], rowcol_upsampling_factors[i],
            height_upsampling_factors[i]
        )

        if use_transposed_conv:
            layer_object = keras.layers.Conv3DTranspose(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=this_stride_tuple, padding='same',
                data_format='channels_last', dilation_rate=(1, 1, 1),
                activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)
        else:
            if rowcol_upsampling_factors[i] > 1:
                try:
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last',
                        interpolation='bilinear'
                    )(layer_object)
                except TypeError:
                    # `UpSampling3D` does not support bilinear interpolation,
                    # so fall back to the default (nearest-neighbour)
                    # upsampling.
                    layer_object = keras.layers.UpSampling3D(
                        size=this_stride_tuple, data_format='channels_last'
                    )(layer_object)

            layer_object = keras.layers.Conv3D(
                filters=current_num_filters, kernel_size=kernel_size_tuple,
                strides=(1, 1, 1), padding='same', data_format='channels_last',
                dilation_rate=(1, 1, 1), activation=None, use_bias=True,
                kernel_initializer='glorot_uniform', bias_initializer='zeros',
                kernel_regularizer=regularizer_object
            )(layer_object)

        use_activation_here = (
            activation_function_name is not None and
            (i < num_main_layers - 1 or use_activn_for_last_layer)
        )

        if use_activation_here:
            layer_object = architecture_utils.get_activation_layer(
                activation_function_string=activation_function_name,
                alpha_for_elu=alpha_for_elu, alpha_for_relu=alpha_for_relu
            )(layer_object)

        use_batch_norm_here = (
            use_batch_norm and
            (i < num_main_layers - 1 or use_batch_norm_for_last_layer)
        )

        if use_batch_norm_here:
            layer_object = (
                architecture_utils.get_batch_norm_layer()(layer_object)
            )

    # Compile CNN.
    model_object = keras.models.Model(
        inputs=input_layer_object, outputs=layer_object)
    model_object.compile(
        loss=keras.losses.mean_squared_error, optimizer=keras.optimizers.Adam()
    )

    model_object.summary()
    return model_object
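
# Illustrative call (not from the original repository; hyperparameters are
# placeholders).  This builds an upconvnet that maps a length-1024 feature
# vector to a 32 x 32 x 12 grid with 4 channels: the 4 x 4 x 4 starting grid is
# upsampled by horizontal factors (2, 2, 2) and vertical factors (1, 3, 1).
import numpy

upconvnet_model_object = create_3d_net(
    num_input_features=1024,
    first_spatial_dimensions=numpy.array([4, 4, 4], dtype=int),
    rowcol_upsampling_factors=numpy.array([2, 2, 2], dtype=int),
    height_upsampling_factors=numpy.array([1, 3, 1], dtype=int),
    num_output_channels=4,
    use_transposed_conv=True,
    activation_function_name=None)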