def _run(model_file_name, top_example_dir_name, first_time_string,
         last_time_string, num_times, num_examples_per_time, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_times: Same.
    :param num_examples_per_time: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print(SEPARATOR_STRING)
    predictor_matrix, target_values = _read_examples(
        top_example_dir_name=top_example_dir_name,
        first_time_string=first_time_string,
        last_time_string=last_time_string,
        num_times=num_times,
        num_examples_per_time=num_examples_per_time,
        model_metadata_dict=model_metadata_dict)
    print(SEPARATOR_STRING)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]
    result_dict = permutation.run_permutation_test(
        model_object=model_object,
        list_of_input_matrices=[predictor_matrix],
        predictor_names_by_matrix=[narr_predictor_names],
        target_values=target_values,
        prediction_function=_prediction_function,
        cost_function=permutation.cross_entropy_function)

    print(SEPARATOR_STRING)
    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation.write_results(result_dict=result_dict,
                              pickle_file_name=output_file_name)
def _run(model_file_name, first_time_string, last_time_string, randomize_times,
         num_target_times, use_isotonic_regression, top_narr_directory_name,
         top_frontal_grid_dir_name, output_dir_name):
    """Applies traditional CNN to full grids.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param randomize_times: Same.
    :param num_target_times: Same.
    :param use_isotonic_regression: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param output_dir_name: Same.
    """

    first_time_unix_sec = time_conversion.string_to_unix_sec(
        first_time_string, INPUT_TIME_FORMAT)
    last_time_unix_sec = time_conversion.string_to_unix_sec(
        last_time_string, INPUT_TIME_FORMAT)
    target_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=NARR_TIME_INTERVAL_SEC, include_endpoint=True)

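    # If requested, randomly subsample the target times so that only
    # `num_target_times` of them are processed.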
    if randomize_times:
        error_checking.assert_is_leq(
            num_target_times, len(target_times_unix_sec))
        numpy.random.shuffle(target_times_unix_sec)
        target_times_unix_sec = target_times_unix_sec[:num_target_times]

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    if use_isotonic_regression:
        isotonic_file_name = isotonic_regression.find_model_file(
            base_model_file_name=model_file_name, raise_error_if_missing=True)

        print('Reading isotonic-regression models from: "{0:s}"...'.format(
            isotonic_file_name))
        isotonic_model_object_by_class = (
            isotonic_regression.read_model_for_each_class(isotonic_file_name)
        )
    else:
        isotonic_model_object_by_class = None

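    # A model trained without lead time takes 3-D examples (row x column x
    # channel); a model trained with lead time takes 4-D examples, with an
    # extra time dimension.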
    if model_metadata_dict[traditional_cnn.NUM_LEAD_TIME_STEPS_KEY] is None:
        num_dimensions = 3
    else:
        num_dimensions = 4

    num_classes = len(model_metadata_dict[traditional_cnn.CLASS_FRACTIONS_KEY])
    num_target_times = len(target_times_unix_sec)
    print(SEPARATOR_STRING)

    for i in range(num_target_times):
        if num_dimensions == 3:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_3d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])
        else:
            (this_class_probability_matrix, this_target_matrix
            ) = traditional_cnn.apply_model_to_4d_example(
                model_object=model_object,
                target_time_unix_sec=target_times_unix_sec[i],
                predictor_time_step_offsets=model_metadata_dict[
                    traditional_cnn.PREDICTOR_TIME_STEP_OFFSETS_KEY],
                num_lead_time_steps=model_metadata_dict[
                    traditional_cnn.NUM_LEAD_TIME_STEPS_KEY],
                top_narr_directory_name=top_narr_directory_name,
                top_frontal_grid_dir_name=top_frontal_grid_dir_name,
                narr_predictor_names=model_metadata_dict[
                    traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                pressure_level_mb=model_metadata_dict[
                    traditional_cnn.PRESSURE_LEVEL_KEY],
                dilation_distance_metres=model_metadata_dict[
                    traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY],
                num_rows_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
                num_columns_in_half_grid=model_metadata_dict[
                    traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
                num_classes=num_classes,
                isotonic_model_object_by_class=isotonic_model_object_by_class,
                narr_mask_matrix=model_metadata_dict[
                    traditional_cnn.NARR_MASK_MATRIX_KEY])

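        # Relabel sentinel values of -1 (presumably grid cells with no valid
        # label, e.g. outside the NARR mask) as class 0, meaning "no front".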
        this_target_matrix[this_target_matrix == -1] = 0
        print(MINOR_SEPARATOR_STRING)

        this_prediction_file_name = ml_utils.find_gridded_prediction_file(
            directory_name=output_dir_name,
            first_target_time_unix_sec=target_times_unix_sec[i],
            last_target_time_unix_sec=target_times_unix_sec[i],
            raise_error_if_missing=False)

        print('Writing gridded predictions to file: "{0:s}"...'.format(
            this_prediction_file_name))

        ml_utils.write_gridded_predictions(
            pickle_file_name=this_prediction_file_name,
            class_probability_matrix=this_class_probability_matrix,
            target_times_unix_sec=target_times_unix_sec[[i]],
            model_file_name=model_file_name,
            used_isotonic_regression=use_isotonic_regression,
            target_matrix=this_target_matrix)

        if i != num_target_times - 1:
            print(SEPARATOR_STRING)
def _run(model_file_name, first_eval_time_string, last_eval_time_string,
         num_times, num_examples_per_time, dilation_distance_metres,
         use_isotonic_regression, top_narr_directory_name,
         top_frontal_grid_dir_name, output_dir_name):
    """Evaluates CNN trained by patch classification.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param first_eval_time_string: Same.
    :param last_eval_time_string: Same.
    :param num_times: Same.
    :param num_examples_per_time: Same.
    :param dilation_distance_metres: Same.
    :param use_isotonic_regression: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param output_dir_name: Same.
    """

    first_eval_time_unix_sec = time_conversion.string_to_unix_sec(
        first_eval_time_string, INPUT_TIME_FORMAT)
    last_eval_time_unix_sec = time_conversion.string_to_unix_sec(
        last_eval_time_string, INPUT_TIME_FORMAT)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name, raise_error_if_missing=True)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

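    # A negative dilation distance is a sentinel meaning "use the same
    # dilation distance that was used to train the model".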
    if dilation_distance_metres < 0:
        dilation_distance_metres = model_metadata_dict[
            traditional_cnn.DILATION_DISTANCE_FOR_TARGET_KEY] + 0.

    if use_isotonic_regression:
        isotonic_file_name = isotonic_regression.find_model_file(
            base_model_file_name=model_file_name, raise_error_if_missing=True)

        print('Reading isotonic-regression models from: "{0:s}"...'.format(
            isotonic_file_name))
        isotonic_model_object_by_class = (
            isotonic_regression.read_model_for_each_class(isotonic_file_name))
    else:
        isotonic_model_object_by_class = None

    num_classes = len(model_metadata_dict[traditional_cnn.CLASS_FRACTIONS_KEY])
    print(SEPARATOR_STRING)

    class_probability_matrix, observed_labels = (
        eval_utils.downsized_examples_to_eval_pairs(
            model_object=model_object,
            first_target_time_unix_sec=first_eval_time_unix_sec,
            last_target_time_unix_sec=last_eval_time_unix_sec,
            num_target_times_to_sample=num_times,
            num_examples_per_time=num_examples_per_time,
            top_narr_directory_name=top_narr_directory_name,
            top_frontal_grid_dir_name=top_frontal_grid_dir_name,
            narr_predictor_names=model_metadata_dict[
                traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
            pressure_level_mb=model_metadata_dict[
                traditional_cnn.PRESSURE_LEVEL_KEY],
            dilation_distance_metres=dilation_distance_metres,
            num_rows_in_half_grid=model_metadata_dict[
                traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
            num_columns_in_half_grid=model_metadata_dict[
                traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY],
            num_classes=num_classes,
            predictor_time_step_offsets=model_metadata_dict[
                traditional_cnn.PREDICTOR_TIME_STEP_OFFSETS_KEY],
            num_lead_time_steps=model_metadata_dict[
                traditional_cnn.NUM_LEAD_TIME_STEPS_KEY],
            isotonic_model_object_by_class=isotonic_model_object_by_class,
            narr_mask_matrix=model_metadata_dict[
                traditional_cnn.NARR_MASK_MATRIX_KEY]))

    print(SEPARATOR_STRING)

    model_eval_helper.run_evaluation(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels,
        output_dir_name=output_dir_name)
def _run(num_half_rows, num_half_columns, narr_predictor_names,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_model_file_name):
    """Trains CNN for use with upconvnet.

    This is effectively the main method.

    :param num_half_rows: See documentation at top of file.
    :param num_half_columns: Same.
    :param narr_predictor_names: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    """

    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    print('Reading NARR mask from: "{0:s}"...'.format(NARR_MASK_FILE_NAME))
    narr_mask_matrix = ml_utils.read_narr_mask(NARR_MASK_FILE_NAME)

    output_metafile_name = traditional_cnn.find_metafile(
        model_file_name=output_model_file_name, raise_error_if_missing=False)
    print('Writing metadata to: "{0:s}"...'.format(output_metafile_name))

    traditional_cnn.write_model_metadata(
        pickle_file_name=output_metafile_name, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_examples_per_target_time=NUM_EXAMPLES_PER_TIME,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        num_rows_in_half_grid=num_half_rows,
        num_columns_in_half_grid=num_half_columns,
        dilation_distance_metres=DILATION_DISTANCE_METRES,
        class_fractions=CLASS_FRACTIONS,
        weight_loss_function=WEIGHT_LOSS_FUNCTION,
        narr_predictor_names=narr_predictor_names,
        pressure_level_mb=PRESSURE_LEVEL_MB,
        training_start_time_unix_sec=first_training_time_unix_sec,
        training_end_time_unix_sec=last_training_time_unix_sec,
        validation_start_time_unix_sec=first_validation_time_unix_sec,
        validation_end_time_unix_sec=last_validation_time_unix_sec,
        num_lead_time_steps=None,
        predictor_time_step_offsets=None,
        narr_mask_matrix=narr_mask_matrix)
    print(SEPARATOR_STRING)

    model_object = cnn_architecture.create_cnn(
        num_half_rows=num_half_rows, num_half_columns=num_half_columns,
        num_channels=len(narr_predictor_names))
    print(SEPARATOR_STRING)

    traditional_cnn.quick_train_3d(
        model_object=model_object, output_file_name=output_model_file_name,
        num_examples_per_batch=num_examples_per_batch, num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_start_time_unix_sec=first_training_time_unix_sec,
        training_end_time_unix_sec=last_training_time_unix_sec,
        top_training_dir_name=top_training_dir_name,
        top_validation_dir_name=top_validation_dir_name,
        narr_predictor_names=narr_predictor_names,
        num_classes=len(CLASS_FRACTIONS),
        num_rows_in_half_grid=num_half_rows,
        num_columns_in_half_grid=num_half_columns,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_start_time_unix_sec=first_validation_time_unix_sec,
        validation_end_time_unix_sec=last_validation_time_unix_sec)
def _run(input_file_name, colour_map_name, min_colour_percentile,
         max_colour_percentile, same_cmap_for_all_predictors,
         top_output_dir_name):
    """Plots results of backwards optimization.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param colour_map_name: Same.
    :param min_colour_percentile: Same.
    :param max_colour_percentile: Same.
    :param same_cmap_for_all_predictors: Same.
    :param top_output_dir_name: Same.
    """

    original_output_dir_name = '{0:s}/original'.format(top_output_dir_name)
    optimized_output_dir_name = '{0:s}/optimized'.format(top_output_dir_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=original_output_dir_name)
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=optimized_output_dir_name)

    error_checking.assert_is_geq(min_colour_percentile, 0.)
    error_checking.assert_is_leq(max_colour_percentile, 100.)
    error_checking.assert_is_greater(max_colour_percentile,
                                     min_colour_percentile)

    colour_map_object = pyplot.cm.get_cmap(colour_map_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    this_list, bwo_metadata_dict = bwo.read_results(input_file_name)

    optimized_predictor_matrix = this_list[0]
    num_examples = optimized_predictor_matrix.shape[0]
    del this_list

    original_predictor_matrix = bwo_metadata_dict[bwo.INIT_FUNCTION_KEY][0]
    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=bwo_metadata_dict[bwo.MODEL_FILE_NAME_KEY])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]
    num_predictors = len(narr_predictor_names)

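    # Wind barbs are plotted only if the wind components are among the
    # predictors.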
    try:
        example_plotting.get_wind_indices(narr_predictor_names)
        plot_wind_barbs = True
    except ValueError:
        plot_wind_barbs = False

    for i in range(num_examples):
        this_combined_matrix = numpy.concatenate(
            (original_predictor_matrix[i, ...],
             optimized_predictor_matrix[i, ...]),
            axis=0)

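        # Colour limits are percentiles over the original and optimized
        # examples combined, either pooled across all predictors or computed
        # separately for each predictor.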
        if same_cmap_for_all_predictors:
            this_min_colour_value = numpy.percentile(this_combined_matrix,
                                                     min_colour_percentile)
            this_max_colour_value = numpy.percentile(this_combined_matrix,
                                                     max_colour_percentile)

            this_min_cval_by_predictor = numpy.full(num_predictors,
                                                    this_min_colour_value)
            this_max_cval_by_predictor = numpy.full(num_predictors,
                                                    this_max_colour_value)
        else:
            this_min_cval_by_predictor = numpy.full(num_predictors, numpy.nan)
            this_max_cval_by_predictor = this_min_cval_by_predictor + 0.

            for k in range(num_predictors):
                this_min_cval_by_predictor[k] = numpy.percentile(
                    this_combined_matrix[..., k], min_colour_percentile)
                this_max_cval_by_predictor[k] = numpy.percentile(
                    this_combined_matrix[..., k], max_colour_percentile)

        this_figure_file_name = '{0:s}/example{1:06d}_original.jpg'.format(
            original_output_dir_name, i)

        if plot_wind_barbs:
            example_plotting.plot_many_predictors_with_barbs(
                predictor_matrix=original_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)
        else:
            example_plotting.plot_many_predictors_sans_barbs(
                predictor_matrix=original_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()

        this_figure_file_name = '{0:s}/example{1:06d}_optimized.jpg'.format(
            optimized_output_dir_name, i)

        if plot_wind_barbs:
            example_plotting.plot_many_predictors_with_barbs(
                predictor_matrix=optimized_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)
        else:
            example_plotting.plot_many_predictors_sans_barbs(
                predictor_matrix=optimized_predictor_matrix[i, ...],
                predictor_names=narr_predictor_names,
                cmap_object_by_predictor=[colour_map_object] * num_predictors,
                min_colour_value_by_predictor=this_min_cval_by_predictor,
                max_colour_value_by_predictor=this_max_cval_by_predictor)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()
def _run(input_cnn_file_name, use_batch_norm_for_out_layer, use_transposed_conv,
         use_conv_for_out_layer, smoothing_radius_px, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_model_file_name):
    """Trains upconvnet.

    This is effectively the main method.

    :param input_cnn_file_name: See documentation at top of file.
    :param use_batch_norm_for_out_layer: Same.
    :param use_transposed_conv: Same.
    :param use_conv_for_out_layer: Same.
    :param smoothing_radius_px: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    """

    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

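    # A non-positive smoothing radius is a sentinel meaning "no smoothing".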
    if smoothing_radius_px <= 0:
        smoothing_radius_px = None

    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = traditional_cnn.read_keras_model(input_cnn_file_name)

    cnn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=input_cnn_file_name, raise_error_if_missing=True)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = traditional_cnn.read_model_metadata(
        cnn_metafile_name)

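    # The upconvnet's input is the CNN's flattened feature vector, so its
    # dimensions are read from the input to the CNN's flattening layer.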
    cnn_feature_layer_name = traditional_cnn.get_flattening_layer(
        cnn_model_object)

    cnn_feature_layer_object = cnn_model_object.get_layer(
        name=cnn_feature_layer_name)
    cnn_feature_dimensions = numpy.array(
        cnn_feature_layer_object.input.shape[1:], dtype=int)

    num_input_features = numpy.prod(cnn_feature_dimensions)
    first_num_rows = cnn_feature_dimensions[0]
    first_num_columns = cnn_feature_dimensions[1]
    num_output_channels = numpy.array(
        cnn_model_object.input.shape[1:], dtype=int
    )[-1]

    ucn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=output_model_file_name, raise_error_if_missing=False)

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        ucn_metafile_name))
    upconvnet.write_model_metadata(
        pickle_file_name=ucn_metafile_name,
        top_training_dir_name=top_training_dir_name,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        cnn_model_file_name=input_cnn_file_name,
        cnn_feature_layer_name=cnn_feature_layer_name, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        top_validation_dir_name=top_validation_dir_name,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec)
    print(SEPARATOR_STRING)

    ucn_model_object = upconvnet.create_net(
        num_input_features=num_input_features, first_num_rows=first_num_rows,
        first_num_columns=first_num_columns,
        upsampling_factors=UPSAMPLING_FACTORS,
        num_output_channels=num_output_channels,
        use_activation_for_out_layer=False,
        use_bn_for_out_layer=use_batch_norm_for_out_layer,
        use_transposed_conv=use_transposed_conv,
        use_conv_for_out_layer=use_conv_for_out_layer,
        smoothing_radius_px=smoothing_radius_px)
    print(SEPARATOR_STRING)

    upconvnet.train_upconvnet(
        ucn_model_object=ucn_model_object,
        top_training_dir_name=top_training_dir_name,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name,
        cnn_metadata_dict=cnn_metadata_dict,
        num_examples_per_batch=num_examples_per_batch,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        output_model_file_name=output_model_file_name,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        top_validation_dir_name=top_validation_dir_name,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec)
def _run(orig_model_file_name, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         num_training_examples, top_validn_dir_name, first_validn_time_string,
         last_validn_time_string, num_validn_examples, narr_predictor_names,
         num_training_examples_per_batch, num_epochs, min_loss_decrease,
         min_percentage_loss_decrease, num_steps_for_loss_decrease,
         output_file_name):
    """Runs sequential forward selection.

    This is effectively the main method.

    :param orig_model_file_name: See documentation at top of file.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_training_examples: Same.
    :param top_validn_dir_name: Same.
    :param first_validn_time_string: Same.
    :param last_validn_time_string: Same.
    :param num_validn_examples: Same.
    :param narr_predictor_names: Same.
    :param num_training_examples_per_batch: Same.
    :param num_epochs: Same.
    :param min_loss_decrease: Same.
    :param min_percentage_loss_decrease: Same.
    :param num_steps_for_loss_decrease: Same.
    :param output_file_name: Same.
    """

    print('Reading original model from: "{0:s}"...'.format(
        orig_model_file_name))
    orig_model_object = traditional_cnn.read_keras_model(orig_model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=orig_model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print(SEPARATOR_STRING)
    training_predictor_matrix, training_target_values = _read_examples(
        top_example_dir_name=top_training_dir_name,
        first_time_string=first_training_time_string,
        last_time_string=last_training_time_string,
        num_examples=num_training_examples,
        model_metadata_dict=model_metadata_dict)
    print(SEPARATOR_STRING)

    validn_predictor_matrix, validn_target_values = _read_examples(
        top_example_dir_name=top_validn_dir_name,
        first_time_string=first_validn_time_string,
        last_time_string=last_validn_time_string,
        num_examples=num_validn_examples,
        model_metadata_dict=model_metadata_dict)
    print(SEPARATOR_STRING)

    # TODO(thunderhoser): I could make the code more efficient by making
    # `narr_predictor_names` an input arg to `_read_examples`.
    if narr_predictor_names[0] in ['', 'None']:
        narr_predictor_names = model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY]

    training_function = sequential_selection.create_training_function(
        num_training_examples_per_batch=num_training_examples_per_batch,
        num_epochs=num_epochs)

    result_dict = sequential_selection.run_sfs(
        list_of_training_matrices=[training_predictor_matrix],
        training_target_values=training_target_values,
        list_of_validation_matrices=[validn_predictor_matrix],
        validation_target_values=validn_target_values,
        predictor_names_by_matrix=[narr_predictor_names],
        model_builder=_create_model_builder(orig_model_object),
        training_function=training_function,
        min_loss_decrease=min_loss_decrease,
        min_percentage_loss_decrease=min_percentage_loss_decrease,
        num_steps_for_loss_decrease=num_steps_for_loss_decrease)
    print(SEPARATOR_STRING)

    result_dict.update({
        ORIG_MODEL_FILE_ARG_NAME: orig_model_file_name,
        TRAINING_DIR_ARG_NAME: top_training_dir_name,
        FIRST_TRAINING_TIME_ARG_NAME: first_training_time_string,
        LAST_TRAINING_TIME_ARG_NAME: last_training_time_string,
        NUM_TRAINING_EXAMPLES_ARG_NAME: num_training_examples,
        VALIDN_DIR_ARG_NAME: top_validn_dir_name,
        FIRST_VALIDN_TIME_ARG_NAME: first_validn_time_string,
        LAST_VALIDN_TIME_ARG_NAME: last_validn_time_string,
        NUM_VALIDN_EXAMPLES_ARG_NAME: num_validn_examples
    })

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    sequential_selection.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
def _run(upconvnet_file_name, example_file_name, num_examples, example_indices,
         top_output_dir_name):
    """Applies upconvnet to one or more examples.

    This is effectively the main method.

    :param upconvnet_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param top_output_dir_name: Same.
    """

    # Check input args.
    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    # Read upconvnet and metadata.
    ucn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=upconvnet_file_name, raise_error_if_missing=True)

    print('Reading trained upconvnet from: "{0:s}"...'.format(
        upconvnet_file_name))
    ucn_model_object = traditional_cnn.read_keras_model(upconvnet_file_name)

    print('Reading upconvnet metadata from: "{0:s}"...'.format(
        ucn_metafile_name))
    ucn_metadata_dict = upconvnet.read_model_metadata(ucn_metafile_name)

    # Read CNN and metadata.
    cnn_file_name = ucn_metadata_dict[upconvnet.CNN_FILE_NAME_KEY]
    cnn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=cnn_file_name, raise_error_if_missing=True)

    print('Reading trained CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = traditional_cnn.read_keras_model(cnn_file_name)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = traditional_cnn.read_model_metadata(cnn_metafile_name)
    print(SEPARATOR_STRING)

    actual_image_matrix = _read_input_examples(
        example_file_name=example_file_name,
        cnn_metadata_dict=cnn_metadata_dict,
        num_examples=num_examples,
        example_indices=example_indices)
    print(SEPARATOR_STRING)

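    # Reconstruct each example: the CNN (presumably up to its feature layer)
    # encodes the image, and the upconvnet decodes it back to image space.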
    reconstructed_image_matrix = upconvnet.apply_upconvnet(
        actual_image_matrix=actual_image_matrix,
        cnn_model_object=cnn_model_object,
        ucn_model_object=ucn_model_object)
    print(SEPARATOR_STRING)

    _plot_examples(actual_image_matrix=actual_image_matrix,
                   reconstructed_image_matrix=reconstructed_image_matrix,
                   narr_predictor_names=cnn_metadata_dict[
                       traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                   top_output_dir_name=top_output_dir_name)
def _run(num_epochs, num_examples_per_batch, num_examples_per_time,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         num_rows_in_half_grid, num_columns_in_half_grid,
         dilation_distance_metres, weight_loss_function, class_fractions,
         num_classes, num_lead_time_steps, predictor_time_step_offsets,
         pressure_level_mb, narr_predictor_names, training_start_time_string,
         training_end_time_string, validation_start_time_string,
         validation_end_time_string, top_narr_dir_name,
         top_frontal_grid_dir_name, narr_mask_file_name, num_conv_layer_sets,
         num_conv_layers_per_set, pooling_type_string,
         conv_activation_function_string, alpha_for_elu, alpha_for_relu,
         use_batch_normalization, init_num_filters,
         conv_layer_dropout_fraction, dense_layer_dropout_fraction, l2_weight,
         output_file_name):
    """Trains CNN for patch classification.

    This is effectively the main method.

    :param num_epochs: See documentation at top of machine_learning_helper.py.
    :param num_examples_per_batch: Same.
    :param num_examples_per_time: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param num_rows_in_half_grid: Same.
    :param num_columns_in_half_grid: Same.
    :param dilation_distance_metres: Same.
    :param weight_loss_function: Same.
    :param class_fractions: Same.
    :param num_classes: Same.
    :param num_lead_time_steps: Same.
    :param predictor_time_step_offsets: Same.
    :param pressure_level_mb: Same.
    :param narr_predictor_names: Same.
    :param training_start_time_string: Same.
    :param training_end_time_string: Same.
    :param validation_start_time_string: Same.
    :param validation_end_time_string: Same.
    :param top_narr_dir_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param narr_mask_file_name: Same.
    :param num_conv_layer_sets: Same.
    :param num_conv_layers_per_set: Same.
    :param pooling_type_string: Same.
    :param conv_activation_function_string: Same.
    :param alpha_for_elu: Same.
    :param alpha_for_relu: Same.
    :param use_batch_normalization: Same.
    :param init_num_filters: Same.
    :param conv_layer_dropout_fraction: Same.
    :param dense_layer_dropout_fraction: Same.
    :param l2_weight: Same.
    :param output_file_name: Same.
    :raises ValueError: if num_lead_time_steps > 1.  This script cannot yet
        handle convolution over time.
    """

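    # Non-positive values and empty strings are sentinels meaning "do not use
    # this option".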
    if conv_layer_dropout_fraction <= 0:
        conv_layer_dropout_fraction = None
    if dense_layer_dropout_fraction <= 0:
        dense_layer_dropout_fraction = None
    if l2_weight <= 0:
        l2_weight = None
    if narr_mask_file_name == '':
        narr_mask_file_name = None

    training_start_time_unix_sec = time_conversion.string_to_unix_sec(
        training_start_time_string, INPUT_TIME_FORMAT)
    training_end_time_unix_sec = time_conversion.string_to_unix_sec(
        training_end_time_string, INPUT_TIME_FORMAT)

    validation_start_time_unix_sec = time_conversion.string_to_unix_sec(
        validation_start_time_string, INPUT_TIME_FORMAT)
    validation_end_time_unix_sec = time_conversion.string_to_unix_sec(
        validation_end_time_string, INPUT_TIME_FORMAT)

    if num_lead_time_steps <= 1:
        num_lead_time_steps = None
        predictor_time_step_offsets = None
    else:
        error_string = (
            'num_lead_time_steps > 1 (specifically {0:d}), but this script '
            'cannot yet handle convolution over time.'
        ).format(num_lead_time_steps)
        raise ValueError(error_string)

    if narr_mask_file_name is None:
        narr_mask_matrix = None
    else:
        print('Reading NARR mask from: "{0:s}"...'.format(narr_mask_file_name))
        narr_mask_matrix = ml_utils.read_narr_mask(narr_mask_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=output_file_name, raise_error_if_missing=False)
    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))

    traditional_cnn.write_model_metadata(
        pickle_file_name=model_metafile_name,
        num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_examples_per_target_time=num_examples_per_time,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        num_rows_in_half_grid=num_rows_in_half_grid,
        num_columns_in_half_grid=num_columns_in_half_grid,
        dilation_distance_metres=dilation_distance_metres,
        class_fractions=class_fractions,
        weight_loss_function=weight_loss_function,
        narr_predictor_names=narr_predictor_names,
        pressure_level_mb=pressure_level_mb,
        training_start_time_unix_sec=training_start_time_unix_sec,
        training_end_time_unix_sec=training_end_time_unix_sec,
        validation_start_time_unix_sec=validation_start_time_unix_sec,
        validation_end_time_unix_sec=validation_end_time_unix_sec,
        num_lead_time_steps=num_lead_time_steps,
        predictor_time_step_offsets=predictor_time_step_offsets,
        narr_mask_matrix=narr_mask_matrix)

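    # A half-grid with N rows on either side of the centre yields a full grid
    # with 2N + 1 rows (likewise for columns).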
    num_rows_in_grid = 2 * num_rows_in_half_grid + 1
    num_columns_in_grid = 2 * num_columns_in_half_grid + 1
    num_predictor_fields = len(narr_predictor_names)

    model_object = cnn_architecture.get_2d_swirlnet_architecture(
        num_radar_rows=num_rows_in_grid,
        num_radar_columns=num_columns_in_grid,
        num_radar_channels=num_predictor_fields,
        num_radar_conv_layer_sets=num_conv_layer_sets,
        num_conv_layers_per_set=num_conv_layers_per_set,
        pooling_type_string=pooling_type_string,
        num_classes=num_classes,
        conv_activation_function_string=conv_activation_function_string,
        alpha_for_elu=alpha_for_elu,
        alpha_for_relu=alpha_for_relu,
        use_batch_normalization=use_batch_normalization,
        init_num_radar_filters=init_num_filters,
        conv_layer_dropout_fraction=conv_layer_dropout_fraction,
        dense_layer_dropout_fraction=dense_layer_dropout_fraction,
        l2_weight=l2_weight,
        list_of_metric_functions=traditional_cnn.LIST_OF_METRIC_FUNCTIONS)

    traditional_cnn.train_with_3d_examples(
        model_object=model_object,
        output_file_name=output_file_name,
        num_examples_per_batch=num_examples_per_batch,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_examples_per_target_time=num_examples_per_time,
        training_start_time_unix_sec=training_start_time_unix_sec,
        training_end_time_unix_sec=training_end_time_unix_sec,
        top_narr_directory_name=top_narr_dir_name,
        top_frontal_grid_dir_name=top_frontal_grid_dir_name,
        narr_predictor_names=narr_predictor_names,
        pressure_level_mb=pressure_level_mb,
        dilation_distance_metres=dilation_distance_metres,
        class_fractions=class_fractions,
        num_rows_in_half_grid=num_rows_in_half_grid,
        num_columns_in_half_grid=num_columns_in_half_grid,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_start_time_unix_sec=validation_start_time_unix_sec,
        validation_end_time_unix_sec=validation_end_time_unix_sec,
        narr_mask_matrix=narr_mask_matrix)
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, output_file_name):
    """Creates saliency map for each example, based on the same CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param output_file_name: Same.
    """

    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print('Reading normalized examples from: "{0:s}"...'.format(
        example_file_name))
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]
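    # If `num_examples` was specified, draw that many examples at random;
    # otherwise, use the exact indices in `example_indices`.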
    if num_examples is not None:
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(example_indices,
                                              size=num_examples,
                                              replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]

    if component_type_string == CLASS_COMPONENT_TYPE_STRING:
        print('Computing saliency maps for target class {0:d}...'.format(
            target_class))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_class_activation(
                model_object=model_object,
                target_class=target_class,
                list_of_input_matrices=[predictor_matrix])[0])

    elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
        print('Computing saliency maps for neuron {0:s} in layer '
              '"{1:s}"...'.format(str(neuron_indices), layer_name))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_neuron_activation(
                model_object=model_object,
                layer_name=layer_name,
                neuron_indices=neuron_indices,
                list_of_input_matrices=[predictor_matrix],
                ideal_activation=ideal_activation)[0])

    else:
        print('Computing saliency maps for channel {0:d} in layer '
              '"{1:s}"...'.format(channel_index, layer_name))

        saliency_matrix = (
            gg_saliency_maps.get_saliency_maps_for_channel_activation(
                model_object=model_object,
                layer_name=layer_name,
                channel_index=channel_index,
                list_of_input_matrices=[predictor_matrix],
                stat_function_for_neuron_activations=K.max,
                ideal_activation=ideal_activation)[0])

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    ge_saliency_maps.write_file(pickle_file_name=output_file_name,
                                normalized_predictor_matrix=predictor_matrix,
                                saliency_matrix=saliency_matrix,
                                model_file_name=model_file_name,
                                component_type_string=component_type_string,
                                target_class=target_class,
                                layer_name=layer_name,
                                ideal_activation=ideal_activation,
                                neuron_indices=neuron_indices,
                                channel_index=channel_index)
def _run(model_file_name, example_file_name, num_examples, example_indices,
         component_type_string, target_class, layer_name, ideal_activation,
         neuron_indices, channel_index, num_iterations, learning_rate,
         output_file_name):
    """Runs backwards optimization on a trained CNN.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param ideal_activation: Same.
    :param neuron_indices: Same.
    :param channel_index: Same.
    :param num_iterations: Same.
    :param learning_rate: Same.
    :param output_file_name: Same.
    """

    if num_examples <= 0:
        num_examples = None

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print('Reading model metadata from: "{0:s}"...'.format(
        model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print('Reading normalized examples from: "{0:s}"...'.format(
        example_file_name))
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

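    # If `num_examples` was specified, draw that many examples at random;
    # otherwise, use the exact indices in `example_indices`.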
    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
        num_examples = len(example_indices)
    else:
        error_checking.assert_is_greater(num_examples, 0)

        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(0,
                                         num_examples_total - 1,
                                         num=num_examples_total,
                                         dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(example_indices,
                                              size=num_examples,
                                              replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]
    optimized_predictor_matrix = numpy.full(predictor_matrix.shape, numpy.nan)
    print(SEPARATOR_STRING)

    for i in range(num_examples):
        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print(
                'Optimizing {0:d}th of {1:d} images for target class '
                '{2:d}...'.format(i + 1, num_examples, target_class))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_class(
                    model_object=model_object,
                    target_class=target_class,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate)[0])

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            print(
                'Optimizing {0:d}th of {1:d} images for neuron {2:s} in layer '
                '"{3:s}"...'.format(i + 1, num_examples, str(neuron_indices),
                                    layer_name))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_neuron(
                    model_object=model_object,
                    layer_name=layer_name,
                    neuron_indices=neuron_indices,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

        else:
            print(
                'Optimizing {0:d}th of {1:d} images for channel {2:d} in '
                'layer "{3:s}"...'.format(i + 1, num_examples, channel_index,
                                          layer_name))

            optimized_predictor_matrix[i, ...] = (
                backwards_opt.optimize_input_for_channel(
                    model_object=model_object,
                    layer_name=layer_name,
                    channel_index=channel_index,
                    init_function_or_matrices=[predictor_matrix[[i], ...]],
                    stat_function_for_neuron_activations=K.max,
                    num_iterations=num_iterations,
                    learning_rate=learning_rate,
                    ideal_activation=ideal_activation)[0])

        print(SEPARATOR_STRING)

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    backwards_opt.write_results(
        pickle_file_name=output_file_name,
        list_of_optimized_input_matrices=[optimized_predictor_matrix],
        model_file_name=model_file_name,
        init_function_name_or_matrices=[predictor_matrix],
        num_iterations=num_iterations,
        learning_rate=learning_rate,
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        neuron_indices=neuron_indices,
        channel_index=channel_index,
        ideal_activation=ideal_activation)
def _run(input_file_name, predictor_colour_map_name,
         min_colour_prctile_for_predictors, max_colour_prctile_for_predictors,
         saliency_colour_map_name, max_colour_prctile_for_saliency,
         saliency_contour_line_width, num_saliency_contours, output_dir_name):
    """Plots saliency maps.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param predictor_colour_map_name: Same.
    :param min_colour_prctile_for_predictors: Same.
    :param max_colour_prctile_for_predictors: Same.
    :param saliency_colour_map_name: Same.
    :param max_colour_prctile_for_saliency: Same.
    :param saliency_contour_line_width: Same.
    :param num_saliency_contours: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    error_checking.assert_is_geq(min_colour_prctile_for_predictors, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_predictors, 100.)
    error_checking.assert_is_greater(max_colour_prctile_for_predictors,
                                     min_colour_prctile_for_predictors)

    error_checking.assert_is_geq(max_colour_prctile_for_saliency, 0.)
    error_checking.assert_is_leq(max_colour_prctile_for_saliency, 100.)

    error_checking.assert_is_geq(num_saliency_contours, 2)
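    # Round the contour count down to the nearest even number, then add 1, so
    # that the contour levels are symmetric about zero.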
    num_saliency_contours = 1 + int(
        number_rounding.floor_to_nearest(num_saliency_contours, 2))
    half_num_saliency_contours = (num_saliency_contours - 1) // 2

    predictor_colour_map_object = pyplot.cm.get_cmap(predictor_colour_map_name)
    saliency_colour_map_object = pyplot.cm.get_cmap(saliency_colour_map_name)

    print('Reading data from: "{0:s}"...'.format(input_file_name))
    predictor_matrix, saliency_matrix, saliency_metadata_dict = (
        saliency_maps.read_file(input_file_name))

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=saliency_metadata_dict[
            saliency_maps.MODEL_FILE_NAME_KEY])

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    narr_predictor_names = model_metadata_dict[
        traditional_cnn.NARR_PREDICTOR_NAMES_KEY]
    num_predictors = len(narr_predictor_names)
    num_examples = predictor_matrix.shape[0]

    for i in range(num_examples):
        this_min_cval_by_predictor = numpy.full(num_predictors, numpy.nan)
        this_max_cval_by_predictor = this_min_cval_by_predictor + 0.

        for k in range(num_predictors):
            this_min_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], min_colour_prctile_for_predictors)
            this_max_cval_by_predictor[k] = numpy.percentile(
                predictor_matrix[i, ..., k], max_colour_prctile_for_predictors)

        _, these_axes_objects = example_plotting.plot_many_predictors_sans_barbs(
            predictor_matrix=predictor_matrix[i, ...],
            predictor_names=narr_predictor_names,
            cmap_object_by_predictor=[predictor_colour_map_object] *
            num_predictors,
            min_colour_value_by_predictor=this_min_cval_by_predictor,
            max_colour_value_by_predictor=this_max_cval_by_predictor)

        this_max_abs_contour_level = numpy.percentile(
            numpy.absolute(saliency_matrix[i, ...]),
            max_colour_prctile_for_saliency)

        this_contour_interval = (this_max_abs_contour_level /
                                 half_num_saliency_contours)

        saliency_plotting.plot_many_2d_grids(
            saliency_matrix_3d=saliency_matrix[i, ...],
            axes_objects_2d_list=these_axes_objects,
            colour_map_object=saliency_colour_map_object,
            max_absolute_contour_level=this_max_abs_contour_level,
            contour_interval=this_contour_interval,
            line_width=saliency_contour_line_width)

        this_figure_file_name = '{0:s}/example{1:06d}_saliency.jpg'.format(
            output_dir_name, i)

        print('Saving figure to: "{0:s}"...'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        pyplot.close()
def _run(input_model_file_name, narr_predictor_names, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_model_file_name):
    """Trains CNN with example files.

    This is effectively the main method.

    :param input_model_file_name: See documentation at top of file.
    :param narr_predictor_names: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # Read architecture.
    print 'Reading architecture from: "{0:s}"...'.format(input_model_file_name)
    model_object = traditional_cnn.read_keras_model(input_model_file_name)
    model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.categorical_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=traditional_cnn.LIST_OF_METRIC_FUNCTIONS)

    print SEPARATOR_STRING
    model_object.summary()
    print SEPARATOR_STRING

    # Write metadata.
    input_tensor = model_object.input
    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    num_half_rows = int(numpy.round((num_grid_rows - 1) / 2))
    num_half_columns = int(numpy.round((num_grid_columns - 1) / 2))

    print 'Reading NARR mask from: "{0:s}"...'.format(NARR_MASK_FILE_NAME)
    narr_mask_matrix = ml_utils.read_narr_mask(NARR_MASK_FILE_NAME)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=output_model_file_name, raise_error_if_missing=False)
    print 'Writing metadata to: "{0:s}"...'.format(model_metafile_name)

    traditional_cnn.write_model_metadata(
        pickle_file_name=model_metafile_name,
        num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_examples_per_target_time=NUM_EXAMPLES_PER_TIME_DUMMY,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        num_rows_in_half_grid=num_half_rows,
        num_columns_in_half_grid=num_half_columns,
        dilation_distance_metres=DILATION_DISTANCE_METRES,
        class_fractions=CLASS_FRACTIONS,
        weight_loss_function=WEIGHT_LOSS_FLAG,
        narr_predictor_names=narr_predictor_names,
        pressure_level_mb=PRESSURE_LEVEL_MB,
        training_start_time_unix_sec=first_training_time_unix_sec,
        training_end_time_unix_sec=last_training_time_unix_sec,
        validation_start_time_unix_sec=first_validation_time_unix_sec,
        validation_end_time_unix_sec=last_validation_time_unix_sec,
        num_lead_time_steps=None,
        predictor_time_step_offsets=None,
        narr_mask_matrix=narr_mask_matrix)

    print SEPARATOR_STRING

    traditional_cnn.quick_train_3d(
        model_object=model_object,
        output_file_name=output_model_file_name,
        num_examples_per_batch=num_examples_per_batch,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_start_time_unix_sec=first_training_time_unix_sec,
        training_end_time_unix_sec=last_training_time_unix_sec,
        top_training_dir_name=top_training_dir_name,
        top_validation_dir_name=top_validation_dir_name,
        narr_predictor_names=narr_predictor_names,
        num_classes=NUM_CLASSES,
        num_rows_in_half_grid=num_half_rows,
        num_columns_in_half_grid=num_half_columns,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_start_time_unix_sec=first_validation_time_unix_sec,
        validation_end_time_unix_sec=last_validation_time_unix_sec)
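
One detail of the warm-start recipe above is easy to miss: `keras.models.clone_model` copies only the architecture, so the clone arrives with freshly initialised weights and no compile state, which is why loss, optimizer, and metrics must be re-attached (the "HACK" in the TODO). Below is a minimal, self-contained sketch of the same pattern; the toy architecture and the 'accuracy' metric are placeholders, not the repo's `LIST_OF_METRIC_FUNCTIONS`.

import keras

# Toy "template" model standing in for the architecture read from disk.
template_model = keras.models.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=(10,)),
    keras.layers.Dense(3, activation='softmax')
])

# clone_model copies layers/config but NOT weights or compile state.
fresh_model = keras.models.clone_model(template_model)

# The clone must be compiled again before fit/evaluate will work.
fresh_model.compile(loss=keras.losses.categorical_crossentropy,
                    optimizer=keras.optimizers.Adam(),
                    metrics=['accuracy'])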
Example #14
def _run(input_model_file_name, narr_predictor_names, pressure_level_mb,
         dilation_distance_metres, num_lead_time_steps,
         predictor_time_step_offsets, num_examples_per_time,
         weight_loss_function, class_fractions, top_narr_directory_name,
         top_frontal_grid_dir_name, narr_mask_file_name,
         first_training_time_string, last_training_time_string,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_model_file_name):
    """Trains CNN from scratch.

    This is effectively the main method.

    :param input_model_file_name: See documentation at top of file.
    :param narr_predictor_names: Same.
    :param pressure_level_mb: Same.
    :param dilation_distance_metres: Same.
    :param num_lead_time_steps: Same.
    :param predictor_time_step_offsets: Same.
    :param num_examples_per_time: Same.
    :param weight_loss_function: Same.
    :param class_fractions: Same.
    :param top_narr_directory_name: Same.
    :param top_frontal_grid_dir_name: Same.
    :param narr_mask_file_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    :raises ValueError: if `num_lead_time_steps > 1`.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # An empty mask-file name means that no NARR mask will be applied.
    if narr_mask_file_name == '':
        narr_mask_file_name = None
        narr_mask_matrix = None

    if num_lead_time_steps <= 1:
        num_lead_time_steps = None
        predictor_time_step_offsets = None
    else:
        error_string = (
            'This script cannot yet handle num_lead_time_steps > 1 '
            '(specifically {0:d}).'
        ).format(num_lead_time_steps)

        raise ValueError(error_string)

    # Read architecture.
    print 'Reading architecture from: "{0:s}"...'.format(input_model_file_name)
    model_object = traditional_cnn.read_keras_model(input_model_file_name)
    model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=traditional_cnn.LIST_OF_METRIC_FUNCTIONS)

    print SEPARATOR_STRING
    model_object.summary()
    print SEPARATOR_STRING

    # Write metadata.
    input_tensor = model_object.input
    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    num_half_rows = int(numpy.round((num_grid_rows - 1) / 2))
    num_half_columns = int(numpy.round((num_grid_columns - 1) / 2))

    if narr_mask_file_name is not None:
        print 'Reading NARR mask from: "{0:s}"...'.format(narr_mask_file_name)
        narr_mask_matrix = ml_utils.read_narr_mask(narr_mask_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=output_model_file_name, raise_error_if_missing=False)
    print 'Writing metadata to: "{0:s}"...'.format(model_metafile_name)

    traditional_cnn.write_model_metadata(
        pickle_file_name=model_metafile_name, num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_examples_per_target_time=num_examples_per_time,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        num_rows_in_half_grid=num_half_rows,
        num_columns_in_half_grid=num_half_columns,
        dilation_distance_metres=dilation_distance_metres,
        class_fractions=class_fractions,
        weight_loss_function=weight_loss_function,
        narr_predictor_names=narr_predictor_names,
        pressure_level_mb=pressure_level_mb,
        training_start_time_unix_sec=first_training_time_unix_sec,
        training_end_time_unix_sec=last_training_time_unix_sec,
        validation_start_time_unix_sec=first_validation_time_unix_sec,
        validation_end_time_unix_sec=last_validation_time_unix_sec,
        num_lead_time_steps=num_lead_time_steps,
        predictor_time_step_offsets=predictor_time_step_offsets,
        narr_mask_matrix=narr_mask_matrix)

    print SEPARATOR_STRING

    traditional_cnn.train_with_3d_examples(
        model_object=model_object, output_file_name=output_model_file_name,
        num_examples_per_batch=num_examples_per_batch, num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        num_examples_per_target_time=num_examples_per_time,
        training_start_time_unix_sec=first_training_time_unix_sec,
        training_end_time_unix_sec=last_training_time_unix_sec,
        top_narr_directory_name=top_narr_directory_name,
        top_frontal_grid_dir_name=top_frontal_grid_dir_name,
        narr_predictor_names=narr_predictor_names,
        pressure_level_mb=pressure_level_mb,
        dilation_distance_metres=dilation_distance_metres,
        class_fractions=class_fractions,
        num_rows_in_half_grid=num_half_rows,
        num_columns_in_half_grid=num_half_columns,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_start_time_unix_sec=first_validation_time_unix_sec,
        validation_end_time_unix_sec=last_validation_time_unix_sec,
        narr_mask_matrix=narr_mask_matrix)
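
The half-grid numbers written to the metadata come straight from the model's input tensor: for a grid with an odd number of rows, `num_rows_in_half_grid` is the number of rows on either side of the centre point. A quick check with hypothetical dimensions:

import numpy

# Hypothetical input shape (batch, rows, columns, channels) = (None, 65, 65, 4).
num_grid_rows = 65
num_grid_columns = 65

num_half_rows = int(numpy.round((num_grid_rows - 1) / 2))        # -> 32
num_half_columns = int(numpy.round((num_grid_columns - 1) / 2))  # -> 32

# The full grid size is recovered as 2 * num_half_rows + 1 = 65.
assert 2 * num_half_rows + 1 == num_grid_rows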
Example #15
    def test_find_metafile(self):
        """Ensures correct output from find_metafile."""

        this_file_name = traditional_cnn.find_metafile(
            model_file_name=MODEL_FILE_NAME, raise_error_if_missing=False)
        self.assertTrue(this_file_name == MODEL_METAFILE_NAME)
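
A natural companion to the test above is the failure path, sketched here under the assumption that `find_metafile` raises an error when `raise_error_if_missing=True` and the metafile does not exist; the exact exception type (`ValueError` below) is a guess and should be checked against `traditional_cnn` before use.

    def test_find_metafile_missing(self):
        """Ensures that find_metafile errors out on a missing metafile."""

        # 'nonexistent/model.h5' is a deliberately bogus path.
        with self.assertRaises(ValueError):
            traditional_cnn.find_metafile(
                model_file_name='nonexistent/model.h5',
                raise_error_if_missing=True)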
Example #16
def _run(upconvnet_file_name, top_example_dir_name, first_time_string,
         last_time_string, num_baseline_examples, num_test_examples,
         percent_svd_variance_to_keep, top_output_dir_name):
    """Runs novelty detection.

    :param upconvnet_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_time_string: Same.
    :param last_time_string: Same.
    :param num_baseline_examples: Same.
    :param num_test_examples: Same.
    :param percent_svd_variance_to_keep: Same.
    :param top_output_dir_name: Same.
    """

    # Read upconvnet and metadata.
    ucn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=upconvnet_file_name, raise_error_if_missing=True)

    print 'Reading trained upconvnet from: "{0:s}"...'.format(
        upconvnet_file_name)
    ucn_model_object = traditional_cnn.read_keras_model(upconvnet_file_name)

    print 'Reading upconvnet metadata from: "{0:s}"...'.format(
        ucn_metafile_name)
    ucn_metadata_dict = upconvnet.read_model_metadata(ucn_metafile_name)

    # Read CNN and metadata.
    cnn_file_name = ucn_metadata_dict[upconvnet.CNN_FILE_NAME_KEY]
    cnn_metafile_name = traditional_cnn.find_metafile(
        model_file_name=cnn_file_name, raise_error_if_missing=True)

    print 'Reading trained CNN from: "{0:s}"...'.format(cnn_file_name)
    cnn_model_object = traditional_cnn.read_keras_model(cnn_file_name)

    print 'Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name)
    cnn_metadata_dict = traditional_cnn.read_model_metadata(cnn_metafile_name)
    print SEPARATOR_STRING

    baseline_image_matrix, test_image_matrix = _find_baseline_and_test_examples(
        top_example_dir_name=top_example_dir_name,
        first_time_string=first_time_string,
        last_time_string=last_time_string,
        num_baseline_examples=num_baseline_examples,
        num_test_examples=num_test_examples,
        cnn_model_object=cnn_model_object,
        cnn_metadata_dict=cnn_metadata_dict)
    print SEPARATOR_STRING

    novelty_dict = novelty_detection.do_novelty_detection(
        baseline_image_matrix=baseline_image_matrix,
        test_image_matrix=test_image_matrix,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=traditional_cnn.get_flattening_layer(
            cnn_model_object),
        ucn_model_object=ucn_model_object,
        num_novel_test_images=num_test_examples,
        norm_function=None,
        denorm_function=None,
        percent_svd_variance_to_keep=percent_svd_variance_to_keep)
    print SEPARATOR_STRING

    novelty_dict[novelty_detection.UCN_FILE_NAME_KEY] = upconvnet_file_name
    novelty_file_name = '{0:s}/novelty_results.p'.format(top_output_dir_name)

    print 'Writing results to: "{0:s}"...\n'.format(novelty_file_name)
    novelty_detection.write_results(novelty_dict=novelty_dict,
                                    pickle_file_name=novelty_file_name)

    for i in range(num_test_examples):
        _plot_results(novelty_dict=novelty_dict,
                      narr_predictor_names=cnn_metadata_dict[
                          traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
                      test_index=i,
                      top_output_dir_name=top_output_dir_name)
        print '\n'
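
The `percent_svd_variance_to_keep` argument controls how aggressively the baseline feature matrix is truncated before reconstruction: the fewer singular vectors kept, the more "novel" structure survives in the residual. The snippet below is not the library's internal code, just a standalone numpy illustration of the standard criterion (keep the smallest number of modes whose squared singular values cover the requested fraction of total variance); the matrix shape and percentage are made up.

import numpy

def num_modes_to_keep(feature_matrix, percent_variance_to_keep):
    """Returns number of SVD modes needed to retain the given variance."""

    _, singular_values, _ = numpy.linalg.svd(
        feature_matrix, full_matrices=False)

    # Squared singular values are proportional to variance explained.
    explained_variance = singular_values ** 2
    cumulative_fraction = (
        numpy.cumsum(explained_variance) / numpy.sum(explained_variance))

    # First index whose cumulative fraction reaches the target, plus one.
    return 1 + numpy.searchsorted(
        cumulative_fraction, percent_variance_to_keep / 100.)

# Example: random 100-example x 64-feature baseline matrix, keep 97.5%.
baseline_features = numpy.random.normal(size=(100, 64))
print(num_modes_to_keep(baseline_features, 97.5))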
Example #17
def _run(model_file_name, example_file_name, num_examples, example_indices,
         layer_names, top_output_dir_name):
    """Plots feature maps for each example and CNN layer.

    This is effectively the main method.

    :param model_file_name: See documentation at top of file.
    :param example_file_name: Same.
    :param num_examples: Same.
    :param example_indices: Same.
    :param layer_names: Same.
    :param top_output_dir_name: Same.
    """

    # A non-positive `num_examples` means that the explicit `example_indices`
    # should be used instead.
    if num_examples <= 0:
        num_examples = None

    if num_examples is None:
        error_checking.assert_is_geq_numpy_array(example_indices, 0)
    else:
        error_checking.assert_is_greater(num_examples, 0)

    print 'Reading model from: "{0:s}"...'.format(model_file_name)
    model_object = traditional_cnn.read_keras_model(model_file_name)

    model_metafile_name = traditional_cnn.find_metafile(
        model_file_name=model_file_name)

    print 'Reading model metadata from: "{0:s}"...'.format(model_metafile_name)
    model_metadata_dict = traditional_cnn.read_model_metadata(
        model_metafile_name)

    print 'Reading normalized examples from: "{0:s}"...'.format(
        example_file_name)
    example_dict = trainval_io.read_downsized_3d_examples(
        netcdf_file_name=example_file_name,
        predictor_names_to_keep=model_metadata_dict[
            traditional_cnn.NARR_PREDICTOR_NAMES_KEY],
        num_half_rows_to_keep=model_metadata_dict[
            traditional_cnn.NUM_ROWS_IN_HALF_GRID_KEY],
        num_half_columns_to_keep=model_metadata_dict[
            traditional_cnn.NUM_COLUMNS_IN_HALF_GRID_KEY])

    print SEPARATOR_STRING
    predictor_matrix = example_dict[trainval_io.PREDICTOR_MATRIX_KEY]

    if num_examples is not None:
        # Randomly subsample `num_examples` of the available examples.
        num_examples_total = predictor_matrix.shape[0]
        example_indices = numpy.linspace(
            0, num_examples_total - 1, num=num_examples_total, dtype=int)

        num_examples = min([num_examples, num_examples_total])
        example_indices = numpy.random.choice(
            example_indices, size=num_examples, replace=False)

    predictor_matrix = predictor_matrix[example_indices, ...]
    num_examples = predictor_matrix.shape[0]

    num_layers = len(layer_names)
    feature_matrix_by_layer = [None] * num_layers

    for k in range(num_layers):
        print 'Creating feature maps for layer "{0:s}"...'.format(
            layer_names[k])

        this_partial_model_object = cnn.model_to_feature_generator(
            model_object=model_object, feature_layer_name=layer_names[k])

        feature_matrix_by_layer[k] = this_partial_model_object.predict(
            predictor_matrix, batch_size=num_examples)

    print SEPARATOR_STRING

    for k in range(num_layers):
        this_output_dir_name = '{0:s}/{1:s}'.format(top_output_dir_name,
                                                    layer_names[k])
        file_system_utils.mkdir_recursive_if_necessary(
            directory_name=this_output_dir_name)

        _plot_feature_maps_one_layer(feature_matrix=feature_matrix_by_layer[k],
                                     layer_name=layer_names[k],
                                     output_dir_name=this_output_dir_name)
        print SEPARATOR_STRING
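
`cnn.model_to_feature_generator` presumably builds a truncated model whose output is the activation of the named layer; in stock Keras the same thing can be written directly with the functional API. A minimal sketch (the layer name in the usage comment is hypothetical):

import keras

def model_to_feature_extractor(model_object, feature_layer_name):
    """Returns a model that outputs activations of one intermediate layer."""

    return keras.models.Model(
        inputs=model_object.input,
        outputs=model_object.get_layer(name=feature_layer_name).output)

# Usage: feature maps for a batch of examples from a hypothetical layer.
# partial_model = model_to_feature_extractor(model_object, 'conv2d_3')
# feature_matrix = partial_model.predict(predictor_matrix, batch_size=32)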