def _find_input_files(
        top_input_dir_name, first_spc_date_string, last_spc_date_string):
    """Finds input files (containing unshuffled examples).

    Each file found is opened in metadata-only mode so that its examples can
    be counted.

    :param top_input_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :return: input_example_file_names: 1-D list of paths to input files.
    :return: num_input_examples: Total number of examples in these files.
    """

    # Missing files are tolerated (`raise_error_if_any_missing=False`), so the
    # list may be shorter than the requested date range suggests.
    input_example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_input_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    num_input_examples = 0

    for this_file_name in input_example_file_names:
        # Function-call form of print, so that this works on Python 3 (and
        # still prints the same single string on Python 2).
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, metadata_only=True)

        # Number of storm IDs in the file = number of examples in the file.
        num_input_examples += len(
            this_example_dict[input_examples.STORM_IDS_KEY])

    return input_example_file_names, num_input_examples
Exemplo n.º 2
0
def _run(input_cnn_file_name, input_upconvnet_file_name,
         cnn_feature_layer_name, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains upconvnet.

    This is effectively the main method.  It creates the output directory,
    finds shuffled training and validation files, reads the trained CNN (plus
    its metadata) and the upconvnet architecture, writes upconvnet metadata,
    and finally launches training.

    :param input_cnn_file_name: See documentation at top of file.
    :param input_upconvnet_file_name: Same.
    :param cnn_feature_layer_name: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # Convert all four time strings to Unix seconds (same order as the
    # arguments: training first/last, then validation first/last).
    (
        first_training_time_unix_sec, last_training_time_unix_sec,
        first_validation_time_unix_sec, last_validation_time_unix_sec
    ) = [
        time_conversion.string_to_unix_sec(this_time_string, TIME_FORMAT)
        for this_time_string in (
            first_training_time_string, last_training_time_string,
            first_validation_time_string, last_validation_time_string
        )
    ]

    # Find shuffled example files: training directory first, then validation.
    training_file_names, validation_file_names = [
        input_examples.find_many_example_files(
            top_directory_name=this_directory_name,
            shuffled=True,
            first_batch_number=FIRST_BATCH_NUMBER,
            last_batch_number=LAST_BATCH_NUMBER,
            raise_error_if_any_missing=False)
        for this_directory_name in (
            top_training_dir_name, top_validation_dir_name
        )
    ]

    # Trained CNN and its metadata.
    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = cnn.read_model(input_cnn_file_name)
    cnn_model_object.summary()
    print(SEPARATOR_STRING)

    cnn_metafile_name = cnn.find_metafile(
        model_file_name=input_cnn_file_name, raise_error_if_missing=True)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    # Upconvnet architecture.
    print('Reading upconvnet architecture from: "{0:s}"...'.format(
        input_upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(input_upconvnet_file_name)

    # TODO(thunderhoser): This is a HACK.
    upconvnet_model_object.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=keras.optimizers.Adam())

    upconvnet_model_object.summary()
    print(SEPARATOR_STRING)

    # A placeholder model path inside the output directory is used only to
    # derive the metafile location; the file need not exist.
    placeholder_model_file_name = '{0:s}/foo.h5'.format(output_dir_name)
    upconvnet_metafile_name = cnn.find_metafile(
        model_file_name=placeholder_model_file_name,
        raise_error_if_missing=False)

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        upconvnet_metafile_name))

    # Keyword arguments passed identically to both the metadata writer and
    # the training routine.
    shared_kwarg_dict = {
        'cnn_feature_layer_name': cnn_feature_layer_name,
        'num_epochs': num_epochs,
        'num_examples_per_batch': num_examples_per_batch,
        'num_training_batches_per_epoch': num_training_batches_per_epoch,
        'training_example_file_names': training_file_names,
        'first_training_time_unix_sec': first_training_time_unix_sec,
        'last_training_time_unix_sec': last_training_time_unix_sec,
        'num_validation_batches_per_epoch': num_validation_batches_per_epoch,
        'validation_example_file_names': validation_file_names,
        'first_validation_time_unix_sec': first_validation_time_unix_sec,
        'last_validation_time_unix_sec': last_validation_time_unix_sec
    }

    upconvnet.write_model_metadata(
        cnn_file_name=input_cnn_file_name,
        pickle_file_name=upconvnet_metafile_name,
        **shared_kwarg_dict)

    print(SEPARATOR_STRING)

    upconvnet.train_upconvnet(
        upconvnet_model_object=upconvnet_model_object,
        output_dir_name=output_dir_name,
        cnn_model_object=cnn_model_object,
        cnn_metadata_dict=cnn_metadata_dict,
        **shared_kwarg_dict)
Exemplo n.º 3
0
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, do_backwards_test,
         separate_radar_heights, downsampling_keys, downsampling_values,
         num_bootstrap_reps, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.  It reads the model and its
    metadata, pulls examples from a generator, prints Pearson correlations
    between predictors, runs the forward or backwards permutation test, and
    writes the results.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param do_backwards_test: Same.
    :param separate_radar_heights: Same.
    :param downsampling_keys: Same.
    :param downsampling_values: Same.
    :param num_bootstrap_reps: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    metafile_name = cnn.find_metafile(model_file_name=model_file_name)
    print('Reading metadata from: "{0:s}"...'.format(metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(metafile_name)

    # Downsampling is applied only if more than one key was given.
    downsampling_dict = None
    if len(downsampling_keys) > 1:
        downsampling_dict = dict(zip(downsampling_keys, downsampling_values))

    # Reuse the training options stored with the model, overriding the
    # entries that depend on this run's inputs.
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]
    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = downsampling_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict.update({
        trainval_io.EXAMPLE_FILES_KEY: example_file_names,
        trainval_io.FIRST_STORM_TIME_KEY:
            time_conversion.get_start_of_spc_date(first_spc_date_string),
        trainval_io.LAST_STORM_TIME_KEY:
            time_conversion.get_end_of_spc_date(last_spc_date_string),
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: NUM_EXAMPLES_PER_BATCH
    })

    # Pick the generator that matches the model type, then create it once.
    generator_kwarg_dict = {
        'option_dict': training_option_dict,
        'desired_num_examples': num_examples
    }
    list_of_operation_dicts = cnn_metadata_dict[cnn.LAYER_OPERATIONS_KEY]

    if list_of_operation_dicts is not None:
        generator_function = testing_io.gridrad_generator_2d_reduced
        generator_kwarg_dict[
            'list_of_operation_dicts'] = list_of_operation_dicts
    elif cnn_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_function = testing_io.myrorss_generator_2d3d
    else:
        generator_function = testing_io.generator_2d_or_3d

    generator_object = generator_function(**generator_kwarg_dict)

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    predictor_matrices = None

    print(SEPARATOR_STRING)

    # At most one generator call per example file; stop early if the
    # generator is exhausted.
    max_num_batches = len(example_file_names)

    for _ in range(max_num_batches):
        try:
            batch_dict = next(generator_object)
        except StopIteration:
            break

        print(SEPARATOR_STRING)

        full_storm_id_strings += batch_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec, batch_dict[testing_io.STORM_TIMES_KEY]
        ))

        batch_target_values = batch_dict[testing_io.TARGET_ARRAY_KEY]

        # An array with more than one dimension is collapsed to the index of
        # the largest entry along axis 1.
        if len(batch_target_values.shape) > 1:
            batch_target_values = numpy.argmax(batch_target_values, axis=1)

        target_values = numpy.concatenate(
            (target_values, batch_target_values))

        batch_predictor_matrices = batch_dict[testing_io.INPUT_MATRICES_KEY]

        if predictor_matrices is None:
            predictor_matrices = copy.deepcopy(batch_predictor_matrices)
            continue

        for matrix_index in range(len(predictor_matrices)):
            predictor_matrices[matrix_index] = numpy.concatenate((
                predictor_matrices[matrix_index],
                batch_predictor_matrices[matrix_index]
            ))

    print(SEPARATOR_STRING)
    correlation_matrix, predictor_names = correlation.get_pearson_correlations(
        predictor_matrices=predictor_matrices,
        cnn_metadata_dict=cnn_metadata_dict,
        separate_radar_heights=separate_radar_heights)
    print(SEPARATOR_STRING)

    # Print the upper triangle (including the diagonal) of the correlation
    # matrix.
    num_predictors = len(predictor_names)
    message_format_string = (
        'Pearson correlation between "{0:s}" and "{1:s}" = {2:.3f}'
    )

    for i in range(num_predictors):
        for j in range(i, num_predictors):
            print(message_format_string.format(
                predictor_names[i], predictor_names[j],
                correlation_matrix[i, j]
            ))

    print(SEPARATOR_STRING)

    # Forward and backwards tests take identical arguments.
    if do_backwards_test:
        test_function = permutation.run_backwards_test
    else:
        test_function = permutation.run_forward_test

    result_dict = test_function(
        model_object=model_object,
        predictor_matrices=predictor_matrices,
        target_values=target_values,
        cnn_metadata_dict=cnn_metadata_dict,
        cost_function=permutation_utils.negative_auc_function,
        separate_radar_heights=separate_radar_heights,
        num_bootstrap_reps=num_bootstrap_reps)

    print(SEPARATOR_STRING)

    result_dict.update({
        permutation_utils.MODEL_FILE_KEY: model_file_name,
        permutation_utils.TARGET_VALUES_KEY: target_values,
        permutation_utils.FULL_IDS_KEY: full_storm_id_strings,
        permutation_utils.STORM_TIMES_KEY: storm_times_unix_sec
    })

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation_utils.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
def _run(input_model_file_name, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, target_name,
         downsampling_classes, downsampling_fractions, monitor_string,
         weight_loss_function, x_translations_pixels, y_translations_pixels,
         ccw_rotation_angles_deg, noise_standard_deviation, num_noisings,
         flip_in_x, flip_in_y, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         num_examples_per_train_batch, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_validn_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains CNN with 2-D and 3-D MYRORSS images.

    This is effectively the main method.  It normalizes the input arguments
    (turning "nothing requested" sentinels into None), finds shuffled training
    and validation files, reads and compiles the model architecture, writes
    model metadata, and launches training.

    :param input_model_file_name: See documentation at top of file.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # An empty string or the literal 'None' as first entry means "no
    # soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # Downsampling is applied only if more than one class was given.
    if len(downsampling_classes) > 1:
        downsampling_dict = dict(
            zip(downsampling_classes, downsampling_fractions))
    else:
        downsampling_dict = None

    # A single (0, 0) translation means "no translation augmentation".  Both
    # offsets are checked element-wise: testing the sum would wrongly match
    # offsets that cancel (e.g. x = 1, y = -1), would raise for arrays of
    # unequal length, and would never match for plain lists.
    no_translations = (
        len(x_translations_pixels) == 1
        and len(y_translations_pixels) == 1
        and x_translations_pixels[0] == 0
        and y_translations_pixels[0] == 0
    )

    if no_translations:
        x_translations_pixels = None
        y_translations_pixels = None

    # A single rotation angle of zero means "no rotation augmentation".
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    # Non-positive number of noisings means "no noise augmentation".
    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.
    print(
        'Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: True,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    # `model_object.input` is a single tensor or a list of tensors; always
    # work with a list.
    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    # Exactly two input tensors is taken to mean that reflectivity is
    # upsampled; in that case the grid dimensions read from the first input
    # tensor are halved.
    upsample_refl = len(list_of_input_tensors) == 2
    num_grid_rows = list_of_input_tensors[0].get_shape().as_list()[1]
    num_grid_columns = list_of_input_tensors[0].get_shape().as_list()[2]

    if upsample_refl:
        num_grid_rows = int(numpy.round(num_grid_rows / 2))
        num_grid_columns = int(numpy.round(num_grid_columns / 2))

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.RADAR_FIELDS_KEY:
        input_examples.AZIMUTHAL_SHEAR_FIELD_NAMES,
        trainval_io.RADAR_HEIGHTS_KEY: REFLECTIVITY_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y,
        trainval_io.UPSAMPLE_REFLECTIVITY_KEY: upsample_refl
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(pickle_file_name=model_metafile_name,
                             metadata_dict=metadata_dict,
                             training_option_dict=training_option_dict)

    cnn.train_cnn_2d3d_myrorss(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch)
Exemplo n.º 5
0
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, num_bootstrap_reps,
         confidence_level, class_fraction_keys, class_fraction_values,
         output_dir_name):
    """Evaluates CNN (convolutional neural net) predictions.

    This is effectively the main method.  It reads the model and its
    metadata, pulls examples from a generator, applies the model to each
    batch, and passes the accumulated forecast probabilities and observed
    labels to the evaluation helper.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param num_bootstrap_reps: Same.
    :param confidence_level: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # More than two output neurons is taken to mean multi-class
    # classification, which this script does not handle.
    final_layer_dimensions = (
        model_object.layers[-1].output.get_shape().as_list()
    )
    num_output_neurons = final_layer_dimensions[-1]

    if num_output_neurons > 2:
        raise ValueError((
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification.  This script handles only binary classification.'
        ).format(num_output_neurons))

    # `model_object.input` is a single tensor or a list of tensors; always
    # work with a list.
    list_of_input_tensors = model_object.input
    if not isinstance(list_of_input_tensors, list):
        list_of_input_tensors = [list_of_input_tensors]

    # A model with one input tensor that has a single dimension between the
    # first and last axes is treated as a soundings-only model.
    soundings_only = (
        len(list_of_input_tensors) == 1
        and len(numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int
        )) == 1
    )

    # Metadata is expected in the same directory as the model file.
    model_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.dirname(model_file_name))

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Class-conditional sampling is applied only if more than one key was
    # given.
    class_to_sampling_fraction_dict = None
    if len(class_fraction_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(class_fraction_keys, class_fraction_values))

    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = class_to_sampling_fraction_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string))
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string))

    # Pick the generator that matches the model type, then create it once.
    generator_kwarg_dict = {
        'option_dict': training_option_dict,
        'num_examples_total': num_examples
    }

    if soundings_only:
        generator_function = testing_io.sounding_generator
    elif model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_function = testing_io.gridrad_generator_2d_reduced
        generator_kwarg_dict['list_of_operation_dicts'] = (
            model_metadata_dict[cnn.LAYER_OPERATIONS_KEY])
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_function = testing_io.myrorss_generator_2d3d
    else:
        generator_function = testing_io.generator_2d_or_3d

    generator_object = generator_function(**generator_kwarg_dict)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    forecast_probabilities = numpy.array([])
    observed_labels = numpy.array([], dtype=int)

    # At most one generator call per example file; stop early if the
    # generator is exhausted.
    max_num_batches = len(example_file_names)

    for _ in range(max_num_batches):
        try:
            batch_dict = next(generator_object)
        except StopIteration:
            break

        print(SEPARATOR_STRING)

        observed_labels = numpy.concatenate((
            observed_labels, batch_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            batch_predictor_matrices = [
                batch_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            batch_predictor_matrices = batch_dict[
                testing_io.INPUT_MATRICES_KEY]

        # When soundings are used, the sounding matrix is the last predictor
        # matrix.
        batch_sounding_matrix = (
            batch_predictor_matrices[-1] if include_soundings else None
        )

        if soundings_only:
            batch_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object,
                sounding_matrix=batch_sounding_matrix,
                verbose=True)

        elif (model_metadata_dict[cnn.CONV_2D3D_KEY] and not
              training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]):

            # Hybrid 2-D/3-D model without upsampled reflectivity:
            # reflectivity and azimuthal shear are passed separately.
            batch_probability_matrix = cnn.apply_2d3d_cnn(
                model_object=model_object,
                reflectivity_matrix_dbz=batch_predictor_matrices[0],
                azimuthal_shear_matrix_s01=batch_predictor_matrices[1],
                sounding_matrix=batch_sounding_matrix,
                verbose=True)
        else:
            # Either a plain 2-D/3-D model or a hybrid model with upsampled
            # reflectivity: one radar-image matrix.
            batch_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=batch_predictor_matrices[0],
                sounding_matrix=batch_sounding_matrix,
                verbose=True)

        print(SEPARATOR_STRING)

        # Keep only the last column (probability of the last class).
        forecast_probabilities = numpy.concatenate((
            forecast_probabilities, batch_probability_matrix[:, -1]
        ))

    model_eval_helper.run_evaluation(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        num_bootstrap_reps=num_bootstrap_reps,
        confidence_level=confidence_level,
        output_dir_name=output_dir_name)
def _run(top_example_dir_name, first_spc_date_string, last_spc_date_string,
         use_low_level, use_mid_level, num_radar_rows, num_radar_columns,
         output_dir_name):
    """Uses az-shear thresholds to make probabilistic tornado predictions.

    This is effectively the main method.  For each example, the predictor is
    the maximum of the shear matrix over axes 1-3; the forecast probability
    is the average rank of that predictor divided by the number of examples.

    :param top_example_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param use_low_level: Same.
    :param use_mid_level: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if `use_low_level == use_mid_level == False`.
    """

    # Non-positive grid dimensions mean "do not specify grid size".
    # Otherwise the reflectivity grid has half as many rows and columns as
    # requested.
    num_reflectivity_rows = None
    num_reflectivity_columns = None

    if num_radar_rows > 0 and num_radar_columns > 0:
        num_reflectivity_rows = int(numpy.round(num_radar_rows / 2))
        num_reflectivity_columns = int(numpy.round(num_radar_columns / 2))

    if not use_low_level and not use_mid_level:
        raise ValueError(
            'At least one of `{0:s}` and `{1:s}` must be true.'.format(
                LOW_LEVEL_ARG_NAME, MID_LEVEL_ARG_NAME)
        )

    radar_field_names = []
    if use_low_level:
        radar_field_names += [radar_utils.LOW_LEVEL_SHEAR_NAME]
    if use_mid_level:
        radar_field_names += [radar_utils.MID_LEVEL_SHEAR_NAME]

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    first_storm_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_storm_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: example_file_names,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: NUM_EXAMPLES_PER_BATCH,
        trainval_io.FIRST_STORM_TIME_KEY: first_storm_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_storm_time_unix_sec,
        trainval_io.RADAR_FIELDS_KEY: radar_field_names,
        trainval_io.RADAR_HEIGHTS_KEY: numpy.array([1000], dtype=int),
        trainval_io.NUM_ROWS_KEY: num_reflectivity_rows,
        trainval_io.NUM_COLUMNS_KEY: num_reflectivity_columns,
        trainval_io.UPSAMPLE_REFLECTIVITY_KEY: False,
        trainval_io.SOUNDING_FIELDS_KEY: None,
        trainval_io.SOUNDING_HEIGHTS_KEY: None,
        trainval_io.NORMALIZATION_TYPE_KEY: None,
        trainval_io.TARGET_NAME_KEY: TARGET_NAME,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: None
    }

    generator_object = testing_io.myrorss_generator_2d3d(
        option_dict=option_dict, desired_num_examples=LARGE_INTEGER)

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    predictor_values = numpy.array([], dtype=float)
    observed_labels = numpy.array([], dtype=int)

    # Drain the generator, accumulating IDs, times, predictors and labels.
    for batch_dict in generator_object:
        print(SEPARATOR_STRING)

        full_storm_id_strings += batch_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec, batch_dict[testing_io.STORM_TIMES_KEY])
        )

        # The second input matrix holds azimuthal shear.
        batch_shear_matrix_s01 = batch_dict[testing_io.INPUT_MATRICES_KEY][1]
        print(batch_shear_matrix_s01.shape)

        # One predictor per example: max shear over axes 1, 2 and 3.
        batch_predictor_values = numpy.max(
            batch_shear_matrix_s01, axis=(1, 2, 3))

        predictor_values = numpy.concatenate(
            (predictor_values, batch_predictor_values)
        )
        observed_labels = numpy.concatenate(
            (observed_labels, batch_dict[testing_io.TARGET_ARRAY_KEY])
        )

    # Average rank divided by the number of examples serves as the forecast
    # probability.
    num_examples = len(predictor_values)
    forecast_probabilities = (
        rankdata(predictor_values, method='average') / num_examples
    )

    # Two-column matrix: [probability of class 0, probability of class 1].
    class_probability_matrix = numpy.column_stack(
        (1. - forecast_probabilities, forecast_probabilities)
    )

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels,
        storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=TARGET_NAME,
        model_file_name='None')
Exemplo n.º 7
0
def _run(input_cnn_file_name, input_upconvnet_file_name,
         cnn_feature_layer_name, top_training_dir_name,
         first_training_time_string, last_training_time_string,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_model_file_name):
    """Trains upconvnet.

    This is effectively the main method.

    In order, this method: converts the four time strings to Unix seconds,
    finds shuffled training and validation example files, reads the trained
    CNN and its metadata, reads the upconvnet architecture and compiles a clone
    of it, writes upconvnet metadata to the directory of the output model file,
    and finally calls `upconvnet.train_upconvnet`.

    All messages are printed with single-argument `print(...)` calls, which
    produce the same output under Python 2 and Python 3.

    :param input_cnn_file_name: See documentation at top of file.
    :param input_upconvnet_file_name: Same.
    :param cnn_feature_layer_name: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_model_file_name: Same.
    """

    # Convert time limits from strings to Unix seconds.  These are not used to
    # find files below; they are passed on to the metadata writer and trainer.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # Find training and validation files (shuffled files, selected by batch
    # number rather than by time).
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    print('Reading trained CNN from: "{0:s}"...'.format(input_cnn_file_name))
    cnn_model_object = cnn.read_model(input_cnn_file_name)
    cnn_model_object.summary()
    print(SEPARATOR_STRING)

    # CNN metadata is read from "model_metadata.p" in the same directory as the
    # CNN file itself.
    cnn_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(input_cnn_file_name)[0])

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)

    print('Reading upconvnet architecture from: "{0:s}"...'.format(
        input_upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(input_upconvnet_file_name)

    # NOTE(review): per the Keras documentation, `clone_model` creates new
    # layers (and thus new weights), so presumably only the architecture of the
    # input file is reused here -- confirm.
    upconvnet_model_object = keras.models.clone_model(upconvnet_model_object)

    # TODO(thunderhoser): This is a HACK.
    upconvnet_model_object.compile(loss=keras.losses.mean_squared_error,
                                   optimizer=keras.optimizers.Adam())

    print(SEPARATOR_STRING)
    upconvnet_model_object.summary()
    print(SEPARATOR_STRING)

    # Upconvnet metadata goes in "model_metadata.p" in the same directory as
    # the output model file.
    upconvnet_metafile_name = '{0:s}/model_metadata.p'.format(
        os.path.split(output_model_file_name)[0])

    print('Writing upconvnet metadata to: "{0:s}"...'.format(
        upconvnet_metafile_name))

    upconvnet.write_model_metadata(
        cnn_file_name=input_cnn_file_name,
        cnn_feature_layer_name=cnn_feature_layer_name,
        num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec,
        pickle_file_name=upconvnet_metafile_name)

    print(SEPARATOR_STRING)

    upconvnet.train_upconvnet(
        upconvnet_model_object=upconvnet_model_object,
        output_model_file_name=output_model_file_name,
        cnn_model_object=cnn_model_object,
        cnn_feature_layer_name=cnn_feature_layer_name,
        cnn_metadata_dict=cnn_metadata_dict,
        num_epochs=num_epochs,
        num_examples_per_batch=num_examples_per_batch,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_example_file_names=training_file_names,
        first_training_time_unix_sec=first_training_time_unix_sec,
        last_training_time_unix_sec=last_training_time_unix_sec,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_example_file_names=validation_file_names,
        first_validation_time_unix_sec=first_validation_time_unix_sec,
        last_validation_time_unix_sec=last_validation_time_unix_sec)
Exemplo n.º 8
0
def _run(upconvnet_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples_per_date, downsampling_keys,
         downsampling_values, top_output_dir_name):
    """Applies trained upconvnet to each example file in a range of SPC dates.

    This is effectively the main method.

    :param upconvnet_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples_per_date: Same.
    :param downsampling_keys: Same.
    :param downsampling_values: Same.
    :param top_output_dir_name: Same.
    """

    # Read the upconvnet and its metadata.
    print('Reading upconvnet from: "{0:s}"...'.format(upconvnet_file_name))
    upconvnet_model_object = cnn.read_model(upconvnet_file_name)
    upconvnet_metafile_name = cnn.find_metafile(upconvnet_file_name)

    message_string = 'Reading upconvnet metadata from: "{0:s}"...'.format(
        upconvnet_metafile_name)
    print(message_string)

    upconvnet_metadata_dict = upconvnet.read_model_metadata(
        upconvnet_metafile_name)

    # Read the CNN named in the upconvnet metadata, then the CNN's metadata.
    cnn_file_name = upconvnet_metadata_dict[upconvnet.CNN_FILE_KEY]
    print('Reading CNN from: "{0:s}"...'.format(cnn_file_name))
    cnn_model_object = cnn.read_model(cnn_file_name)
    cnn_metafile_name = cnn.find_metafile(cnn_file_name)

    print('Reading CNN metadata from: "{0:s}"...'.format(cnn_metafile_name))
    cnn_metadata_dict = cnn.read_model_metadata(cnn_metafile_name)
    training_option_dict = cnn_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # A downsampling dictionary is built only when more than one key is given;
    # otherwise the sampling fractions are set to None.
    downsampling_dict = (
        dict(zip(downsampling_keys, downsampling_values))
        if len(downsampling_keys) > 1 else None
    )

    # Override options in place.  `training_option_dict` is the object stored
    # in `cnn_metadata_dict`, so presumably these overrides reach
    # `_apply_upconvnet_one_file` through `cnn_metadata_dict` -- confirm.
    training_option_dict.update({
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: NUM_EXAMPLES_PER_BATCH,
        trainval_io.FIRST_STORM_TIME_KEY: EARLY_TIME_UNIX_SEC,
        trainval_io.LAST_STORM_TIME_KEY: LATE_TIME_UNIX_SEC
    })

    # Find unshuffled example files in the requested range of SPC dates.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Apply the upconvnet to one file at a time.
    for this_example_file_name in example_file_names:
        this_feature_layer_name = upconvnet_metadata_dict[
            upconvnet.CNN_FEATURE_LAYER_KEY]

        _apply_upconvnet_one_file(
            example_file_name=this_example_file_name,
            num_examples=num_examples_per_date,
            upconvnet_model_object=upconvnet_model_object,
            cnn_model_object=cnn_model_object,
            cnn_metadata_dict=cnn_metadata_dict,
            cnn_feature_layer_name=this_feature_layer_name,
            upconvnet_file_name=upconvnet_file_name,
            top_output_dir_name=top_output_dir_name)

        print(SEPARATOR_STRING)
def _run(top_example_dir_name, first_spc_date_string, last_spc_date_string,
         min_percentile_level, max_percentile_level, num_radar_rows,
         num_radar_columns, output_file_name):
    """Finds normalization parameters for GridRad data.

    This is effectively the main method.

    Parameters are accumulated over all unshuffled example files found in the
    given range of SPC dates, for four groups: each radar field, each radar
    field/height pair, each sounding field, and each sounding field/height
    pair.  The four resulting tables are printed and then written to one file
    with `dl_utils.write_normalization_params`.

    :param top_example_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param min_percentile_level: Same.
    :param max_percentile_level: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param output_file_name: Same.
    """

    # Non-positive row/column counts are replaced with None before being passed
    # to `read_example_file` (presumably None means "do not crop" -- TODO
    # confirm against `input_examples.read_example_file`).
    if num_radar_rows <= 0:
        num_radar_rows = None
    if num_radar_columns <= 0:
        num_radar_columns = None

    # Time limits used to subset examples when each file is read below.
    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    # Disabled alternative: find shuffled files by batch number.
    # example_file_names = input_examples.find_many_example_files(
    #     top_directory_name=top_example_dir_name, shuffled=True,
    #     first_batch_number=0, last_batch_number=LARGE_INTEGER,
    #     raise_error_if_any_missing=False)

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Read the first file only to learn which fields and heights are present.
    # NOTE(review): the dictionaries initialized below are keyed by these
    # fields/heights, so later files presumably must contain the same ones --
    # confirm.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], read_all_target_vars=True)

    sounding_field_names = this_example_dict[
        input_examples.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = this_example_dict[
        input_examples.SOUNDING_HEIGHTS_KEY]

    # -1 is a sentinel meaning that the example dictionary holds a separate
    # reflectivity matrix; it is handled by the final `else` branches below.
    # Otherwise the value is the rank of the radar-image matrix minus 2
    # (presumably discounting the example and channel axes -- TODO confirm).
    if input_examples.REFL_IMAGE_MATRIX_KEY in this_example_dict:
        num_radar_dimensions = -1
    else:
        num_radar_dimensions = (len(
            this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY].shape) -
                                2)

    # TODO(thunderhoser): Put this in separate method.
    if num_radar_dimensions == 3:
        radar_field_names = this_example_dict[input_examples.RADAR_FIELDS_KEY]
        radar_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        # Build every field/height combination: each field name is repeated
        # once per height, and the full height array is appended once per
        # field.
        radar_field_name_by_pair = []
        radar_height_by_pair_m_agl = numpy.array([], dtype=int)

        for this_field_name in radar_field_names:
            radar_field_name_by_pair += ([this_field_name] *
                                         len(radar_heights_m_agl))
            radar_height_by_pair_m_agl = numpy.concatenate(
                (radar_height_by_pair_m_agl, radar_heights_m_agl))

    elif num_radar_dimensions == 2:
        # Here the stored field and height lists are used directly as the
        # per-pair lists (presumably one entry per channel -- TODO confirm).
        radar_field_name_by_pair = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_height_by_pair_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        # Unique field names, sorted.
        radar_field_names = list(set(radar_field_name_by_pair))
        radar_field_names.sort()

    else:
        # Separate reflectivity and azimuthal-shear matrices.  In this case
        # RADAR_FIELDS_KEY is treated as the list of az-shear fields and
        # RADAR_HEIGHTS_KEY as the list of reflectivity heights.
        az_shear_field_names = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_field_names = [radar_utils.REFL_NAME] + az_shear_field_names

        refl_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]
        radar_field_name_by_pair = (
            [radar_utils.REFL_NAME] * len(refl_heights_m_agl) +
            az_shear_field_names)

        # Every az-shear field is paired with the same constant height.
        az_shear_heights_m_agl = numpy.full(len(az_shear_field_names),
                                            radar_utils.SHEAR_HEIGHT_M_ASL)
        radar_height_by_pair_m_agl = numpy.concatenate(
            (refl_heights_m_agl, az_shear_heights_m_agl)).astype(int)

    # Initialize parameters.
    orig_parameter_dict = {
        NUM_VALUES_KEY: 0,
        MEAN_VALUE_KEY: 0.,
        MEAN_OF_SQUARES_KEY: 0.
    }

    radar_z_score_dict_no_height = {}
    radar_z_score_dict_with_height = {}
    radar_freq_dict_no_height = {}
    num_radar_fields = len(radar_field_names)
    num_radar_field_height_pairs = len(radar_field_name_by_pair)

    # One z-score dictionary and one (initially empty) frequency dictionary per
    # radar field.  Deep copies keep the accumulators independent.
    for j in range(num_radar_fields):
        radar_z_score_dict_no_height[radar_field_names[j]] = copy.deepcopy(
            orig_parameter_dict)
        radar_freq_dict_no_height[radar_field_names[j]] = {}

    # One z-score dictionary per radar field/height pair, keyed by the tuple
    # (field name, height).
    for k in range(num_radar_field_height_pairs):
        radar_z_score_dict_with_height[
            radar_field_name_by_pair[k],
            radar_height_by_pair_m_agl[k]] = copy.deepcopy(orig_parameter_dict)

    sounding_z_score_dict_no_height = {}
    sounding_z_score_dict_with_height = {}
    sounding_freq_dict_no_height = {}
    num_sounding_fields = len(sounding_field_names)
    num_sounding_heights = len(sounding_heights_m_agl)

    # Same initialization for soundings; every sounding field is paired with
    # every sounding height.
    for j in range(num_sounding_fields):
        sounding_z_score_dict_no_height[sounding_field_names[j]] = (
            copy.deepcopy(orig_parameter_dict))
        sounding_freq_dict_no_height[sounding_field_names[j]] = {}

        for k in range(num_sounding_heights):
            sounding_z_score_dict_with_height[
                sounding_field_names[j],
                sounding_heights_m_agl[k]] = copy.deepcopy(orig_parameter_dict)

    # Main loop: read each file and update all accumulators with its data.
    for this_example_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_example_file_name))
        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_example_file_name,
            read_all_target_vars=True,
            num_rows_to_keep=num_radar_rows,
            num_columns_to_keep=num_radar_columns,
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        # Skip files that returned no examples.
        this_num_examples = len(this_example_dict[input_examples.FULL_IDS_KEY])
        if this_num_examples == 0:
            continue

        # Update params for each radar field (all heights together).
        for j in range(num_radar_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                radar_field_names[j]))

            if num_radar_dimensions == 3:
                # Select this field along the last axis.
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_names[j])

                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           this_field_index]

            elif num_radar_dimensions == 2:
                # Select every entry along the last axis whose field name
                # matches.
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])

                these_field_indices = numpy.where(
                    all_field_names == radar_field_names[j])[0]

                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           these_field_indices]

            else:
                if radar_field_names[j] == radar_utils.REFL_NAME:
                    # Reflectivity: index 0 along the last axis of the
                    # reflectivity matrix.
                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][..., 0]
                else:
                    # Az shear: select this field along the last axis of the
                    # az-shear matrix.
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_names[j])

                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_no_height[radar_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=radar_z_score_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix))

            radar_freq_dict_no_height[radar_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=radar_freq_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix,
                    rounding_base=RADAR_INTERVAL_DICT[radar_field_names[j]]))

        # Update params for each radar field/height pair.  Only z-score params
        # are updated here (no frequency dictionary).
        for k in range(num_radar_field_height_pairs):
            print(('Updating normalization params for "{0:s}" at {1:d} metres '
                   'AGL...').format(radar_field_name_by_pair[k],
                                    radar_height_by_pair_m_agl[k]))

            if num_radar_dimensions == 3:
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_name_by_pair[k])

                # First index at which the stored height equals this height.
                this_height_index = numpy.where(
                    this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                    radar_height_by_pair_m_agl[k])[0][0]

                # Last two axes are indexed by height, then field.
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           this_height_index,
                                                           this_field_index]

            elif num_radar_dimensions == 2:
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])
                all_heights_m_agl = this_example_dict[
                    input_examples.RADAR_HEIGHTS_KEY]

                # First entry where both the field name and height match.
                this_index = numpy.where(
                    numpy.logical_and(
                        all_field_names == radar_field_name_by_pair[k],
                        all_heights_m_agl ==
                        radar_height_by_pair_m_agl[k]))[0][0]

                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][..., this_index]

            else:
                if radar_field_name_by_pair[k] == radar_utils.REFL_NAME:
                    this_height_index = numpy.where(
                        this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                        radar_height_by_pair_m_agl[k])[0][0]

                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][
                            ..., this_height_index, 0]
                else:
                    # Az-shear fields are selected by field index only; the
                    # height is not used for indexing.
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_name_by_pair[k])

                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_with_height[
                radar_field_name_by_pair[k],
                radar_height_by_pair_m_agl[k]] = _update_z_score_params(
                    z_score_param_dict=radar_z_score_dict_with_height[
                        radar_field_name_by_pair[k],
                        radar_height_by_pair_m_agl[k]],
                    new_data_matrix=this_radar_matrix)

        # Update params for each sounding field, then for each height of that
        # field.
        for j in range(num_sounding_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                sounding_field_names[j]))

            this_field_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY].index(
                    sounding_field_names[j])

            # All heights of this field (field is the last axis).
            this_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY][..., this_field_index]

            sounding_z_score_dict_no_height[sounding_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=sounding_z_score_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix))

            sounding_freq_dict_no_height[sounding_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=sounding_freq_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix,
                    rounding_base=SOUNDING_INTERVAL_DICT[
                        sounding_field_names[j]]))

            for k in range(num_sounding_heights):
                this_height_index = numpy.where(
                    this_example_dict[input_examples.SOUNDING_HEIGHTS_KEY] ==
                    sounding_heights_m_agl[k])[0][0]

                # One height of this field (last two axes are height, field).
                # This rebinds `this_sounding_matrix`, which is safe because
                # the all-heights matrix is no longer needed for this field.
                this_sounding_matrix = this_example_dict[
                    input_examples.SOUNDING_MATRIX_KEY][..., this_height_index,
                                                        this_field_index]

                print(('Updating normalization params for "{0:s}" at {1:d} m '
                       'AGL...').format(sounding_field_names[j],
                                        sounding_heights_m_agl[k]))

                sounding_z_score_dict_with_height[
                    sounding_field_names[j],
                    sounding_heights_m_agl[k]] = _update_z_score_params(
                        z_score_param_dict=sounding_z_score_dict_with_height[
                            sounding_field_names[j],
                            sounding_heights_m_agl[k]],
                        new_data_matrix=this_sounding_matrix)

        print(SEPARATOR_STRING)

    # Convert dictionaries to pandas DataFrames.  Frequency dictionaries and
    # percentile levels are passed only for the per-field ("no height") tables.
    radar_table_no_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_no_height,
        frequency_dict_dict=radar_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)

    print('Normalization params for each radar field:\n{0:s}\n\n'.format(
        str(radar_table_no_height)))

    radar_table_with_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_with_height)

    print(('Normalization params for each radar field/height pair:\n{0:s}\n\n'
           ).format(str(radar_table_with_height)))

    sounding_table_no_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_no_height,
        frequency_dict_dict=sounding_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)

    print('Normalization params for each sounding field:\n{0:s}\n\n'.format(
        str(sounding_table_no_height)))

    sounding_table_with_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_with_height)

    print(
        ('Normalization params for each sounding field/height pair:\n{0:s}\n\n'
         ).format(str(sounding_table_with_height)))

    print('Writing normalization params to file: "{0:s}"...'.format(
        output_file_name))

    # Write all four tables to one file.
    dl_utils.write_normalization_params(
        pickle_file_name=output_file_name,
        radar_table_no_height=radar_table_no_height,
        radar_table_with_height=radar_table_with_height,
        sounding_table_no_height=sounding_table_no_height,
        sounding_table_with_height=sounding_table_with_height)
Exemplo n.º 10
0
def _run(model_file_name, component_type_string, target_class, layer_name,
         neuron_indices_flattened, channel_indices, top_example_dir_name,
         first_spc_date_string, last_spc_date_string, output_file_name):
    """Computes activations of one CNN component type for many storm objects.

    This is effectively the main method.

    The component type is a class, one or more neurons, or one or more
    channels.  The resulting activation matrix has one row per example and one
    column per component; it is written to `output_file_name`.

    :param model_file_name: See documentation at top of file.
    :param component_type_string: Same.
    :param target_class: Same.
    :param layer_name: Same.
    :param neuron_indices_flattened: Same.
    :param channel_indices: Same.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_file_name: Same.
    """

    def _append_column(matrix_so_far, activation_vector):
        """Reshapes activations to a column and appends it to the matrix.

        :param matrix_so_far: Matrix built so far (None if no columns yet).
        :param activation_vector: Activations for one model component.
        :return: New matrix with the extra column.
        """

        new_column = numpy.reshape(
            activation_vector, (len(activation_vector), 1)
        )

        if matrix_so_far is None:
            return new_column + 0.

        return numpy.concatenate((matrix_so_far, new_column), axis=1)

    # Check input args.
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)
    model_interpretation.check_component_type(component_type_string)

    if component_type_string == CHANNEL_COMPONENT_TYPE_STRING:
        error_checking.assert_is_geq_numpy_array(channel_indices, 0)

    neuron_index_matrix = None

    if component_type_string == NEURON_COMPONENT_TYPE_STRING:
        # Negative entries are turned into NaN, and the flat array is then
        # split at NaNs (presumably yielding one index array per neuron --
        # confirm against `general_utils.split_array_by_nan`).
        neuron_indices_flattened = neuron_indices_flattened.astype(float)
        neuron_indices_flattened[neuron_indices_flattened < 0] = numpy.nan

        neuron_index_matrix = numpy.array(
            general_utils.split_array_by_nan(neuron_indices_flattened),
            dtype=int)

    # Read model, then metadata from "model_metadata.p" in the same directory.
    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    model_directory_name = os.path.dirname(model_file_name)
    metadata_file_name = '{0:s}/model_metadata.p'.format(model_directory_name)

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Find example files and point the generator options at them.
    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = None
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        first_time_unix_sec)

    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = last_time_unix_sec

    # Create the generator that matches the model type.
    layer_operation_dicts = model_metadata_dict[cnn.LAYER_OPERATIONS_KEY]

    if layer_operation_dicts is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=layer_operation_dicts,
            num_examples_total=LARGE_INTEGER)
    else:
        if model_metadata_dict[cnn.CONV_2D3D_KEY]:
            generator_function = testing_io.myrorss_generator_2d3d
        else:
            generator_function = testing_io.generator_2d_or_3d

        generator_object = generator_function(
            option_dict=training_option_dict, num_examples_total=LARGE_INTEGER)

    # Compute activation for each example (storm object) and model component.
    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    activation_matrix = None

    print(SEPARATOR_STRING)

    # The generator is polled at most once per example file.
    for _ in example_file_names:
        try:
            this_storm_object_dict = next(generator_object)
        except StopIteration:
            break

        this_list_of_input_matrices = this_storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        full_id_strings += this_storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))

        if component_type_string == CLASS_COMPONENT_TYPE_STRING:
            print('Computing activations for target class {0:d}...'.format(
                target_class))

            these_activations = (
                model_activation.get_class_activation_for_examples(
                    model_object=model_object,
                    target_class=target_class,
                    list_of_input_matrices=this_list_of_input_matrices))

            # Single column: one activation per example.
            this_activation_matrix = numpy.reshape(
                these_activations, (len(these_activations), 1))

        elif component_type_string == NEURON_COMPONENT_TYPE_STRING:
            this_activation_matrix = None

            # One column per neuron.
            for these_neuron_indices in neuron_index_matrix:
                print((
                    'Computing activations for neuron {0:s} in layer "{1:s}"...'
                ).format(str(these_neuron_indices), layer_name))

                these_activations = (
                    model_activation.get_neuron_activation_for_examples(
                        model_object=model_object,
                        layer_name=layer_name,
                        neuron_indices=these_neuron_indices,
                        list_of_input_matrices=this_list_of_input_matrices))

                this_activation_matrix = _append_column(
                    this_activation_matrix, these_activations)

        else:
            this_activation_matrix = None

            # One column per channel.
            for this_channel_index in channel_indices:
                print(
                    'Computing activations for channel {0:d} in layer "{1:s}"...'
                    .format(this_channel_index, layer_name))

                these_activations = (
                    model_activation.get_channel_activation_for_examples(
                        model_object=model_object,
                        layer_name=layer_name,
                        channel_index=this_channel_index,
                        list_of_input_matrices=this_list_of_input_matrices,
                        stat_function_for_neuron_activations=K.max))

                this_activation_matrix = _append_column(
                    this_activation_matrix, these_activations)

        # Stack this batch's rows under the rows collected so far.
        if activation_matrix is not None:
            activation_matrix = numpy.concatenate(
                (activation_matrix, this_activation_matrix), axis=0)
        else:
            activation_matrix = this_activation_matrix + 0.

        print(SEPARATOR_STRING)

    print('Writing activations to file: "{0:s}"...'.format(output_file_name))

    model_activation.write_file(
        pickle_file_name=output_file_name,
        activation_matrix=activation_matrix,
        full_id_strings=full_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        model_file_name=model_file_name,
        component_type_string=component_type_string,
        target_class=target_class,
        layer_name=layer_name,
        neuron_index_matrix=neuron_index_matrix,
        channel_indices=channel_indices)
Exemplo n.º 11
0
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, class_fraction_keys,
         class_fraction_values, num_bootstrap_iters,
         bootstrap_confidence_level, output_file_name):
    """Runs permutation test for predictor importance.

    This is effectively the main method.  In order, it: reads the model and
    its metadata, pulls examples from a testing generator, prints the
    predictor names (plus Pearson correlations when the metadata contains
    layer operations), runs the permutation test and writes the results.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param num_bootstrap_iters: Same.
    :param bootstrap_confidence_level: Same.
    :param output_file_name: Same.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # The metadata file is looked up in the same directory as the model file.
    metadata_file_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Sampling fractions are used only when more than one class key is given.
    class_to_sampling_fraction_dict = None
    if len(class_fraction_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(class_fraction_keys, class_fraction_values))

    training_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY] = class_to_sampling_fraction_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name,
        shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Override the stored training options with the files and time window
    # requested for this run.
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names

    first_storm_time = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = first_storm_time

    last_storm_time = time_conversion.get_end_of_spc_date(
        last_spc_date_string)
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = last_storm_time

    # Pick the generator that matches the model type recorded in the metadata.
    operation_dicts_for_generator = model_metadata_dict[
        cnn.LAYER_OPERATIONS_KEY]

    if operation_dicts_for_generator is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            list_of_operation_dicts=operation_dicts_for_generator,
            num_examples_total=num_examples)
    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict, num_examples_total=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict, num_examples_total=num_examples)

    full_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    list_of_predictor_matrices = None

    print(SEPARATOR_STRING)

    # At most one batch is requested per example file; the loop stops early
    # if the generator is exhausted.
    num_files = len(example_file_names)

    for _ in range(num_files):
        try:
            storm_object_dict = next(generator_object)
        except StopIteration:
            break

        print(SEPARATOR_STRING)

        full_id_strings.extend(storm_object_dict[testing_io.FULL_IDS_KEY])

        new_storm_times_unix_sec = storm_object_dict[
            testing_io.STORM_TIMES_KEY]
        storm_times_unix_sec = numpy.concatenate(
            (storm_times_unix_sec, new_storm_times_unix_sec))

        new_target_values = storm_object_dict[testing_io.TARGET_ARRAY_KEY]

        # Targets with more than one dimension are collapsed to the index of
        # the maximum along axis 1.
        if len(new_target_values.shape) > 1:
            new_target_values = numpy.argmax(new_target_values, axis=1)

        target_values = numpy.concatenate((target_values, new_target_values))

        new_predictor_matrices = storm_object_dict[
            testing_io.INPUT_MATRICES_KEY]

        if list_of_predictor_matrices is None:
            list_of_predictor_matrices = copy.deepcopy(new_predictor_matrices)
            continue

        for k, old_matrix in enumerate(list_of_predictor_matrices):
            list_of_predictor_matrices[k] = numpy.concatenate(
                (old_matrix, new_predictor_matrices[k]))

    predictor_names_by_matrix = _create_predictor_names(
        model_metadata_dict=model_metadata_dict,
        list_of_predictor_matrices=list_of_predictor_matrices)

    for matrix_number, these_names in enumerate(
            predictor_names_by_matrix, start=1):
        print('Predictors in {0:d}th matrix:\n{1:s}\n'.format(
            matrix_number, str(these_names)))

    print(SEPARATOR_STRING)

    # Correlations are printed only when the metadata has layer operations.
    if model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        correlation_matrix, predictor_names = _get_pearson_correlations(
            list_of_predictor_matrices=list_of_predictor_matrices,
            predictor_names_by_matrix=predictor_names_by_matrix,
            sounding_heights_m_agl=training_option_dict[
                trainval_io.SOUNDING_HEIGHTS_KEY])

        num_predictors = len(predictor_names)
        message_format = (
            'Pearson correlation between "{0:s}" and "{1:s}" = {2:.4f}')

        # Only the upper triangle (including the diagonal) is printed.
        for i in range(num_predictors):
            for j in range(i, num_predictors):
                print(message_format.format(
                    predictor_names[i], predictor_names[j],
                    correlation_matrix[i, j]))

            print('\n')

    # Choose the prediction function from the model type, or from the number
    # of dimensions of the first predictor matrix.
    if model_metadata_dict[cnn.CONV_2D3D_KEY]:
        prediction_function = permutation.prediction_function_2d3d_cnn
    elif len(list_of_predictor_matrices[0].shape) - 2 == 2:
        prediction_function = permutation.prediction_function_2d_cnn
    else:
        prediction_function = permutation.prediction_function_3d_cnn

    print(SEPARATOR_STRING)
    result_dict = permutation.run_permutation_test(
        model_object=model_object,
        list_of_input_matrices=list_of_predictor_matrices,
        predictor_names_by_matrix=predictor_names_by_matrix,
        target_values=target_values,
        prediction_function=prediction_function,
        cost_function=permutation.negative_auc_function,
        num_bootstrap_iters=num_bootstrap_iters,
        bootstrap_confidence_level=bootstrap_confidence_level)
    print(SEPARATOR_STRING)

    # Attach provenance information before writing.
    result_dict[permutation.MODEL_FILE_KEY] = model_file_name
    result_dict[permutation.TARGET_VALUES_KEY] = target_values
    result_dict[permutation.FULL_IDS_KEY] = full_id_strings
    result_dict[permutation.STORM_TIMES_KEY] = storm_times_unix_sec

    print('Writing results to: "{0:s}"...'.format(output_file_name))
    permutation.write_results(
        result_dict=result_dict, pickle_file_name=output_file_name)
Exemplo n.º 12
0
def _run(input_model_file_name, sounding_field_names, normalization_type_string,
         normalization_param_file_name, min_normalized_value,
         max_normalized_value, target_name, downsampling_classes,
         downsampling_fractions, monitor_string, weight_loss_function,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, num_examples_per_train_batch,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_validn_batch, num_epochs,
         num_training_batches_per_epoch, num_validation_batches_per_epoch,
         output_dir_name):
    """Trains CNN with soundings only.

    This is effectively the main method.  In order, it: converts the input
    arguments, creates the output directory, finds the training and validation
    files, reads and compiles the model, writes the model metadata and starts
    training.

    :param input_model_file_name: See documentation at top of file.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.  The four time strings are converted in the order
    # training (first, last) and then validation (first, last).
    (
        first_training_time_unix_sec, last_training_time_unix_sec,
        first_validation_time_unix_sec, last_validation_time_unix_sec
    ) = [
        time_conversion.string_to_unix_sec(this_time_string, TIME_FORMAT)
        for this_time_string in (
            first_training_time_string, last_training_time_string,
            first_validation_time_string, last_validation_time_string
        )
    ]

    # Downsampling is used only when more than one class is given.
    downsampling_dict = (
        dict(zip(downsampling_classes, downsampling_fractions))
        if len(downsampling_classes) > 1 else None
    )

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files (same search settings for both).
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.
    print('Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch,
    }

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.LOOP_ONCE_KEY: False,
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=model_metafile_name,
        metadata_dict=metadata_dict,
        training_option_dict=training_option_dict)

    # Validation settings are passed to the trainer as one group of keywords.
    validation_kwargs = {
        'num_validation_batches_per_epoch': num_validation_batches_per_epoch,
        'validation_file_names': validation_file_names,
        'first_validn_time_unix_sec': first_validation_time_unix_sec,
        'last_validn_time_unix_sec': last_validation_time_unix_sec,
        'num_examples_per_validn_batch': num_examples_per_validn_batch,
    }

    cnn.train_cnn_with_soundings(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        **validation_kwargs)
Exemplo n.º 13
0
def _run(input_model_file_name, radar_field_name_by_channel,
         layer_op_name_by_channel, min_height_by_channel_m_agl,
         max_height_by_channel_m_agl, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, target_name,
         shuffle_target, downsampling_classes, downsampling_fractions,
         monitor_string, weight_loss_function, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, num_examples_per_train_batch,
         top_validation_dir_name, first_validation_time_string,
         last_validation_time_string, num_examples_per_validn_batch,
         num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with 2-D GridRad images.

    This is effectively the main method.  In order, it: converts the input
    arguments (turning "no-op" augmentation settings into None), builds one
    layer-operation dictionary per radar channel, creates the output
    directory, finds the training and validation files, reads and compiles
    the model, writes the model metadata and starts training.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_name_by_channel: Same.
    :param layer_op_name_by_channel: Same.
    :param min_height_by_channel_m_agl: Same.
    :param max_height_by_channel_m_agl: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param target_name: Same.
    :param shuffle_target: Same.
    :param downsampling_classes: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param num_examples_per_train_batch: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_validn_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    # An empty or 'None' validation directory turns validation off; the
    # validation time strings are then not parsed at all.
    if top_validation_dir_name in ['', 'None']:
        top_validation_dir_name = None
        num_validation_batches_per_epoch = 0
        first_validation_time_unix_sec = 0
        last_validation_time_unix_sec = 0
    else:
        first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
            first_validation_time_string, TIME_FORMAT)
        last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
            last_validation_time_string, TIME_FORMAT)

    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    if len(downsampling_classes) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(downsampling_classes, downsampling_fractions))
    else:
        class_to_sampling_fraction_dict = None

    # Translation is disabled only when both inputs hold the single value 0.
    # The comparison is element-wise on purpose: testing `x + y == 0` would
    # also match real translations that cancel out (e.g. x = [1], y = [-1]),
    # and for plain lists `+` concatenates, so the test could never succeed.
    no_translation = (
        len(x_translations_pixels) == 1
        and len(y_translations_pixels) == 1
        and x_translations_pixels[0] == 0
        and y_translations_pixels[0] == 0
    )

    if no_translation:
        x_translations_pixels = None
        y_translations_pixels = None

    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Every per-channel input must have exactly one entry per radar channel.
    num_channels = len(radar_field_name_by_channel)
    expected_dimensions = numpy.array([num_channels], dtype=int)

    error_checking.assert_is_numpy_array(
        numpy.array(layer_op_name_by_channel),
        exact_dimensions=expected_dimensions)
    error_checking.assert_is_numpy_array(
        min_height_by_channel_m_agl, exact_dimensions=expected_dimensions)
    error_checking.assert_is_numpy_array(
        max_height_by_channel_m_agl, exact_dimensions=expected_dimensions)

    # One layer-operation dictionary per channel.
    list_of_layer_operation_dicts = [
        {
            input_examples.RADAR_FIELD_KEY: radar_field_name_by_channel[m],
            input_examples.OPERATION_NAME_KEY: layer_op_name_by_channel[m],
            input_examples.MIN_HEIGHT_KEY: min_height_by_channel_m_agl[m],
            input_examples.MAX_HEIGHT_KEY: max_height_by_channel_m_agl[m]
        }
        for m in range(num_channels)
    ]

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    if top_validation_dir_name is None:
        validation_file_names = []
    else:
        validation_file_names = input_examples.find_many_example_files(
            top_directory_name=top_validation_dir_name,
            shuffled=True,
            first_batch_number=FIRST_BATCH_NUMBER,
            last_batch_number=LAST_BATCH_NUMBER,
            raise_error_if_any_missing=False)

    # Read architecture.
    print(
        'Reading architecture from: "{0:s}"...'.format(input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # NOTE(review): this prints the first weight tensor of a layer with the
    # hard-coded name 'radar_conv2d_2'; it looks like a debugging aid and
    # presumably fails for models without such a layer -- confirm it is
    # still wanted.
    print(K.eval(model_object.get_layer(name='radar_conv2d_2').weights[0]))
    print(SEPARATOR_STRING)

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch
    }

    # With several model inputs, the grid size is taken from the first one.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    input_dimensions = input_tensor.get_shape().as_list()
    num_grid_rows = input_dimensions[1]
    num_grid_columns = input_dimensions[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.SHUFFLE_TARGET_KEY: shuffle_target,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=model_metafile_name,
        metadata_dict=metadata_dict,
        training_option_dict=training_option_dict,
        list_of_layer_operation_dicts=list_of_layer_operation_dicts)

    cnn.train_cnn_gridrad_2d_reduced(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        list_of_layer_operation_dicts=list_of_layer_operation_dicts,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec,
        num_examples_per_validn_batch=num_examples_per_validn_batch)
Exemplo n.º 14
0
def _write_metadata_one_cnn(model_object, argument_dict):
    """Writes metadata for one CNN to file.

    The metadata file is written to "model_metadata.p" inside the output
    directory named in `argument_dict`; the directory is created if needed.

    :param model_object: Untrained CNN (instance of `keras.models.Model` or
        `keras.models.Sequential`).
    :param argument_dict: See doc for `_train_one_cnn`.
    :return: metadata_dict: See doc for `cnn.write_model_metadata`.
    :return: training_option_dict: Same.
    """

    from gewittergefahr.deep_learning import cnn
    from gewittergefahr.deep_learning import input_examples
    from gewittergefahr.deep_learning import \
        training_validation_io as trainval_io
    from gewittergefahr.scripts import deep_learning_helper as dl_helper

    # Read input args: predictors.
    sounding_field_names = argument_dict[dl_helper.SOUNDING_FIELDS_ARG_NAME]
    radar_field_name_by_channel = argument_dict[RADAR_FIELDS_KEY]
    layer_op_name_by_channel = argument_dict[LAYER_OPERATIONS_KEY]
    min_height_by_channel_m_agl = argument_dict[MIN_HEIGHTS_KEY]
    max_height_by_channel_m_agl = argument_dict[MAX_HEIGHTS_KEY]

    # Read input args: normalization.
    normalization_type_string = argument_dict[
        dl_helper.NORMALIZATION_TYPE_ARG_NAME
    ]
    normalization_file_name = argument_dict[
        dl_helper.NORMALIZATION_FILE_ARG_NAME
    ]
    min_normalized_value = argument_dict[dl_helper.MIN_NORM_VALUE_ARG_NAME]
    max_normalized_value = argument_dict[dl_helper.MAX_NORM_VALUE_ARG_NAME]

    # Read input args: target variable and downsampling.
    target_name = argument_dict[dl_helper.TARGET_NAME_ARG_NAME]
    downsampling_classes = numpy.array(
        argument_dict[dl_helper.DOWNSAMPLING_CLASSES_ARG_NAME], dtype=int
    )
    downsampling_fractions = numpy.array(
        argument_dict[dl_helper.DOWNSAMPLING_FRACTIONS_ARG_NAME], dtype=float
    )

    monitor_string = argument_dict[dl_helper.MONITOR_ARG_NAME]
    weight_loss_function = bool(argument_dict[dl_helper.WEIGHT_LOSS_ARG_NAME])

    # Read input args: data augmentation.
    x_translations_pixels = numpy.array(
        argument_dict[dl_helper.X_TRANSLATIONS_ARG_NAME], dtype=int
    )
    y_translations_pixels = numpy.array(
        argument_dict[dl_helper.Y_TRANSLATIONS_ARG_NAME], dtype=int
    )
    ccw_rotation_angles_deg = numpy.array(
        argument_dict[dl_helper.ROTATION_ANGLES_ARG_NAME], dtype=float
    )
    noise_standard_deviation = argument_dict[dl_helper.NOISE_STDEV_ARG_NAME]
    num_noisings = argument_dict[dl_helper.NUM_NOISINGS_ARG_NAME]
    flip_in_x = bool(argument_dict[dl_helper.FLIP_X_ARG_NAME])
    flip_in_y = bool(argument_dict[dl_helper.FLIP_Y_ARG_NAME])

    # Read input args: training data.
    top_training_dir_name = argument_dict[dl_helper.TRAINING_DIR_ARG_NAME]
    first_training_time_string = argument_dict[
        dl_helper.FIRST_TRAINING_TIME_ARG_NAME
    ]
    last_training_time_string = argument_dict[
        dl_helper.LAST_TRAINING_TIME_ARG_NAME
    ]
    num_examples_per_train_batch = argument_dict[
        dl_helper.NUM_EX_PER_TRAIN_ARG_NAME
    ]

    # Read input args: validation data.
    top_validation_dir_name = argument_dict[dl_helper.VALIDATION_DIR_ARG_NAME]
    first_validation_time_string = argument_dict[
        dl_helper.FIRST_VALIDATION_TIME_ARG_NAME
    ]
    last_validation_time_string = argument_dict[
        dl_helper.LAST_VALIDATION_TIME_ARG_NAME
    ]
    num_examples_per_validn_batch = argument_dict[
        dl_helper.NUM_EX_PER_VALIDN_ARG_NAME
    ]

    # Read input args: training schedule and output location.
    num_epochs = argument_dict[dl_helper.NUM_EPOCHS_ARG_NAME]
    num_training_batches_per_epoch = argument_dict[
        dl_helper.NUM_TRAINING_BATCHES_ARG_NAME
    ]
    num_validation_batches_per_epoch = argument_dict[
        dl_helper.NUM_VALIDATION_BATCHES_ARG_NAME
    ]
    output_dir_name = argument_dict[dl_helper.OUTPUT_DIR_ARG_NAME]

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT
    )
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT
    )
    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT
    )
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT
    )

    # A first entry of '' or 'None' means that no soundings are used.
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # One layer-operation dictionary per radar channel.
    num_channels = len(radar_field_name_by_channel)
    layer_operation_dicts = [
        {
            input_examples.RADAR_FIELD_KEY: radar_field_name_by_channel[k],
            input_examples.OPERATION_NAME_KEY: layer_op_name_by_channel[k],
            input_examples.MIN_HEIGHT_KEY: min_height_by_channel_m_agl[k],
            input_examples.MAX_HEIGHT_KEY: max_height_by_channel_m_agl[k]
        }
        for k in range(num_channels)
    ]

    # Downsampling is used only when more than one class is given.
    downsampling_dict = (
        dict(zip(downsampling_classes, downsampling_fractions))
        if len(downsampling_classes) > 1 else None
    )

    # Translation is switched off when there is at most one x-translation and
    # the first x- and y-translations are both zero.
    no_translation = (
        len(x_translations_pixels) <= 1
        and x_translations_pixels[0] == 0
        and y_translations_pixels[0] == 0
    )

    if no_translation:
        x_translations_pixels = None
        y_translations_pixels = None

    # A single rotation angle of zero means "no rotation".
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False
    )
    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False
    )

    # Write metadata.
    metadata_dict = {
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.CONV_2D3D_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec,
        cnn.LAYER_OPERATIONS_KEY: layer_operation_dicts,
        cnn.NUM_EX_PER_VALIDN_BATCH_KEY: num_examples_per_validn_batch,
    }

    # With several model inputs, the grid size is taken from the first one.
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    input_dimensions = input_tensor.get_shape().as_list()
    num_grid_rows = input_dimensions[1]
    num_grid_columns = input_dimensions[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.TARGET_NAME_KEY: target_name,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_train_batch,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: downsampling_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y,
    }

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name
    )
    metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    print('Writing metadata to: "{0:s}"...'.format(metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=metafile_name,
        metadata_dict=metadata_dict,
        training_option_dict=training_option_dict
    )

    return metadata_dict, training_option_dict
def _run(input_model_file_name, radar_field_names, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, downsampling_keys,
         downsampling_fractions, monitor_string, weight_loss_function,
         refl_masking_threshold_dbz, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with native (3-D) GridRad images.

    This is effectively the main method.  It does the following, in order:
    converts sentinel values in the input args to None, finds training and
    validation files, reads and recompiles the model architecture, writes
    model metadata to the output directory, then trains the model.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_names: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param downsampling_keys: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param refl_masking_threshold_dbz: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # An empty string or the string "None" as first item means "no soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    # Downsampling is used only if more than one key is supplied.
    if len(downsampling_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(downsampling_keys, downsampling_fractions))
    else:
        class_to_sampling_fraction_dict = None

    # Translation is turned off only when the single (x, y) offset is (0, 0).
    # Each offset is tested on its own, because testing the sum x + y would
    # also turn off a genuine offset such as (3, -3).
    if (len(x_translations_pixels) == 1
            and len(y_translations_pixels) == 1
            and x_translations_pixels[0] == 0
            and y_translations_pixels[0] == 0):
        x_translations_pixels = None
        y_translations_pixels = None

    # A single rotation angle of zero means "no rotation".
    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    # A non-positive number of noisings means "no noising".
    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name,
        shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.  The model that was read is replaced by a clone,
    # which is then compiled from scratch below.
    print('Reading architecture from: "{0:s}"...'.format(
        input_model_file_name))
    model_object = cnn.read_model(input_model_file_name)
    model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(loss=keras.losses.binary_crossentropy,
                         optimizer=keras.optimizers.Adam(),
                         metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.  The target name is taken from the metadata of the first
    # training file.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=training_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    metadata_dict = {
        cnn.TARGET_NAME_KEY: target_name,
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.USE_2D3D_CONVOLUTION_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec
    }

    # If the model has several inputs, the first one is used to get the grid
    # dimensions (axes 1 and 2 of the input tensor).
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    num_grid_rows = input_tensor.get_shape().as_list()[1]
    num_grid_columns = input_tensor.get_shape().as_list()[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_batch,
        trainval_io.RADAR_FIELDS_KEY: radar_field_names,
        trainval_io.RADAR_HEIGHTS_KEY: RADAR_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.REFLECTIVITY_MASK_KEY: refl_masking_threshold_dbz,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(pickle_file_name=model_metafile_name,
                             metadata_dict=metadata_dict,
                             training_option_dict=training_option_dict)

    cnn.train_cnn_2d_or_3d(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec)
def _write_metadata_one_model(argument_dict):
    """Writes metadata for a single upconvnet to a Pickle file.

    The metafile location is derived from the output directory found in
    `argument_dict`.

    :param argument_dict: See doc for `_train_one_upconvnet`.
    :return: metadata_dict: See doc for `upconvnet.write_model_metadata`.
    """

    from gewittergefahr.deep_learning import cnn
    from gewittergefahr.deep_learning import upconvnet
    from gewittergefahr.deep_learning import input_examples
    from gewittergefahr.scripts import train_upconvnet

    # Pull the individual options out of the dictionary.
    cnn_path = argument_dict[train_upconvnet.CNN_FILE_ARG_NAME]
    feature_layer_name = argument_dict[train_upconvnet.FEATURE_LAYER_ARG_NAME]

    training_dir_name = argument_dict[train_upconvnet.TRAINING_DIR_ARG_NAME]
    first_training_string = argument_dict[
        train_upconvnet.FIRST_TRAINING_TIME_ARG_NAME]
    last_training_string = argument_dict[
        train_upconvnet.LAST_TRAINING_TIME_ARG_NAME]

    validation_dir_name = argument_dict[
        train_upconvnet.VALIDATION_DIR_ARG_NAME]
    first_validation_string = argument_dict[
        train_upconvnet.FIRST_VALIDATION_TIME_ARG_NAME]
    last_validation_string = argument_dict[
        train_upconvnet.LAST_VALIDATION_TIME_ARG_NAME]

    batch_size = argument_dict[train_upconvnet.NUM_EX_PER_BATCH_ARG_NAME]
    epoch_count = argument_dict[train_upconvnet.NUM_EPOCHS_ARG_NAME]
    training_batch_count = argument_dict[
        train_upconvnet.NUM_TRAINING_BATCHES_ARG_NAME]
    validation_batch_count = argument_dict[
        train_upconvnet.NUM_VALIDATION_BATCHES_ARG_NAME]
    output_directory = argument_dict[train_upconvnet.OUTPUT_DIR_ARG_NAME]

    # Convert the four time strings to Unix seconds (training period first,
    # then validation period).
    (
        first_training_unix_sec, last_training_unix_sec,
        first_validation_unix_sec, last_validation_unix_sec
    ) = [
        time_conversion.string_to_unix_sec(this_string, TIME_FORMAT)
        for this_string in (
            first_training_string, last_training_string,
            first_validation_string, last_validation_string
        )
    ]

    # Locate shuffled example files: training first, then validation.
    file_lists = []

    for this_top_dir_name in (training_dir_name, validation_dir_name):
        file_lists.append(input_examples.find_many_example_files(
            top_directory_name=this_top_dir_name, shuffled=True,
            first_batch_number=FIRST_BATCH_NUMBER,
            last_batch_number=LAST_BATCH_NUMBER,
            raise_error_if_any_missing=False
        ))

    training_paths, validation_paths = file_lists

    # "foo.h5" is a placeholder model name inside the output directory; it is
    # handed to `cnn.find_metafile` only to obtain the metafile path.
    placeholder_model_path = '{0:s}/foo.h5'.format(output_directory)
    metafile_path = cnn.find_metafile(
        model_file_name=placeholder_model_path, raise_error_if_missing=False)

    message_string = 'Writing upconvnet metadata to: "{0:s}"...'.format(
        metafile_path)
    print(message_string)

    metadata_dict = upconvnet.write_model_metadata(
        cnn_file_name=cnn_path,
        cnn_feature_layer_name=feature_layer_name,
        num_epochs=epoch_count,
        num_examples_per_batch=batch_size,
        num_training_batches_per_epoch=training_batch_count,
        training_example_file_names=training_paths,
        first_training_time_unix_sec=first_training_unix_sec,
        last_training_time_unix_sec=last_training_unix_sec,
        num_validation_batches_per_epoch=validation_batch_count,
        validation_example_file_names=validation_paths,
        first_validation_time_unix_sec=first_validation_unix_sec,
        last_validation_time_unix_sec=last_validation_unix_sec,
        pickle_file_name=metafile_path)

    return metadata_dict
# Example no. 17
# 0
def _run(model_file_name, top_example_dir_name, first_spc_date_string,
         last_spc_date_string, num_examples, class_fraction_keys,
         class_fraction_values, output_dir_name):
    """Makes predictions from trained CNN.

    This is effectively the main method.  It reads the model and its metadata
    (expected in the same directory as the model file), builds the generator
    that matches the model type, applies the model batch by batch, and writes
    the pooled predictions to one file in the output directory.

    :param model_file_name: See documentation at top of file.
    :param top_example_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param num_examples: Same.
    :param class_fraction_keys: Same.
    :param class_fraction_values: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if the model does multi-class classification.
    """

    print('Reading model from: "{0:s}"...'.format(model_file_name))
    model_object = cnn.read_model(model_file_name)

    # The size of the last axis of the final layer's output is taken as the
    # number of output neurons.
    num_output_neurons = (
        model_object.layers[-1].output.get_shape().as_list()[-1]
    )

    if num_output_neurons > 2:
        error_string = (
            'The model has {0:d} output neurons, which suggests {0:d}-class '
            'classification.  This script handles only binary classification.'
        ).format(num_output_neurons)

        raise ValueError(error_string)

    soundings_only = False

    if isinstance(model_object.input, list):
        list_of_input_tensors = model_object.input
    else:
        list_of_input_tensors = [model_object.input]

    # A lone input tensor with exactly one dimension between the first and
    # last axes is treated as a soundings-only model.
    if len(list_of_input_tensors) == 1:
        these_spatial_dim = numpy.array(
            list_of_input_tensors[0].get_shape().as_list()[1:-1], dtype=int
        )
        soundings_only = len(these_spatial_dim) == 1

    # os.path.join keeps the path relative when `model_file_name` has no
    # directory part.  Formatting with a literal "/" would instead point at
    # the filesystem root ("/model_metadata.p").
    model_directory_name, _ = os.path.split(model_file_name)
    model_metafile_name = os.path.join(
        model_directory_name, 'model_metadata.p')

    print('Reading metadata from: "{0:s}"...'.format(model_metafile_name))
    model_metadata_dict = cnn.read_model_metadata(model_metafile_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    # Downsampling is used only if more than one class key is supplied.
    if len(class_fraction_keys) > 1:
        downsampling_dict = dict(zip(
            class_fraction_keys, class_fraction_values
        ))
    else:
        downsampling_dict = None

    training_option_dict[trainval_io.SAMPLING_FRACTIONS_KEY] = downsampling_dict

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # Reuse the training options, overriding the example files, time period
    # and batch size for this prediction run.
    training_option_dict[trainval_io.EXAMPLE_FILES_KEY] = example_file_names
    training_option_dict[trainval_io.FIRST_STORM_TIME_KEY] = (
        time_conversion.get_start_of_spc_date(first_spc_date_string)
    )
    training_option_dict[trainval_io.LAST_STORM_TIME_KEY] = (
        time_conversion.get_end_of_spc_date(last_spc_date_string)
    )
    training_option_dict[trainval_io.NUM_EXAMPLES_PER_BATCH_KEY] = (
        NUM_EXAMPLES_PER_BATCH
    )

    # Pick the generator that matches the model type.
    if soundings_only:
        generator_object = testing_io.sounding_generator(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)

    elif model_metadata_dict[cnn.LAYER_OPERATIONS_KEY] is not None:
        generator_object = testing_io.gridrad_generator_2d_reduced(
            option_dict=training_option_dict,
            desired_num_examples=num_examples,
            list_of_operation_dicts=model_metadata_dict[
                cnn.LAYER_OPERATIONS_KEY]
        )

    elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
        generator_object = testing_io.myrorss_generator_2d3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)
    else:
        generator_object = testing_io.generator_2d_or_3d(
            option_dict=training_option_dict,
            desired_num_examples=num_examples)

    include_soundings = (
        training_option_dict[trainval_io.SOUNDING_FIELDS_KEY] is not None
    )

    full_storm_id_strings = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    observed_labels = numpy.array([], dtype=int)

    # NOTE: this stays None if the generator yields no batches, and is then
    # passed unchanged to `prediction_io.write_ungridded_predictions`.
    class_probability_matrix = None

    for this_storm_object_dict in generator_object:
        print(SEPARATOR_STRING)

        full_storm_id_strings += this_storm_object_dict[testing_io.FULL_IDS_KEY]
        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_storm_object_dict[testing_io.STORM_TIMES_KEY]
        ))
        observed_labels = numpy.concatenate((
            observed_labels, this_storm_object_dict[testing_io.TARGET_ARRAY_KEY]
        ))

        if soundings_only:
            these_predictor_matrices = [
                this_storm_object_dict[testing_io.SOUNDING_MATRIX_KEY]
            ]
        else:
            these_predictor_matrices = this_storm_object_dict[
                testing_io.INPUT_MATRICES_KEY]

        # When soundings are used, they are the last predictor matrix.
        if include_soundings:
            this_sounding_matrix = these_predictor_matrices[-1]
        else:
            this_sounding_matrix = None

        if soundings_only:
            this_probability_matrix = cnn.apply_cnn_soundings_only(
                model_object=model_object, sounding_matrix=this_sounding_matrix,
                verbose=True)
        elif model_metadata_dict[cnn.CONV_2D3D_KEY]:
            if training_option_dict[trainval_io.UPSAMPLE_REFLECTIVITY_KEY]:
                this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                    model_object=model_object,
                    radar_image_matrix=these_predictor_matrices[0],
                    sounding_matrix=this_sounding_matrix, verbose=True)
            else:
                this_probability_matrix = cnn.apply_2d3d_cnn(
                    model_object=model_object,
                    reflectivity_matrix_dbz=these_predictor_matrices[0],
                    azimuthal_shear_matrix_s01=these_predictor_matrices[1],
                    sounding_matrix=this_sounding_matrix, verbose=True)
        else:
            this_probability_matrix = cnn.apply_2d_or_3d_cnn(
                model_object=model_object,
                radar_image_matrix=these_predictor_matrices[0],
                sounding_matrix=this_sounding_matrix, verbose=True)

        print(SEPARATOR_STRING)

        if class_probability_matrix is None:
            # Adding 0. creates a new array, rather than keeping a reference
            # to this batch's output.
            class_probability_matrix = this_probability_matrix + 0.
        else:
            class_probability_matrix = numpy.concatenate(
                (class_probability_matrix, this_probability_matrix), axis=0
            )

    output_file_name = prediction_io.find_ungridded_file(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing results to: "{0:s}"...'.format(output_file_name))

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels, storm_ids=full_storm_id_strings,
        storm_times_unix_sec=storm_times_unix_sec,
        target_name=training_option_dict[trainval_io.TARGET_NAME_KEY],
        model_file_name=model_file_name
    )