def _convert_one_file(input_file_name, output_file_name,
                      num_examples_per_batch):
    """Converts examples in one file from MYRORSS to GridRad format.

    Only metadata is read here.  The examples are then converted in
    consecutive batches, each handled by
    `_convert_one_file_selected_examples`.  The first batch is written with
    `append_to_file=False` and every later batch with `append_to_file=True`.

    :param input_file_name: Path to input file (with MYRORSS examples).  Will
        be read by `input_examples.read_example_file`.
    :param output_file_name: Path to output file (with the same examples but
        in GridRad format).  Will be written by
        `input_examples.write_example_file`.
    :param num_examples_per_batch: See documentation at top of file.
    """

    print('Reading metadata from: "{0:s}"...'.format(input_file_name))

    metadata_dict = input_examples.read_example_file(
        netcdf_file_name=input_file_name, read_all_target_vars=True,
        metadata_only=True)

    all_id_strings = metadata_dict[input_examples.FULL_IDS_KEY]
    all_times_unix_sec = metadata_dict[input_examples.STORM_TIMES_KEY]
    num_examples = len(all_id_strings)

    for batch_start in range(0, num_examples, num_examples_per_batch):
        # Exclusive end index, clipped so that the last batch may be short.
        batch_end = min(batch_start + num_examples_per_batch, num_examples)
        batch_slice = slice(batch_start, batch_end)

        _convert_one_file_selected_examples(
            input_file_name=input_file_name,
            output_file_name=output_file_name,
            full_storm_id_strings=all_id_strings[batch_slice],
            storm_times_unix_sec=all_times_unix_sec[batch_slice],
            append_to_file=batch_start > 0)
def _find_input_files(top_input_dir_name, first_spc_date_string,
                      last_spc_date_string):
    """Finds input files (containing unshuffled examples).

    Metadata is read from each file found, in order to count the examples.

    :param top_input_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :return: input_example_file_names: 1-D list of paths to input files.
    :return: num_input_examples: Total number of examples in these files.
    """

    input_example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_input_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    num_input_examples = 0

    for this_file_name in input_example_file_names:
        # Call form of print: valid in both Python 2 and Python 3.
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, metadata_only=True)

        num_input_examples += len(
            this_example_dict[input_examples.STORM_IDS_KEY])

    return input_example_file_names, num_input_examples
def _shuffle_one_input_file(input_example_file_name, radar_field_names,
                            num_examples_per_out_chunk,
                            output_example_file_names):
    """Shuffles examples from one input file to many output files.

    The examples are put in random order and then cut into consecutive
    chunks.  Each chunk goes to an output file drawn at random from
    `output_example_file_names`; it is appended if that file already exists.

    :param input_example_file_name: Path to input file.
    :param radar_field_names: See documentation at top of file.
    :param num_examples_per_out_chunk: Same.
    :param output_example_file_names: 1-D list of paths to output files.
    """

    print('Reading data from: "{0:s}"...'.format(input_example_file_name))

    all_example_dict = input_examples.read_example_file(
        netcdf_file_name=input_example_file_name, read_all_target_vars=True,
        radar_field_names_to_keep=radar_field_names)

    num_examples = len(all_example_dict[input_examples.FULL_IDS_KEY])

    # Random permutation of 0...(num_examples - 1).
    permuted_indices = numpy.arange(num_examples, dtype=int)
    numpy.random.shuffle(permuted_indices)

    all_example_dict = input_examples.subset_examples(
        example_dict=all_example_dict, indices_to_keep=permuted_indices)

    for chunk_start in range(0, num_examples, num_examples_per_out_chunk):
        # Exclusive end index, clipped so that the last chunk may be short.
        chunk_end = min(
            chunk_start + num_examples_per_out_chunk, num_examples)
        chunk_indices = numpy.arange(chunk_start, chunk_end, dtype=int)

        chunk_example_dict = input_examples.subset_examples(
            example_dict=all_example_dict, indices_to_keep=chunk_indices,
            create_new_dict=True)

        destination_file_name = random.choice(output_example_file_names)
        print('Writing shuffled examples to: "{0:s}"...'.format(
            destination_file_name))

        # Append only if an earlier chunk already created this file.
        destination_exists = os.path.isfile(destination_file_name)

        input_examples.write_example_file(
            netcdf_file_name=destination_file_name,
            example_dict=chunk_example_dict,
            append_to_file=destination_exists)
def _check_training_args(
        model_file_name, history_file_name, tensorboard_dir_name, num_epochs,
        num_training_batches_per_epoch, num_validation_batches_per_epoch,
        training_option_dict, weight_loss_function):
    """Error-checks input arguments for training.

    The caller's `training_option_dict` is not modified; a copy is merged on
    top of `trainval_io.DEFAULT_OPTION_DICT` for use inside this method only.

    :param model_file_name: Path to output file (HDF5 format).  The model will
        be saved here after each epoch.
    :param history_file_name: Path to output file (CSV format).  Training
        history (performance metrics) will be saved here after each epoch.
    :param tensorboard_dir_name: Path to output directory for TensorBoard log
        files.
    :param num_epochs: Number of epochs (integer >= 1).
    :param num_training_batches_per_epoch: Number of training batches in each
        epoch (integer >= 1).
    :param num_validation_batches_per_epoch: Number of validation batches in
        each epoch (integer >= 0).
    :param training_option_dict: See doc for
        `training_validation_io.example_generator_2d_or_3d`.
    :param weight_loss_function: Boolean flag.  If False, classes will be
        weighted equally in the loss function.  If True, classes will be
        weighted differently (inversely proportional to their sampling
        fractions).
    :return: class_to_weight_dict: Dictionary, where each key is the integer
        ID for a target class (-2 for "dead storm") and each value is the
        weight for the loss function.  If None, classes will be equally
        weighted in the loss function.
    """

    user_option_dict = training_option_dict.copy()
    full_option_dict = trainval_io.DEFAULT_OPTION_DICT.copy()
    full_option_dict.update(user_option_dict)

    for this_file_name in (model_file_name, history_file_name):
        file_system_utils.mkdir_recursive_if_necessary(
            file_name=this_file_name)

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=tensorboard_dir_name)

    # Each pair is (value to check, smallest allowed value).
    count_and_minimum_pairs = (
        (num_epochs, 1),
        (num_training_batches_per_epoch, 1),
        (num_validation_batches_per_epoch, 0),
    )

    for this_count, this_minimum in count_and_minimum_pairs:
        error_checking.assert_is_integer(this_count)
        error_checking.assert_is_geq(this_count, this_minimum)

    error_checking.assert_is_boolean(weight_loss_function)

    if not weight_loss_function:
        return None

    sampling_fraction_dict = full_option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY]

    # Without sampling fractions there is nothing to derive weights from.
    if sampling_fraction_dict is None:
        return None

    # The target name is taken from the metadata of the first example file.
    first_example_file_name = full_option_dict[
        trainval_io.EXAMPLE_FILES_KEY][0]

    first_metadata_dict = input_examples.read_example_file(
        netcdf_file_name=first_example_file_name, metadata_only=True)

    return dl_utils.class_fractions_to_weights(
        sampling_fraction_by_class_dict=sampling_fraction_dict,
        target_name=first_metadata_dict[input_examples.TARGET_NAME_KEY],
        binarize_target=full_option_dict[trainval_io.BINARIZE_TARGET_KEY])
def _run(input_model_file_name, radar_field_names, sounding_field_names,
         normalization_type_string, normalization_param_file_name,
         min_normalized_value, max_normalized_value, downsampling_keys,
         downsampling_fractions, monitor_string, weight_loss_function,
         refl_masking_threshold_dbz, x_translations_pixels,
         y_translations_pixels, ccw_rotation_angles_deg,
         noise_standard_deviation, num_noisings, flip_in_x, flip_in_y,
         top_training_dir_name, first_training_time_string,
         last_training_time_string, top_validation_dir_name,
         first_validation_time_string, last_validation_time_string,
         num_examples_per_batch, num_epochs, num_training_batches_per_epoch,
         num_validation_batches_per_epoch, output_dir_name):
    """Trains CNN with native (3-D) GridRad images.

    This is effectively the main method.

    Steps: process input args, set output locations inside
    `output_dir_name`, find training and validation files, read the model
    architecture and re-compile it, write model metadata, then train.

    :param input_model_file_name: See documentation at top of file.
    :param radar_field_names: Same.
    :param sounding_field_names: Same.
    :param normalization_type_string: Same.
    :param normalization_param_file_name: Same.
    :param min_normalized_value: Same.
    :param max_normalized_value: Same.
    :param downsampling_keys: Same.
    :param downsampling_fractions: Same.
    :param monitor_string: Same.
    :param weight_loss_function: Same.
    :param refl_masking_threshold_dbz: Same.
    :param x_translations_pixels: Same.
    :param y_translations_pixels: Same.
    :param ccw_rotation_angles_deg: Same.
    :param noise_standard_deviation: Same.
    :param num_noisings: Same.
    :param flip_in_x: Same.
    :param flip_in_y: Same.
    :param top_training_dir_name: Same.
    :param first_training_time_string: Same.
    :param last_training_time_string: Same.
    :param top_validation_dir_name: Same.
    :param first_validation_time_string: Same.
    :param last_validation_time_string: Same.
    :param num_examples_per_batch: Same.
    :param num_epochs: Same.
    :param num_training_batches_per_epoch: Same.
    :param num_validation_batches_per_epoch: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    first_training_time_unix_sec = time_conversion.string_to_unix_sec(
        first_training_time_string, TIME_FORMAT)
    last_training_time_unix_sec = time_conversion.string_to_unix_sec(
        last_training_time_string, TIME_FORMAT)

    first_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        first_validation_time_string, TIME_FORMAT)
    last_validation_time_unix_sec = time_conversion.string_to_unix_sec(
        last_validation_time_string, TIME_FORMAT)

    # An empty string or the literal "None" means "no soundings".
    if sounding_field_names[0] in ['', 'None']:
        sounding_field_names = None

    if len(downsampling_keys) > 1:
        class_to_sampling_fraction_dict = dict(
            zip(downsampling_keys, downsampling_fractions))
    else:
        class_to_sampling_fraction_dict = None

    # A single (0, 0) translation means "no translation".  Each component is
    # tested explicitly: adding the two sequences and comparing with zero
    # would also match offsetting pairs such as (3, -3), would concatenate
    # plain lists (never equal to 0), and would raise for arrays of unequal
    # length.
    no_translation = (
        len(x_translations_pixels) == 1 and
        len(y_translations_pixels) == 1 and
        x_translations_pixels[0] == 0 and
        y_translations_pixels[0] == 0
    )

    if no_translation:
        x_translations_pixels = None
        y_translations_pixels = None

    if len(ccw_rotation_angles_deg) == 1 and ccw_rotation_angles_deg[0] == 0:
        ccw_rotation_angles_deg = None

    if num_noisings <= 0:
        num_noisings = 0
        noise_standard_deviation = None

    # Set output locations.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    output_model_file_name = '{0:s}/model.h5'.format(output_dir_name)
    history_file_name = '{0:s}/model_history.csv'.format(output_dir_name)
    tensorboard_dir_name = '{0:s}/tensorboard'.format(output_dir_name)
    model_metafile_name = '{0:s}/model_metadata.p'.format(output_dir_name)

    # Find training and validation files.
    training_file_names = input_examples.find_many_example_files(
        top_directory_name=top_training_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    validation_file_names = input_examples.find_many_example_files(
        top_directory_name=top_validation_dir_name, shuffled=True,
        first_batch_number=FIRST_BATCH_NUMBER,
        last_batch_number=LAST_BATCH_NUMBER,
        raise_error_if_any_missing=False)

    # Read architecture.  Call form of print: valid in Python 2 and 3.
    print('Reading architecture from: "{0:s}"...'.format(
        input_model_file_name))

    model_object = cnn.read_model(input_model_file_name)
    model_object = keras.models.clone_model(model_object)

    # TODO(thunderhoser): This is a HACK.
    model_object.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer=keras.optimizers.Adam(),
        metrics=cnn_setup.DEFAULT_METRIC_FUNCTION_LIST)

    print(SEPARATOR_STRING)
    model_object.summary()
    print(SEPARATOR_STRING)

    # Write metadata.  The target name comes from the first training file.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=training_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    metadata_dict = {
        cnn.TARGET_NAME_KEY: target_name,
        cnn.NUM_EPOCHS_KEY: num_epochs,
        cnn.NUM_TRAINING_BATCHES_KEY: num_training_batches_per_epoch,
        cnn.NUM_VALIDATION_BATCHES_KEY: num_validation_batches_per_epoch,
        cnn.MONITOR_STRING_KEY: monitor_string,
        cnn.WEIGHT_LOSS_FUNCTION_KEY: weight_loss_function,
        cnn.USE_2D3D_CONVOLUTION_KEY: False,
        cnn.VALIDATION_FILES_KEY: validation_file_names,
        cnn.FIRST_VALIDN_TIME_KEY: first_validation_time_unix_sec,
        cnn.LAST_VALIDN_TIME_KEY: last_validation_time_unix_sec
    }

    # If the model has several input tensors, the grid size is read from the
    # first one (dimensions 1 and 2 of its shape).
    input_tensor = model_object.input
    if isinstance(input_tensor, list):
        input_tensor = input_tensor[0]

    input_dimensions = input_tensor.get_shape().as_list()
    num_grid_rows = input_dimensions[1]
    num_grid_columns = input_dimensions[2]

    training_option_dict = {
        trainval_io.EXAMPLE_FILES_KEY: training_file_names,
        trainval_io.FIRST_STORM_TIME_KEY: first_training_time_unix_sec,
        trainval_io.LAST_STORM_TIME_KEY: last_training_time_unix_sec,
        trainval_io.NUM_EXAMPLES_PER_BATCH_KEY: num_examples_per_batch,
        trainval_io.RADAR_FIELDS_KEY: radar_field_names,
        trainval_io.RADAR_HEIGHTS_KEY: RADAR_HEIGHTS_M_AGL,
        trainval_io.SOUNDING_FIELDS_KEY: sounding_field_names,
        trainval_io.SOUNDING_HEIGHTS_KEY: SOUNDING_HEIGHTS_M_AGL,
        trainval_io.NUM_ROWS_KEY: num_grid_rows,
        trainval_io.NUM_COLUMNS_KEY: num_grid_columns,
        trainval_io.NORMALIZATION_TYPE_KEY: normalization_type_string,
        trainval_io.NORMALIZATION_FILE_KEY: normalization_param_file_name,
        trainval_io.MIN_NORMALIZED_VALUE_KEY: min_normalized_value,
        trainval_io.MAX_NORMALIZED_VALUE_KEY: max_normalized_value,
        trainval_io.BINARIZE_TARGET_KEY: False,
        trainval_io.SAMPLING_FRACTIONS_KEY: class_to_sampling_fraction_dict,
        trainval_io.LOOP_ONCE_KEY: False,
        trainval_io.REFLECTIVITY_MASK_KEY: refl_masking_threshold_dbz,
        trainval_io.X_TRANSLATIONS_KEY: x_translations_pixels,
        trainval_io.Y_TRANSLATIONS_KEY: y_translations_pixels,
        trainval_io.ROTATION_ANGLES_KEY: ccw_rotation_angles_deg,
        trainval_io.NOISE_STDEV_KEY: noise_standard_deviation,
        trainval_io.NUM_NOISINGS_KEY: num_noisings,
        trainval_io.FLIP_X_KEY: flip_in_x,
        trainval_io.FLIP_Y_KEY: flip_in_y
    }

    print('Writing metadata to: "{0:s}"...'.format(model_metafile_name))
    cnn.write_model_metadata(
        pickle_file_name=model_metafile_name, metadata_dict=metadata_dict,
        training_option_dict=training_option_dict)

    cnn.train_cnn_2d_or_3d(
        model_object=model_object,
        model_file_name=output_model_file_name,
        history_file_name=history_file_name,
        tensorboard_dir_name=tensorboard_dir_name,
        num_epochs=num_epochs,
        num_training_batches_per_epoch=num_training_batches_per_epoch,
        training_option_dict=training_option_dict,
        monitor_string=monitor_string,
        weight_loss_function=weight_loss_function,
        num_validation_batches_per_epoch=num_validation_batches_per_epoch,
        validation_file_names=validation_file_names,
        first_validn_time_unix_sec=first_validation_time_unix_sec,
        last_validn_time_unix_sec=last_validation_time_unix_sec)
def _convert_one_file(input_file_name, resolution_factor, output_file_name):
    """Converts examples in one file from GridRad to MYRORSS format.

    Reflectivity is upsampled by `resolution_factor`.  The two shear fields
    come from reducing the 3-D examples with `LL_SHEAR_OPERATION_DICT` and
    `ML_SHEAR_OPERATION_DICT`; they are upsampled by twice that factor and
    multiplied by `VORTICITY_TO_AZ_SHEAR`.

    :param input_file_name: Path to input file (with GridRad examples).  Will
        be read by `input_examples.read_example_file`.
    :param resolution_factor: See documentation at top of file.
    :param output_file_name: Path to output file (with the same examples but
        in MYRORSS format).  Will be written by
        `input_examples.write_example_file`.
    """

    print('Reading GridRad examples from: "{0:s}"...'.format(input_file_name))
    example_dict = input_examples.read_example_file(
        netcdf_file_name=input_file_name, read_all_target_vars=True)

    # Adding zero yields a new object, so the heights survive the 3-D-to-2-D
    # reduction below.
    refl_heights_m_agl = example_dict[input_examples.RADAR_HEIGHTS_KEY] + 0

    refl_index = example_dict[input_examples.RADAR_FIELDS_KEY].index(
        radar_utils.REFL_NAME)
    orig_refl_matrix_dbz = example_dict[
        input_examples.RADAR_IMAGE_MATRIX_KEY][..., refl_index]

    reflectivity_matrix_dbz = numpy.expand_dims(
        trainval_io.upsample_reflectivity(
            reflectivity_matrix_dbz=orig_refl_matrix_dbz,
            upsampling_factor=resolution_factor),
        axis=-1)

    shear_operation_dicts = [LL_SHEAR_OPERATION_DICT, ML_SHEAR_OPERATION_DICT]
    example_dict = input_examples.reduce_examples_3d_to_2d(
        example_dict=example_dict,
        list_of_operation_dicts=shear_operation_dicts)

    reduced_field_names = example_dict[input_examples.RADAR_FIELDS_KEY]
    min_heights_m_asl = example_dict[input_examples.MIN_RADAR_HEIGHTS_KEY]
    vorticity_flags = (
        numpy.array(reduced_field_names) == radar_utils.VORTICITY_NAME)

    # Low-level shear first, then mid-level.
    upsampled_shear_matrices_s01 = []

    for this_operation_dict in shear_operation_dicts:
        # Indexing with an index array (rather than a scalar) keeps the
        # final axis of the radar matrix.
        these_channel_indices = numpy.where(numpy.logical_and(
            vorticity_flags,
            min_heights_m_asl ==
            this_operation_dict[input_examples.MIN_HEIGHT_KEY]
        ))[0]

        upsampled_shear_matrices_s01.append(
            trainval_io.upsample_reflectivity(
                reflectivity_matrix_dbz=example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY
                ][..., these_channel_indices],
                upsampling_factor=resolution_factor * 2)
        )

    azimuthal_shear_matrix_s01 = VORTICITY_TO_AZ_SHEAR * numpy.concatenate(
        upsampled_shear_matrices_s01, axis=-1)

    example_dict[input_examples.REFL_IMAGE_MATRIX_KEY] = (
        reflectivity_matrix_dbz)
    example_dict[input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY] = (
        azimuthal_shear_matrix_s01)
    example_dict[input_examples.RADAR_HEIGHTS_KEY] = refl_heights_m_agl
    example_dict[input_examples.RADAR_FIELDS_KEY] = [
        radar_utils.LOW_LEVEL_SHEAR_NAME, radar_utils.MID_LEVEL_SHEAR_NAME
    ]
    example_dict[input_examples.ROTATED_GRID_SPACING_KEY] /= resolution_factor

    # Remove the keys that are not carried over to the output file.
    obsolete_keys = (
        input_examples.RADAR_IMAGE_MATRIX_KEY,
        input_examples.MIN_RADAR_HEIGHTS_KEY,
        input_examples.MAX_RADAR_HEIGHTS_KEY,
        input_examples.RADAR_LAYER_OPERATION_NAMES_KEY,
    )

    for this_key in obsolete_keys:
        example_dict.pop(this_key, None)

    print('Writing examples in MYRORSS format to: "{0:s}"...'.format(
        output_file_name
    ))

    input_examples.write_example_file(
        netcdf_file_name=output_file_name, example_dict=example_dict,
        append_to_file=False)
def _run(top_example_dir_name, first_spc_date_string, last_spc_date_string,
         min_percentile_level, max_percentile_level, num_radar_rows,
         num_radar_columns, output_file_name):
    """Finds normalization parameters for GridRad data.

    This is effectively the main method.

    Four sets of parameters are accumulated over all example files, then
    converted with `_convert_normalization_params` and written with
    `dl_utils.write_normalization_params`:

    - one set per radar field (z-score params and value frequencies)
    - one set per radar field/height pair (z-score params only)
    - one set per sounding field (z-score params and value frequencies)
    - one set per sounding field/height pair (z-score params only)

    :param top_example_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param min_percentile_level: Same.
    :param max_percentile_level: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param output_file_name: Same.
    """

    # Non-positive grid sizes are passed to the file reader as None.
    # NOTE(review): presumably None means "keep the full grid" -- confirm
    # against `input_examples.read_example_file`.
    if num_radar_rows <= 0:
        num_radar_rows = None
    if num_radar_columns <= 0:
        num_radar_columns = None

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    # Disabled alternative: search for shuffled files by batch number.
    # example_file_names = input_examples.find_many_example_files(
    #     top_directory_name=top_example_dir_name, shuffled=True,
    #     first_batch_number=0, last_batch_number=LARGE_INTEGER,
    #     raise_error_if_any_missing=False)

    example_file_names = input_examples.find_many_example_files(
        top_directory_name=top_example_dir_name, shuffled=False,
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string,
        raise_error_if_any_missing=False)

    # The first file determines the fields, heights and radar layout that
    # are used for every file below.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], read_all_target_vars=True)

    sounding_field_names = this_example_dict[
        input_examples.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = this_example_dict[
        input_examples.SOUNDING_HEIGHTS_KEY]

    # -1 flags files with a separate reflectivity matrix (these also hold an
    # azimuthal-shear matrix; see the `else` branches below).  Otherwise the
    # value is the rank of the radar matrix minus 2.
    # NOTE(review): the "minus 2" presumably discounts the example and
    # channel axes -- confirm.
    if input_examples.REFL_IMAGE_MATRIX_KEY in this_example_dict:
        num_radar_dimensions = -1
    else:
        num_radar_dimensions = (len(
            this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY].shape) - 2)

    # Build the list of unique radar fields and the parallel lists of
    # field/height pairs.
    # TODO(thunderhoser): Put this in separate method.
    if num_radar_dimensions == 3:
        radar_field_names = this_example_dict[input_examples.RADAR_FIELDS_KEY]
        radar_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        radar_field_name_by_pair = []
        radar_height_by_pair_m_agl = numpy.array([], dtype=int)

        # Every field is paired with every height.
        for this_field_name in radar_field_names:
            radar_field_name_by_pair += ([this_field_name] *
                                         len(radar_heights_m_agl))
            radar_height_by_pair_m_agl = numpy.concatenate(
                (radar_height_by_pair_m_agl, radar_heights_m_agl))

    elif num_radar_dimensions == 2:
        # The file's field and height lists are used directly as the pairs.
        radar_field_name_by_pair = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_height_by_pair_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]

        radar_field_names = list(set(radar_field_name_by_pair))
        radar_field_names.sort()

    else:
        # Here the file's field list is treated as the azimuthal-shear
        # fields and its height list as the reflectivity heights.
        az_shear_field_names = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]
        radar_field_names = [radar_utils.REFL_NAME] + az_shear_field_names

        refl_heights_m_agl = this_example_dict[
            input_examples.RADAR_HEIGHTS_KEY]
        radar_field_name_by_pair = (
            [radar_utils.REFL_NAME] * len(refl_heights_m_agl) +
            az_shear_field_names)

        # Every azimuthal-shear field gets the same nominal height.
        az_shear_heights_m_agl = numpy.full(len(az_shear_field_names),
                                            radar_utils.SHEAR_HEIGHT_M_ASL)
        radar_height_by_pair_m_agl = numpy.concatenate(
            (refl_heights_m_agl, az_shear_heights_m_agl)).astype(int)

    # Initialize parameters.
    orig_parameter_dict = {
        NUM_VALUES_KEY: 0, MEAN_VALUE_KEY: 0., MEAN_OF_SQUARES_KEY: 0.
    }

    radar_z_score_dict_no_height = {}
    radar_z_score_dict_with_height = {}
    radar_freq_dict_no_height = {}
    num_radar_fields = len(radar_field_names)
    num_radar_field_height_pairs = len(radar_field_name_by_pair)

    # Deep copies give each field (or pair) its own accumulator.
    for j in range(num_radar_fields):
        radar_z_score_dict_no_height[radar_field_names[j]] = copy.deepcopy(
            orig_parameter_dict)
        radar_freq_dict_no_height[radar_field_names[j]] = {}

    # Keys of the with-height dictionaries are (field name, height) tuples.
    for k in range(num_radar_field_height_pairs):
        radar_z_score_dict_with_height[
            radar_field_name_by_pair[k], radar_height_by_pair_m_agl[k]
        ] = copy.deepcopy(orig_parameter_dict)

    sounding_z_score_dict_no_height = {}
    sounding_z_score_dict_with_height = {}
    sounding_freq_dict_no_height = {}
    num_sounding_fields = len(sounding_field_names)
    num_sounding_heights = len(sounding_heights_m_agl)

    for j in range(num_sounding_fields):
        sounding_z_score_dict_no_height[sounding_field_names[j]] = (
            copy.deepcopy(orig_parameter_dict))
        sounding_freq_dict_no_height[sounding_field_names[j]] = {}

        for k in range(num_sounding_heights):
            sounding_z_score_dict_with_height[
                sounding_field_names[j], sounding_heights_m_agl[k]
            ] = copy.deepcopy(orig_parameter_dict)

    # Update the running parameters with each file in turn.
    for this_example_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_example_file_name))
        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_example_file_name, read_all_target_vars=True,
            num_rows_to_keep=num_radar_rows,
            num_columns_to_keep=num_radar_columns,
            first_time_to_keep_unix_sec=first_time_unix_sec,
            last_time_to_keep_unix_sec=last_time_unix_sec)

        # Skip files with no examples (this also skips the separator line).
        this_num_examples = len(this_example_dict[input_examples.FULL_IDS_KEY])
        if this_num_examples == 0:
            continue

        # Radar parameters per field (all heights together).
        for j in range(num_radar_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                radar_field_names[j]))

            if num_radar_dimensions == 3:
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_names[j])
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           this_field_index]

            elif num_radar_dimensions == 2:
                # One field may occupy several channels, so all matching
                # channels are selected.
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])
                these_field_indices = numpy.where(
                    all_field_names == radar_field_names[j])[0]
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][...,
                                                           these_field_indices]

            else:
                if radar_field_names[j] == radar_utils.REFL_NAME:
                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][..., 0]
                else:
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_names[j])
                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_no_height[radar_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=radar_z_score_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix))

            radar_freq_dict_no_height[radar_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=radar_freq_dict_no_height[
                        radar_field_names[j]],
                    new_data_matrix=this_radar_matrix,
                    rounding_base=RADAR_INTERVAL_DICT[radar_field_names[j]]))

        # Radar parameters per field/height pair.
        for k in range(num_radar_field_height_pairs):
            print(('Updating normalization params for "{0:s}" at {1:d} metres '
                   'AGL...').format(radar_field_name_by_pair[k],
                                    radar_height_by_pair_m_agl[k]))

            if num_radar_dimensions == 3:
                this_field_index = this_example_dict[
                    input_examples.RADAR_FIELDS_KEY].index(
                        radar_field_name_by_pair[k])
                this_height_index = numpy.where(
                    this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                    radar_height_by_pair_m_agl[k])[0][0]
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][
                        ..., this_height_index, this_field_index]

            elif num_radar_dimensions == 2:
                # The first channel matching both field and height is used.
                all_field_names = numpy.array(
                    this_example_dict[input_examples.RADAR_FIELDS_KEY])
                all_heights_m_agl = this_example_dict[
                    input_examples.RADAR_HEIGHTS_KEY]
                this_index = numpy.where(
                    numpy.logical_and(
                        all_field_names == radar_field_name_by_pair[k],
                        all_heights_m_agl == radar_height_by_pair_m_agl[k]))[0][0]
                this_radar_matrix = this_example_dict[
                    input_examples.RADAR_IMAGE_MATRIX_KEY][..., this_index]

            else:
                if radar_field_name_by_pair[k] == radar_utils.REFL_NAME:
                    this_height_index = numpy.where(
                        this_example_dict[input_examples.RADAR_HEIGHTS_KEY] ==
                        radar_height_by_pair_m_agl[k])[0][0]
                    this_radar_matrix = this_example_dict[
                        input_examples.REFL_IMAGE_MATRIX_KEY][
                            ..., this_height_index, 0]
                else:
                    this_field_index = this_example_dict[
                        input_examples.RADAR_FIELDS_KEY].index(
                            radar_field_name_by_pair[k])
                    this_radar_matrix = this_example_dict[
                        input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY][
                            ..., this_field_index]

            radar_z_score_dict_with_height[
                radar_field_name_by_pair[k], radar_height_by_pair_m_agl[k]
            ] = _update_z_score_params(
                z_score_param_dict=radar_z_score_dict_with_height[
                    radar_field_name_by_pair[k],
                    radar_height_by_pair_m_agl[k]],
                new_data_matrix=this_radar_matrix)

        # Sounding parameters: per field, then per field/height pair.
        for j in range(num_sounding_fields):
            print('Updating normalization params for "{0:s}"...'.format(
                sounding_field_names[j]))

            this_field_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY].index(
                    sounding_field_names[j])
            this_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY][..., this_field_index]

            sounding_z_score_dict_no_height[sounding_field_names[j]] = (
                _update_z_score_params(
                    z_score_param_dict=sounding_z_score_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix))

            sounding_freq_dict_no_height[sounding_field_names[j]] = (
                _update_frequency_dict(
                    frequency_dict=sounding_freq_dict_no_height[
                        sounding_field_names[j]],
                    new_data_matrix=this_sounding_matrix,
                    rounding_base=SOUNDING_INTERVAL_DICT[
                        sounding_field_names[j]]))

            # `this_field_index` from the enclosing loop is reused here.
            for k in range(num_sounding_heights):
                this_height_index = numpy.where(
                    this_example_dict[input_examples.SOUNDING_HEIGHTS_KEY] ==
                    sounding_heights_m_agl[k])[0][0]
                this_sounding_matrix = this_example_dict[
                    input_examples.SOUNDING_MATRIX_KEY][
                        ..., this_height_index, this_field_index]

                print(('Updating normalization params for "{0:s}" at {1:d} m '
                       'AGL...').format(sounding_field_names[j],
                                        sounding_heights_m_agl[k]))

                sounding_z_score_dict_with_height[
                    sounding_field_names[j], sounding_heights_m_agl[k]
                ] = _update_z_score_params(
                    z_score_param_dict=sounding_z_score_dict_with_height[
                        sounding_field_names[j], sounding_heights_m_agl[k]],
                    new_data_matrix=this_sounding_matrix)

        print(SEPARATOR_STRING)

    # Convert dictionaries to pandas DataFrames.
    radar_table_no_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_no_height,
        frequency_dict_dict=radar_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)
    print('Normalization params for each radar field:\n{0:s}\n\n'.format(
        str(radar_table_no_height)))

    # The with-height tables are built from z-score params only.
    radar_table_with_height = _convert_normalization_params(
        z_score_dict_dict=radar_z_score_dict_with_height)
    print(('Normalization params for each radar field/height pair:\n{0:s}\n\n'
          ).format(str(radar_table_with_height)))

    sounding_table_no_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_no_height,
        frequency_dict_dict=sounding_freq_dict_no_height,
        min_percentile_level=min_percentile_level,
        max_percentile_level=max_percentile_level)
    print('Normalization params for each sounding field:\n{0:s}\n\n'.format(
        str(sounding_table_no_height)))

    sounding_table_with_height = _convert_normalization_params(
        z_score_dict_dict=sounding_z_score_dict_with_height)
    print(
        ('Normalization params for each sounding field/height pair:\n{0:s}\n\n'
        ).format(str(sounding_table_with_height)))

    print('Writing normalization params to file: "{0:s}"...'.format(
        output_file_name))
    dl_utils.write_normalization_params(
        pickle_file_name=output_file_name,
        radar_table_no_height=radar_table_no_height,
        radar_table_with_height=radar_table_with_height,
        sounding_table_no_height=sounding_table_no_height,
        sounding_table_with_height=sounding_table_with_height)
def _find_examples_to_read(option_dict, num_examples_total):
    """Determines which examples to read.

    E = number of examples to read

    Storm IDs, storm times and target values are collected from every example
    file.  Examples whose target value is
    `target_val_utils.INVALID_STORM_INTEGER` are dropped.  The remainder is
    subsampled: uniformly at random without replacement if there are no
    class-sampling fractions (and more than `num_examples_total` examples
    were found), otherwise with `dl_utils.sample_by_class`.

    :param option_dict: See doc for any generator in this file.
    :param num_examples_total: Number of examples to generate.
    :return: storm_ids: length-E list of storm IDs (strings).
    :return: storm_times_unix_sec: length-E numpy array of storm times.
    """

    error_checking.assert_is_integer(num_examples_total)
    error_checking.assert_is_greater(num_examples_total, 0)

    example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY]
    radar_field_names = option_dict[trainval_io.RADAR_FIELDS_KEY]
    radar_heights_m_agl = option_dict[trainval_io.RADAR_HEIGHTS_KEY]
    first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY]
    last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY]
    num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY]
    num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY]

    class_to_sampling_fraction_dict = option_dict[
        trainval_io.SAMPLING_FRACTIONS_KEY]

    storm_ids = []
    storm_times_unix_sec = numpy.array([], dtype=int)
    target_values = numpy.array([], dtype=int)
    target_name = None

    for this_file_name in example_file_names:
        # Call form of print: valid in both Python 2 and Python 3.
        print('Reading target values from: "{0:s}"...'.format(
            this_file_name))

        # Only the first radar field and height are requested, without
        # soundings; just IDs, times and target values are used from the
        # result.
        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, include_soundings=False,
            radar_field_names_to_keep=[radar_field_names[0]],
            radar_heights_to_keep_m_agl=radar_heights_m_agl[[0]],
            first_time_to_keep_unix_sec=first_storm_time_unix_sec,
            last_time_to_keep_unix_sec=last_storm_time_unix_sec,
            num_rows_to_keep=num_grid_rows,
            num_columns_to_keep=num_grid_columns)

        target_name = this_example_dict[input_examples.TARGET_NAME_KEY]
        storm_ids += this_example_dict[input_examples.STORM_IDS_KEY]

        storm_times_unix_sec = numpy.concatenate((
            storm_times_unix_sec,
            this_example_dict[input_examples.STORM_TIMES_KEY]
        ))
        target_values = numpy.concatenate((
            target_values,
            this_example_dict[input_examples.TARGET_VALUES_KEY]
        ))

    # Drop examples with an invalid target value.
    indices_to_keep = numpy.where(
        target_values != target_val_utils.INVALID_STORM_INTEGER
    )[0]

    storm_ids = [storm_ids[k] for k in indices_to_keep]
    storm_times_unix_sec = storm_times_unix_sec[indices_to_keep]
    target_values = target_values[indices_to_keep]
    num_examples_found = len(storm_ids)

    if class_to_sampling_fraction_dict is None:
        indices_to_keep = numpy.linspace(
            0, num_examples_found - 1, num=num_examples_found, dtype=int)

        if num_examples_found > num_examples_total:
            indices_to_keep = numpy.random.choice(
                indices_to_keep, size=num_examples_total, replace=False)
    else:
        indices_to_keep = dl_utils.sample_by_class(
            sampling_fraction_by_class_dict=class_to_sampling_fraction_dict,
            target_name=target_name, target_values=target_values,
            num_examples_total=num_examples_total)

    storm_ids = [storm_ids[k] for k in indices_to_keep]
    storm_times_unix_sec = storm_times_unix_sec[indices_to_keep]

    return storm_ids, storm_times_unix_sec
def gridrad_generator_2d_reduced(option_dict, list_of_operation_dicts,
                                 num_examples_total):
    """Generates examples with 2-D GridRad images.

    These 2-D images are produced by applying layer operations to the native 3-D
    images.  The layer operations are specified by `list_of_operation_dicts`.

    Each example (storm object) consists of the following:

    - Storm-centered radar images (one 2-D image for each layer operation)
    - Storm-centered sounding (optional)
    - Target value (class)

    This is a generator.  One dictionary is yielded for each input file that
    contains at least one of the selected examples; the generator finishes
    once every file has been read.

    NOTE: `option_dict` is modified in place (the radar-field and radar-height
    entries are overwritten with the values implied by
    `list_of_operation_dicts`).

    :param option_dict: Dictionary with the following keys.
    option_dict['example_file_names']: See doc for
        `training_validation_io.gridrad_generator_2d_reduced`.
    option_dict['binarize_target']: Same.
    option_dict['sounding_field_names']: Same.
    option_dict['sounding_heights_m_agl']: Same.
    option_dict['first_storm_time_unix_sec']: Same.
    option_dict['last_storm_time_unix_sec']: Same.
    option_dict['num_grid_rows']: Same.
    option_dict['num_grid_columns']: Same.
    option_dict['normalization_type_string']: Same.
    option_dict['normalization_param_file_name']: Same.
    option_dict['min_normalized_value']: Same.
    option_dict['max_normalized_value']: Same.
    option_dict['class_to_sampling_fraction_dict']: Same.

    :param list_of_operation_dicts: See doc for
        `input_examples.reduce_examples_3d_to_2d`.
    :param num_examples_total: Number of examples to generate.

    :return: storm_object_dict: Dictionary with the following keys.
    storm_object_dict['list_of_input_matrices']: length-T list of numpy arrays,
        where T = number of input tensors to model.  The first axis of each
        array has length E.
    storm_object_dict['storm_ids']: length-E list of storm IDs.
    storm_object_dict['storm_times_unix_sec']: length-E numpy array of storm
        times.
    storm_object_dict['target_array']: See output doc for
        `training_validation_io.gridrad_generator_2d_reduced`.
    storm_object_dict['sounding_pressure_matrix_pascals']: numpy array (E x H_s)
        of pressures.  If soundings were not read, this is None.
    storm_object_dict['radar_field_names']: length-C list of field names, where
        the [j]th item corresponds to the [j]th channel of the 2-D radar images
        returned in "list_of_input_matrices".
    storm_object_dict['min_radar_heights_m_agl']: length-C numpy array with
        minimum height for each layer operation (used to reduce 3-D radar images
        to 2-D).
    storm_object_dict['max_radar_heights_m_agl']: Same but with max heights.
    storm_object_dict['radar_layer_operation_names']: length-C list with names
        of layer operations.  Each name must be accepted by
        `input_examples._check_layer_operation`.
    """

    unique_radar_field_names, unique_radar_heights_m_agl = (
        trainval_io.layer_ops_to_field_height_pairs(list_of_operation_dicts)
    )

    # `_find_examples_to_read` takes radar fields/heights from `option_dict`,
    # so they are stored there (this changes the caller's dictionary).
    option_dict[trainval_io.RADAR_FIELDS_KEY] = unique_radar_field_names
    option_dict[trainval_io.RADAR_HEIGHTS_KEY] = unique_radar_heights_m_agl

    storm_ids, storm_times_unix_sec = _find_examples_to_read(
        option_dict=option_dict, num_examples_total=num_examples_total)
    print('\n')

    example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY]
    first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY]
    last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY]
    num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY]
    num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY]

    sounding_field_names = option_dict[trainval_io.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = option_dict[trainval_io.SOUNDING_HEIGHTS_KEY]

    normalization_type_string = option_dict[trainval_io.NORMALIZATION_TYPE_KEY]
    normalization_param_file_name = option_dict[
        trainval_io.NORMALIZATION_FILE_KEY]
    min_normalized_value = option_dict[trainval_io.MIN_NORMALIZED_VALUE_KEY]
    max_normalized_value = option_dict[trainval_io.MAX_NORMALIZED_VALUE_KEY]

    binarize_target = option_dict[trainval_io.BINARIZE_TARGET_KEY]

    # The first file is read (metadata only) just to find the target name.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    num_classes = target_val_utils.target_name_to_num_classes(
        target_name=target_name, include_dead_storms=False)

    # Pressure is always read along with the soundings, because it is returned
    # separately in the output dictionary.
    if sounding_field_names is None:
        sounding_field_names_to_read = None
    elif soundings.PRESSURE_NAME in sounding_field_names:
        sounding_field_names_to_read = sounding_field_names + []
    else:
        sounding_field_names_to_read = (
            sounding_field_names + [soundings.PRESSURE_NAME]
        )

    # Exhausting this loop ends the generator normally.  (An explicit
    # `raise StopIteration` inside a generator is turned into RuntimeError by
    # PEP 479.)
    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name,
            include_soundings=sounding_field_names is not None,
            radar_field_names_to_keep=unique_radar_field_names,
            radar_heights_to_keep_m_agl=unique_radar_heights_m_agl,
            sounding_field_names_to_keep=sounding_field_names_to_read,
            sounding_heights_to_keep_m_agl=sounding_heights_m_agl,
            first_time_to_keep_unix_sec=first_storm_time_unix_sec,
            last_time_to_keep_unix_sec=last_storm_time_unix_sec,
            num_rows_to_keep=num_grid_rows,
            num_columns_to_keep=num_grid_columns)

        if this_example_dict is None:
            continue

        # Keep only the examples selected by `_find_examples_to_read`.
        # Negative indices are filtered out below.
        indices_to_keep = tracking_utils.find_storm_objects(
            all_storm_ids=this_example_dict[input_examples.STORM_IDS_KEY],
            all_times_unix_sec=this_example_dict[
                input_examples.STORM_TIMES_KEY],
            storm_ids_to_keep=storm_ids,
            times_to_keep_unix_sec=storm_times_unix_sec, allow_missing=True)

        indices_to_keep = indices_to_keep[indices_to_keep >= 0]
        if len(indices_to_keep) == 0:
            continue

        this_example_dict = input_examples.subset_examples(
            example_dict=this_example_dict, indices_to_keep=indices_to_keep)

        this_example_dict = input_examples.reduce_examples_3d_to_2d(
            example_dict=this_example_dict,
            list_of_operation_dicts=list_of_operation_dicts)

        radar_field_names_2d = this_example_dict[
            input_examples.RADAR_FIELDS_KEY]

        include_soundings = (
            input_examples.SOUNDING_MATRIX_KEY in this_example_dict)

        # `+ 0.` and `+ 0` below create copies, so the arrays that are yielded
        # do not share memory with `this_example_dict`.
        if include_soundings:
            this_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY]

            pressure_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY
            ].index(soundings.PRESSURE_NAME)

            sounding_pressure_matrix_pascals = (
                this_sounding_matrix[..., pressure_index] + 0.
            )

            # If pressure was read only for the pressure matrix, it is the
            # extra field appended last, so drop the last channel.
            if soundings.PRESSURE_NAME not in sounding_field_names:
                this_sounding_matrix = this_sounding_matrix[..., :-1]

            sounding_matrix = this_sounding_matrix + 0.
        else:
            sounding_matrix = None
            sounding_pressure_matrix_pascals = None

        radar_image_matrix = (
            this_example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY] + 0.
        )
        target_values = (
            this_example_dict[input_examples.TARGET_VALUES_KEY] + 0)

        if normalization_type_string is not None:
            radar_image_matrix = dl_utils.normalize_radar_images(
                radar_image_matrix=radar_image_matrix,
                field_names=radar_field_names_2d,
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            if include_soundings:
                sounding_matrix = dl_utils.normalize_soundings(
                    sounding_matrix=sounding_matrix,
                    field_names=sounding_field_names,
                    normalization_type_string=normalization_type_string,
                    normalization_param_file_name=normalization_param_file_name,
                    min_normalized_value=min_normalized_value,
                    max_normalized_value=max_normalized_value).astype('float32')

        list_of_predictor_matrices = [radar_image_matrix]
        if include_soundings:
            list_of_predictor_matrices.append(sounding_matrix)

        target_array = _finalize_targets(
            target_values=target_values, binarize_target=binarize_target,
            num_classes=num_classes)

        storm_object_dict = {
            INPUT_MATRICES_KEY: list_of_predictor_matrices,
            TARGET_ARRAY_KEY: target_array,
            STORM_IDS_KEY: this_example_dict[input_examples.STORM_IDS_KEY],
            STORM_TIMES_KEY: this_example_dict[input_examples.STORM_TIMES_KEY],
            SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
        }

        # Copy metadata describing the 3-D-to-2-D reduction into the output.
        for this_key in REDUCTION_METADATA_KEYS:
            storm_object_dict[this_key] = this_example_dict[this_key]

        yield storm_object_dict
def myrorss_generator_2d3d(option_dict, num_examples_total):
    """Generates examples with both 2-D and 3-D radar images.

    Each example (storm object) consists of the following:

    - Storm-centered azimuthal shear (one 2-D image for each field)
    - Storm-centered reflectivity (one 3-D image)
    - Storm-centered sounding (optional)
    - Target value (class)

    This is a generator.  One dictionary is yielded for each input file that
    contains at least one of the selected examples; the generator finishes
    once every file has been read.

    :param option_dict: Dictionary with the following keys.
    option_dict['example_file_names']: See doc for
        `training_validation_io.myrorss_generator_2d3d`.
    option_dict['binarize_target']: Same.
    option_dict['radar_field_names']: Same.
    option_dict['radar_heights_m_agl']: Same.
    option_dict['sounding_field_names']: Same.
    option_dict['sounding_heights_m_agl']: Same.
    option_dict['first_storm_time_unix_sec']: Same.
    option_dict['last_storm_time_unix_sec']: Same.
    option_dict['num_grid_rows']: Same.
    option_dict['num_grid_columns']: Same.
    option_dict['normalization_type_string']: See doc for `generator_2d_or_3d`.
    option_dict['normalization_param_file_name']: Same.
    option_dict['min_normalized_value']: Same.
    option_dict['max_normalized_value']: Same.
    option_dict['class_to_sampling_fraction_dict']: Same.

    :param num_examples_total: Total number of examples to generate.

    :return: storm_object_dict: Dictionary with the following keys.
    storm_object_dict['list_of_input_matrices']: length-T list of numpy arrays,
        where T = number of input tensors to model.  The first axis of each
        array has length E.
    storm_object_dict['storm_ids']: length-E list of storm IDs.
    storm_object_dict['storm_times_unix_sec']: length-E numpy array of storm
        times.
    storm_object_dict['target_array']: See output doc for
        `training_validation_io.myrorss_generator_2d3d`.
    storm_object_dict['sounding_pressure_matrix_pascals']: numpy array (E x H_s)
        of pressures.  If soundings were not read, this is None.
    """

    storm_ids, storm_times_unix_sec = _find_examples_to_read(
        option_dict=option_dict, num_examples_total=num_examples_total)
    print('\n')

    example_file_names = option_dict[trainval_io.EXAMPLE_FILES_KEY]
    first_storm_time_unix_sec = option_dict[trainval_io.FIRST_STORM_TIME_KEY]
    last_storm_time_unix_sec = option_dict[trainval_io.LAST_STORM_TIME_KEY]
    num_grid_rows = option_dict[trainval_io.NUM_ROWS_KEY]
    num_grid_columns = option_dict[trainval_io.NUM_COLUMNS_KEY]

    azimuthal_shear_field_names = option_dict[trainval_io.RADAR_FIELDS_KEY]
    reflectivity_heights_m_agl = option_dict[trainval_io.RADAR_HEIGHTS_KEY]
    sounding_field_names = option_dict[trainval_io.SOUNDING_FIELDS_KEY]
    sounding_heights_m_agl = option_dict[trainval_io.SOUNDING_HEIGHTS_KEY]

    normalization_type_string = option_dict[trainval_io.NORMALIZATION_TYPE_KEY]
    normalization_param_file_name = option_dict[
        trainval_io.NORMALIZATION_FILE_KEY]
    min_normalized_value = option_dict[trainval_io.MIN_NORMALIZED_VALUE_KEY]
    max_normalized_value = option_dict[trainval_io.MAX_NORMALIZED_VALUE_KEY]

    binarize_target = option_dict[trainval_io.BINARIZE_TARGET_KEY]

    # The first file is read (metadata only) just to find the target name.
    this_example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_names[0], metadata_only=True)
    target_name = this_example_dict[input_examples.TARGET_NAME_KEY]

    num_classes = target_val_utils.target_name_to_num_classes(
        target_name=target_name, include_dead_storms=False)

    # Pressure is always read along with the soundings, because it is returned
    # separately in the output dictionary.
    if sounding_field_names is None:
        sounding_field_names_to_read = None
    elif soundings.PRESSURE_NAME in sounding_field_names:
        sounding_field_names_to_read = sounding_field_names + []
    else:
        sounding_field_names_to_read = (
            sounding_field_names + [soundings.PRESSURE_NAME]
        )

    # Exhausting this loop ends the generator normally.  (An explicit
    # `raise StopIteration` inside a generator is turned into RuntimeError by
    # PEP 479.)
    for this_file_name in example_file_names:
        print('Reading data from: "{0:s}"...'.format(this_file_name))

        this_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name,
            include_soundings=sounding_field_names is not None,
            radar_field_names_to_keep=azimuthal_shear_field_names,
            radar_heights_to_keep_m_agl=reflectivity_heights_m_agl,
            sounding_field_names_to_keep=sounding_field_names_to_read,
            sounding_heights_to_keep_m_agl=sounding_heights_m_agl,
            first_time_to_keep_unix_sec=first_storm_time_unix_sec,
            last_time_to_keep_unix_sec=last_storm_time_unix_sec,
            num_rows_to_keep=num_grid_rows,
            num_columns_to_keep=num_grid_columns)

        if this_example_dict is None:
            continue

        # Keep only the examples selected by `_find_examples_to_read`.
        # Negative indices are filtered out below.
        indices_to_keep = tracking_utils.find_storm_objects(
            all_storm_ids=this_example_dict[input_examples.STORM_IDS_KEY],
            all_times_unix_sec=this_example_dict[
                input_examples.STORM_TIMES_KEY],
            storm_ids_to_keep=storm_ids,
            times_to_keep_unix_sec=storm_times_unix_sec, allow_missing=True)

        indices_to_keep = indices_to_keep[indices_to_keep >= 0]
        if len(indices_to_keep) == 0:
            continue

        this_example_dict = input_examples.subset_examples(
            example_dict=this_example_dict, indices_to_keep=indices_to_keep)

        include_soundings = (
            input_examples.SOUNDING_MATRIX_KEY in this_example_dict)

        # `+ 0.` and `+ 0` below create copies, so the arrays that are yielded
        # do not share memory with `this_example_dict`.
        if include_soundings:
            this_sounding_matrix = this_example_dict[
                input_examples.SOUNDING_MATRIX_KEY]

            pressure_index = this_example_dict[
                input_examples.SOUNDING_FIELDS_KEY
            ].index(soundings.PRESSURE_NAME)

            sounding_pressure_matrix_pascals = (
                this_sounding_matrix[..., pressure_index] + 0.
            )

            # If pressure was read only for the pressure matrix, it is the
            # extra field appended last, so drop the last channel.  The slice
            # must be `:-1`; indexing with `-1` would keep *only* pressure
            # and remove the channel axis.
            if soundings.PRESSURE_NAME not in sounding_field_names:
                this_sounding_matrix = this_sounding_matrix[..., :-1]

            sounding_matrix = this_sounding_matrix + 0.
        else:
            # No soundings: the pressure matrix is None, as documented.
            sounding_matrix = None
            sounding_pressure_matrix_pascals = None

        reflectivity_image_matrix_dbz = (
            this_example_dict[input_examples.REFL_IMAGE_MATRIX_KEY] + 0.
        )
        az_shear_image_matrix_s01 = (
            this_example_dict[input_examples.AZ_SHEAR_IMAGE_MATRIX_KEY] + 0.
        )
        target_values = (
            this_example_dict[input_examples.TARGET_VALUES_KEY] + 0)

        if normalization_type_string is not None:
            reflectivity_image_matrix_dbz = dl_utils.normalize_radar_images(
                radar_image_matrix=reflectivity_image_matrix_dbz,
                field_names=[radar_utils.REFL_NAME],
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            az_shear_image_matrix_s01 = dl_utils.normalize_radar_images(
                radar_image_matrix=az_shear_image_matrix_s01,
                field_names=azimuthal_shear_field_names,
                normalization_type_string=normalization_type_string,
                normalization_param_file_name=normalization_param_file_name,
                min_normalized_value=min_normalized_value,
                max_normalized_value=max_normalized_value).astype('float32')

            if include_soundings:
                sounding_matrix = dl_utils.normalize_soundings(
                    sounding_matrix=sounding_matrix,
                    field_names=sounding_field_names,
                    normalization_type_string=normalization_type_string,
                    normalization_param_file_name=normalization_param_file_name,
                    min_normalized_value=min_normalized_value,
                    max_normalized_value=max_normalized_value).astype('float32')

        list_of_predictor_matrices = [
            reflectivity_image_matrix_dbz, az_shear_image_matrix_s01
        ]
        if include_soundings:
            list_of_predictor_matrices.append(sounding_matrix)

        target_array = _finalize_targets(
            target_values=target_values, binarize_target=binarize_target,
            num_classes=num_classes)

        storm_object_dict = {
            INPUT_MATRICES_KEY: list_of_predictor_matrices,
            TARGET_ARRAY_KEY: target_array,
            STORM_IDS_KEY: this_example_dict[input_examples.STORM_IDS_KEY],
            STORM_TIMES_KEY: this_example_dict[input_examples.STORM_TIMES_KEY],
            SOUNDING_PRESSURES_KEY: sounding_pressure_matrix_pascals
        }

        yield storm_object_dict
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Makes figure to explain one convolution block.

    This is effectively the main method.

    The radar field is read and normalized; then convolution, activation,
    batch normalization and max-pooling are applied (in that order) to every
    example in the file.  One figure is plotted for each requested example.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid dimensions are passed to the reader as None.
    num_radar_rows = None if num_radar_rows <= 0 else num_radar_rows
    num_radar_columns = None if num_radar_columns <= 0 else num_radar_columns

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    heights_to_keep_m_agl = numpy.array([RADAR_HEIGHT_M_AGL], dtype=int)

    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name, read_all_target_vars=False,
        target_name=DUMMY_TARGET_NAME, include_soundings=False,
        num_rows_to_keep=num_radar_rows,
        num_columns_to_keep=num_radar_columns,
        radar_heights_to_keep_m_agl=heights_to_keep_m_agl)

    # Use the reflectivity matrix if the file has one; otherwise pull the
    # desired field out of the generic radar matrix (keeping the last axis).
    if input_examples.REFL_IMAGE_MATRIX_KEY not in example_dict:
        all_field_names = example_dict[input_examples.RADAR_FIELDS_KEY]
        field_index = all_field_names.index(RADAR_FIELD_NAME)
        all_radar_matrix = example_dict[input_examples.RADAR_IMAGE_MATRIX_KEY]
        input_feature_matrix = all_radar_matrix[..., [field_index]]
    else:
        input_feature_matrix = example_dict[
            input_examples.REFL_IMAGE_MATRIX_KEY]

    num_examples = input_feature_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        example_indices, num_examples)

    input_feature_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=input_feature_matrix,
        field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # Take index 0 along the last axis (4-D input) or the last two axes
    # (otherwise), then add back a length-1 final axis.
    if input_feature_matrix.ndim == 4:
        input_feature_matrix = input_feature_matrix[..., 0]
    else:
        input_feature_matrix = input_feature_matrix[..., 0, 0]

    input_feature_matrix = numpy.expand_dims(input_feature_matrix, axis=-1)

    print('Doing convolution for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_conv = None

    for k in range(num_examples):
        conv_output = standalone_utils.do_2d_convolution(
            feature_matrix=input_feature_matrix[k, ...] + 0,
            kernel_matrix=KERNEL_MATRIX, pad_edges=False, stride_length_px=1)
        conv_output = conv_output[0, ...]

        # Output shape is known only after the first convolution.
        if feature_matrix_after_conv is None:
            conv_dimensions = (num_examples,) + conv_output.shape
            feature_matrix_after_conv = numpy.full(conv_dimensions, numpy.nan)

        feature_matrix_after_conv[k, ...] = conv_output

    print('Doing activation for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_activn = standalone_utils.do_activation(
        input_values=feature_matrix_after_conv + 0,
        function_name=architecture_utils.RELU_FUNCTION_STRING, alpha=0.2)

    print('Doing batch norm for all {0:d} examples...'.format(num_examples))
    feature_matrix_after_bn = standalone_utils.do_batch_normalization(
        feature_matrix=feature_matrix_after_activn + 0)

    print('Doing max-pooling for all {0:d} examples...\n'.format(num_examples))
    feature_matrix_after_pooling = None

    for k in range(num_examples):
        pooling_output = standalone_utils.do_2d_pooling(
            feature_matrix=feature_matrix_after_bn[k, ...],
            stride_length_px=2,
            pooling_type_string=standalone_utils.MAX_POOLING_TYPE_STRING)
        pooling_output = pooling_output[0, ...]

        # Output shape is known only after the first pooling operation.
        if feature_matrix_after_pooling is None:
            pooling_dimensions = (num_examples,) + pooling_output.shape
            feature_matrix_after_pooling = numpy.full(
                pooling_dimensions, numpy.nan)

        feature_matrix_after_pooling[k, ...] = pooling_output

    # Plot only the requested examples.
    for k in example_indices:
        figure_file_name = '{0:s}/convolution_block{1:06d}.jpg'.format(
            output_dir_name, k)

        _plot_one_example(
            input_feature_matrix=input_feature_matrix[k, ...],
            feature_matrix_after_conv=feature_matrix_after_conv[k, ...],
            feature_matrix_after_activn=feature_matrix_after_activn[k, ...],
            feature_matrix_after_bn=feature_matrix_after_bn[k, ...],
            feature_matrix_after_pooling=feature_matrix_after_pooling[k, ...],
            output_file_name=figure_file_name)
def _run(input_example_dir_name, storm_metafile_name, num_examples_in_subset,
         subset_randomly, output_example_file_name):
    """Extracts desired examples and writes them to one file.

    This is effectively the main method.

    Desired examples are listed in the storm-metadata file (optionally reduced
    to a subset).  They are grouped by SPC date; one input file is read per
    date, and the matching examples are written to the output file (the first
    date creates the file, later dates append to it).

    :param input_example_dir_name: See documentation at top of file.
    :param storm_metafile_name: Same.
    :param num_examples_in_subset: Same.
    :param subset_randomly: Same.
    :param output_example_file_name: Same.
    """

    print(
        'Reading storm metadata from: "{0:s}"...'.format(storm_metafile_name))
    example_id_strings, example_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name))

    # Subset only if the requested size is positive and smaller than the
    # number of examples available.
    num_examples_available = len(example_id_strings)

    if 0 < num_examples_in_subset < num_examples_available:
        if subset_randomly:
            all_indices = numpy.arange(num_examples_available, dtype=int)
            chosen_indices = numpy.random.choice(
                all_indices, size=num_examples_in_subset, replace=False)

            example_id_strings = [
                example_id_strings[j] for j in chosen_indices
            ]
            example_times_unix_sec = example_times_unix_sec[chosen_indices]
        else:
            # Non-random subset = first N examples.
            example_id_strings = example_id_strings[:num_examples_in_subset]
            example_times_unix_sec = (
                example_times_unix_sec[:num_examples_in_subset])

    example_spc_date_strings = numpy.array([
        time_conversion.time_to_spc_date_string(this_time)
        for this_time in example_times_unix_sec
    ])
    spc_date_strings = numpy.unique(example_spc_date_strings)

    # Find all input files up front, so that a missing file raises an error
    # before anything is written.
    example_file_name_by_day = []

    for this_date_string in spc_date_strings:
        example_file_name_by_day.append(input_examples.find_example_file(
            top_directory_name=input_example_dir_name, shuffled=False,
            spc_date_string=this_date_string, raise_error_if_missing=True))

    for day_index, this_file_name in enumerate(example_file_name_by_day):
        print('Reading data from: "{0:s}"...'.format(this_file_name))
        all_example_dict = input_examples.read_example_file(
            netcdf_file_name=this_file_name, read_all_target_vars=True)

        # Desired examples belonging to this SPC date.
        same_day_flags = (
            example_spc_date_strings == spc_date_strings[day_index])
        same_day_indices = numpy.where(same_day_flags)[0]

        desired_indices = tracking_utils.find_storm_objects(
            all_id_strings=all_example_dict[input_examples.FULL_IDS_KEY],
            all_times_unix_sec=all_example_dict[
                input_examples.STORM_TIMES_KEY],
            id_strings_to_keep=[
                example_id_strings[j] for j in same_day_indices
            ],
            times_to_keep_unix_sec=example_times_unix_sec[same_day_indices],
            allow_missing=False)

        desired_example_dict = input_examples.subset_examples(
            example_dict=all_example_dict, indices_to_keep=desired_indices)

        print('Writing {0:d} desired examples to: "{1:s}"...'.format(
            len(desired_indices), output_example_file_name))

        input_examples.write_example_file(
            netcdf_file_name=output_example_file_name,
            example_dict=desired_example_dict,
            append_to_file=day_index > 0)
def _run(example_file_name, example_indices, num_radar_rows, num_radar_columns,
         normalization_file_name, output_dir_name):
    """Plots data augmentation.

    This is effectively the main method.

    The selected examples are read, normalized, augmented (translation,
    rotation and noise; no flipping) and denormalized.  The augmented matrix
    is then split along the first axis into four parts (original, translated,
    rotated, noised) and one figure is plotted per example.

    :param example_file_name: See documentation at top of file.
    :param example_indices: Same.
    :param num_radar_rows: Same.
    :param num_radar_columns: Same.
    :param normalization_file_name: Same.
    :param output_dir_name: Same.
    """

    # Non-positive grid dimensions are passed to the reader as None.
    num_radar_rows = None if num_radar_rows <= 0 else num_radar_rows
    num_radar_columns = None if num_radar_columns <= 0 else num_radar_columns

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    print('Reading data from: "{0:s}"...'.format(example_file_name))
    heights_to_keep_m_agl = numpy.array([RADAR_HEIGHT_M_AGL], dtype=int)

    example_dict = input_examples.read_example_file(
        netcdf_file_name=example_file_name, read_all_target_vars=True,
        include_soundings=False, num_rows_to_keep=num_radar_rows,
        num_columns_to_keep=num_radar_columns,
        radar_field_names_to_keep=[RADAR_FIELD_NAME],
        radar_heights_to_keep_m_agl=heights_to_keep_m_agl)

    # Use the reflectivity matrix if the file has one, else the generic one.
    if input_examples.REFL_IMAGE_MATRIX_KEY in example_dict:
        matrix_key = input_examples.REFL_IMAGE_MATRIX_KEY
    else:
        matrix_key = input_examples.RADAR_IMAGE_MATRIX_KEY

    radar_matrix = example_dict[matrix_key]

    num_examples_total = radar_matrix.shape[0]
    error_checking.assert_is_geq_numpy_array(example_indices, 0)
    error_checking.assert_is_less_than_numpy_array(
        example_indices, num_examples_total)

    # Subset everything to the requested examples.
    radar_matrix = radar_matrix[example_indices, ...]

    all_id_strings = example_dict[input_examples.FULL_IDS_KEY]
    full_storm_id_strings = [all_id_strings[j] for j in example_indices]

    all_times_unix_sec = example_dict[input_examples.STORM_TIMES_KEY]
    storm_times_unix_sec = all_times_unix_sec[example_indices]

    radar_matrix = dl_utils.normalize_radar_images(
        radar_image_matrix=radar_matrix, field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    num_examples = radar_matrix.shape[0]
    dummy_target_values = numpy.full(num_examples, 0, dtype=int)

    augmentation_output = trainval_io._augment_radar_images(
        list_of_predictor_matrices=[radar_matrix],
        target_array=dummy_target_values,
        x_translations_pixels=X_TRANSLATIONS_PX,
        y_translations_pixels=Y_TRANSLATIONS_PX,
        ccw_rotation_angles_deg=CCW_ROTATION_ANGLES_DEG,
        noise_standard_deviation=NOISE_STANDARD_DEVIATION,
        num_noisings=1, flip_in_x=False, flip_in_y=False)

    radar_matrix = augmentation_output[0][0]

    radar_matrix = dl_utils.denormalize_radar_images(
        radar_image_matrix=radar_matrix, field_names=[RADAR_FIELD_NAME],
        normalization_type_string=NORMALIZATION_TYPE_STRING,
        normalization_param_file_name=normalization_file_name)

    # Split along the first axis into consecutive chunks of `num_examples`;
    # the last chunk takes whatever remains.
    orig_radar_matrix = radar_matrix[:num_examples, ...]
    translated_radar_matrix = radar_matrix[
        num_examples:(2 * num_examples), ...]
    rotated_radar_matrix = radar_matrix[
        (2 * num_examples):(3 * num_examples), ...]
    noised_radar_matrix = radar_matrix[(3 * num_examples):, ...]

    for j in range(num_examples):
        _plot_one_example(
            orig_radar_matrix=orig_radar_matrix[j, ...],
            translated_radar_matrix=translated_radar_matrix[j, ...],
            rotated_radar_matrix=rotated_radar_matrix[j, ...],
            noised_radar_matrix=noised_radar_matrix[j, ...],
            output_dir_name=output_dir_name,
            full_storm_id_string=full_storm_id_strings[j],
            storm_time_unix_sec=storm_times_unix_sec[j])