def _create_best_tracks(start_time_string, end_time_string,
                        top_input_dir_name, data_source,
                        top_output_dir_name, tracking_scale_metres2):
    """Runs the best-track algorithm with default parameters.

    :param start_time_string: See documentation at top of file.
    :param end_time_string: Same.
    :param top_input_dir_name: Same.
    :param data_source: Same.
    :param top_output_dir_name: Same.
    :param tracking_scale_metres2: Same.
    """

    tracking_utils.check_data_source(data_source)

    start_time_unix_sec = time_conversion.string_to_unix_sec(
        start_time_string, INPUT_TIME_FORMAT)
    end_time_unix_sec = time_conversion.string_to_unix_sec(
        end_time_string, INPUT_TIME_FORMAT)

    first_date_string = time_conversion.time_to_spc_date_string(
        start_time_unix_sec)
    last_date_string = time_conversion.time_to_spc_date_string(
        end_time_unix_sec)

    file_dictionary = best_tracks_smart_io.find_files_for_smart_io(
        start_time_unix_sec=start_time_unix_sec,
        start_spc_date_string=first_date_string,
        end_time_unix_sec=end_time_unix_sec,
        end_spc_date_string=last_date_string, data_source=data_source,
        tracking_scale_metres2=tracking_scale_metres2,
        top_input_dir_name=top_input_dir_name,
        top_output_dir_name=top_output_dir_name)

    best_tracks_smart_io.run_best_track(smart_file_dict=file_dictionary)

def _get_relative_processed_directory(data_source=None, spc_date_unix_sec=None,
                                      unix_time_sec=None,
                                      tracking_scale_metres2=None):
    """Generates relative path for processed storm-tracking files.

    :param data_source: Data source (either "segmotion" or "probSevere").
    :param spc_date_unix_sec: SPC date in Unix format (needed only if
        data_source = "segmotion").
    :param unix_time_sec: Valid time (needed only if data_source =
        "probSevere").
    :param tracking_scale_metres2: Tracking scale.
    :return: relative_processed_dir_name: Relative path for processed storm-
        tracking files.
    """

    if data_source == SEGMOTION_SOURCE_ID:
        date_string = time_conversion.time_to_spc_date_string(
            spc_date_unix_sec)
    else:
        date_string = time_conversion.unix_sec_to_string(
            unix_time_sec, DATE_FORMAT)

    return '{0:s}/scale_{1:d}m2'.format(
        date_string, int(tracking_scale_metres2))

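# A minimal usage sketch for _get_relative_processed_directory.  The SPC date
# and tracking scale below are hypothetical; the expected output assumes the
# SPC-date convention (1200-1200 UTC), so 1800 UTC 1 Jul 2011 falls in SPC
# date "20110701".
def _example_relative_processed_directory():
    """Sketch only: shows the relative path built for a segmotion case."""
    relative_dir_name = _get_relative_processed_directory(
        data_source=SEGMOTION_SOURCE_ID,
        spc_date_unix_sec=1309543200,  # 1800 UTC 1 Jul 2011 (hypothetical)
        tracking_scale_metres2=5e7)

    print(relative_dir_name)  # expected: "20110701/scale_50000000m2"
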
def find_match_file(top_directory_name, valid_time_unix_sec,
                    raise_error_if_missing=False):
    """Finds match file.

    A "match file" matches storm objects in one dataset (e.g., MYRORSS or
    GridRad) to those in another dataset, at one time step.

    :param top_directory_name: Name of top-level directory.
    :param valid_time_unix_sec: Valid time.
    :param raise_error_if_missing: See doc for `find_file`.
    :return: match_file_name: Path to match file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)

    match_file_name = '{0:s}/{1:s}/{2:s}/storm-matches_{3:s}.p'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        time_conversion.unix_sec_to_string(
            valid_time_unix_sec, FILE_NAME_TIME_FORMAT)
    )

    if raise_error_if_missing and not os.path.isfile(match_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            match_file_name)
        raise ValueError(error_string)

    return match_file_name

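# A minimal usage sketch for find_match_file.  The directory name and valid
# time are hypothetical; assuming FILE_NAME_TIME_FORMAT is something like
# "%Y-%m-%d-%H%M%S", the expected path has the form
# "<top_dir>/<yyyy>/<yyyymmdd>/storm-matches_<time>.p".
def _example_find_match_file():
    """Sketch only: builds the expected path without requiring the file."""
    match_file_name = find_match_file(
        top_directory_name='/data/storm_matches',  # hypothetical
        valid_time_unix_sec=1309543200,  # 1800 UTC 1 Jul 2011 (hypothetical)
        raise_error_if_missing=False)

    print(match_file_name)
    # e.g. "/data/storm_matches/2011/20110701/storm-matches_2011-07-01-180000.p"
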
def find_file(unix_time_sec, top_directory_name, raise_error_if_missing=True):
    """Finds GridRad file on local machine.

    Each GridRad file contains all fields at all heights for one valid time.

    :param unix_time_sec: Valid time.
    :param top_directory_name: Name of top-level directory with GridRad.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        raise_error_if_missing = True, will raise error.  If file is missing
        and raise_error_if_missing = False, will return *expected* path to
        file.
    :return: gridrad_file_name: Path to GridRad file.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(top_directory_name)

    spc_date_string = time_conversion.time_to_spc_date_string(unix_time_sec)

    gridrad_file_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        _get_pathless_file_name(unix_time_sec))

    if raise_error_if_missing and not os.path.isfile(gridrad_file_name):
        error_string = (
            'Cannot find GridRad file. Expected at: "{0:s}"'
        ).format(gridrad_file_name)
        raise ValueError(error_string)

    return gridrad_file_name

def find_raw_file(unix_time_sec=None, spc_date_unix_sec=None, field_name=None,
                  height_m_agl=None, data_source=None,
                  top_directory_name=None, raise_error_if_missing=True):
    """Finds raw file on local machine.

    This file should contain one radar field at one height and one time step.

    :param unix_time_sec: Time in Unix format.
    :param spc_date_unix_sec: SPC date in Unix format.
    :param field_name: Name of radar field in new format (as opposed to
        MYRORSS or MRMS format).
    :param height_m_agl: Height (metres above ground level).
    :param data_source: Data source (either "myrorss" or "mrms").
    :param top_directory_name: Top-level directory for raw files.
    :param raise_error_if_missing: Boolean flag.  If
        raise_error_if_missing = True and file is missing, will raise error.
    :return: raw_file_name: Path to raw file.  If raise_error_if_missing =
        False and file is missing, this will be the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is missing.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    relative_directory_name = get_relative_dir_for_raw_files(
        field_name=field_name, height_m_agl=height_m_agl,
        data_source=data_source)

    pathless_file_name = _get_pathless_raw_file_name(
        unix_time_sec, zipped=True)
    raw_file_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
        top_directory_name,
        time_conversion.time_to_spc_date_string(spc_date_unix_sec),
        relative_directory_name, pathless_file_name)

    # If the zipped file is missing, fall back on the unzipped file before
    # erroring out.
    if raise_error_if_missing and not os.path.isfile(raw_file_name):
        pathless_file_name = _get_pathless_raw_file_name(
            unix_time_sec, zipped=False)
        raw_file_name = '{0:s}/{1:s}/{2:s}/{3:s}'.format(
            top_directory_name,
            time_conversion.time_to_spc_date_string(spc_date_unix_sec),
            relative_directory_name, pathless_file_name)

        if not os.path.isfile(raw_file_name):
            raise ValueError(
                'Cannot find raw file. Expected at location: ' +
                raw_file_name)

    return raw_file_name

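# A minimal usage sketch for find_raw_file.  All arguments are hypothetical
# (the field name "reflectivity_dbz" is taken from the docstrings in this
# file).  Note that the zipped-then-unzipped fallback above runs only when
# raise_error_if_missing=True; with the flag False, the *zipped* expected
# path is always returned.
def _example_find_raw_file():
    """Sketch only: builds the expected path for one field/height/time."""
    raw_file_name = find_raw_file(
        unix_time_sec=1309543200,  # 1800 UTC 1 Jul 2011 (hypothetical)
        spc_date_unix_sec=1309543200,
        field_name='reflectivity_dbz',
        height_m_agl=250,  # hypothetical height
        data_source='myrorss',
        top_directory_name='/data/myrorss',  # hypothetical
        raise_error_if_missing=False)

    print(raw_file_name)
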
def read_stats_from_xml(xml_file_name, spc_date_unix_sec=None):
    """Reads storm statistics from XML file.

    :param xml_file_name: Path to input file.
    :param spc_date_unix_sec: SPC date in Unix format.
    :return: stats_table: pandas DataFrame with the following columns.
    stats_table.storm_id: String ID for storm cell.
    stats_table.east_velocity_m_s01: Eastward velocity (m/s).
    stats_table.north_velocity_m_s01: Northward velocity (m/s).
    stats_table.age_sec: Age of storm cell (seconds).
    """

    error_checking.assert_file_exists(xml_file_name)
    xml_tree = _open_xml_file(xml_file_name)

    storm_dict = {}
    this_column_name = None
    this_column_name_orig = None
    this_column_values = None

    for this_element in xml_tree.iter():
        if this_element.tag == 'datacolumn':
            # Starting a new data column, so commit the previous one (if it
            # was a tracked column).
            if this_column_name_orig in XML_COLUMN_NAMES_ORIG:
                storm_dict.update({this_column_name: this_column_values})

            this_column_name_orig = this_element.attrib['name']
            if this_column_name_orig in XML_COLUMN_NAMES_ORIG:
                this_column_name = _xml_column_name_orig_to_new(
                    this_column_name_orig)
                this_column_values = []

            continue

        if this_column_name_orig not in XML_COLUMN_NAMES_ORIG:
            continue

        if this_column_name == tracking_io.STORM_ID_COLUMN:
            this_column_values.append(this_element.attrib['value'])
        elif this_column_name == tracking_io.NORTH_VELOCITY_COLUMN:
            this_column_values.append(
                -1 * float(this_element.attrib['value']))
        elif this_column_name == tracking_io.EAST_VELOCITY_COLUMN:
            this_column_values.append(float(this_element.attrib['value']))
        elif this_column_name == tracking_io.AGE_COLUMN:
            this_column_values.append(
                int(numpy.round(float(this_element.attrib['value']))))

    # Commit the final column.  (Without this, the last tracked data column
    # in the file would be silently dropped, since the loop commits a column
    # only when the *next* "datacolumn" tag appears.)
    if this_column_name_orig in XML_COLUMN_NAMES_ORIG:
        storm_dict.update({this_column_name: this_column_values})

    stats_table = pandas.DataFrame.from_dict(storm_dict)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    storm_ids = _append_spc_date_to_storm_ids(
        stats_table[tracking_io.STORM_ID_COLUMN].values, spc_date_string)

    stats_table = stats_table.assign(
        **{tracking_io.STORM_ID_COLUMN: storm_ids})
    return tracking_io.remove_rows_with_nan(stats_table)

def find_target_file(top_directory_name, event_type_string, spc_date_string,
                     raise_error_if_missing=True, unix_time_sec=None):
    """Locates file with target values for either one time or one SPC date.

    :param top_directory_name: Name of top-level directory with target files.
    :param event_type_string: Event type (must be accepted by
        `linkage.check_event_type`).
    :param spc_date_string: SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :param unix_time_sec: Valid time.  If None, will look for the one file
        that covers the whole SPC date.
    :return: target_file_name: Path to target file.  If file is missing and
        `raise_error_if_missing = False`, this will be the *expected* path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    linkage.check_event_type(event_type_string)
    error_checking.assert_is_boolean(raise_error_if_missing)

    if unix_time_sec is None:
        # Called only to validate the format of `spc_date_string`.
        time_conversion.spc_date_string_to_unix_sec(spc_date_string)

        if event_type_string == linkage.WIND_EVENT_STRING:
            target_file_name = '{0:s}/{1:s}/wind_labels_{2:s}.nc'.format(
                top_directory_name, spc_date_string[:4], spc_date_string)
        else:
            target_file_name = '{0:s}/{1:s}/tornado_labels_{2:s}.nc'.format(
                top_directory_name, spc_date_string[:4], spc_date_string)
    else:
        spc_date_string = time_conversion.time_to_spc_date_string(
            unix_time_sec)
        valid_time_string = time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT)

        if event_type_string == linkage.WIND_EVENT_STRING:
            target_file_name = (
                '{0:s}/{1:s}/{2:s}/wind_labels_{3:s}.nc'
            ).format(
                top_directory_name, spc_date_string[:4], spc_date_string,
                valid_time_string)
        else:
            target_file_name = (
                '{0:s}/{1:s}/{2:s}/tornado_labels_{3:s}.nc'
            ).format(
                top_directory_name, spc_date_string[:4], spc_date_string,
                valid_time_string)

    if raise_error_if_missing and not os.path.isfile(target_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            target_file_name)
        raise ValueError(error_string)

    return target_file_name

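# A minimal usage sketch for find_target_file, contrasting the SPC-date file
# with the single-time file.  The directory name and times are hypothetical;
# the exact time suffix depends on TIME_FORMAT.
def _example_find_target_file():
    """Sketch only: expected paths for one SPC date vs. one valid time."""
    spc_date_file_name = find_target_file(
        top_directory_name='/data/targets',  # hypothetical
        event_type_string=linkage.WIND_EVENT_STRING,
        spc_date_string='20110701', raise_error_if_missing=False)
    # e.g. "/data/targets/2011/wind_labels_20110701.nc"

    one_time_file_name = find_target_file(
        top_directory_name='/data/targets',
        event_type_string=linkage.WIND_EVENT_STRING,
        spc_date_string='20110701',
        unix_time_sec=1309543200,  # 1800 UTC 1 Jul 2011 (hypothetical)
        raise_error_if_missing=False)
    # e.g. "/data/targets/2011/20110701/wind_labels_<time>.nc"

    print(spc_date_file_name)
    print(one_time_file_name)
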
def find_processed_file(top_processed_dir_name, tracking_scale_metres2,
                        data_source, unix_time_sec, spc_date_string=None,
                        raise_error_if_missing=True):
    """Finds processed tracking file.

    This file should contain storm outlines and tracking statistics for one
    time step.

    :param top_processed_dir_name: See doc for
        `_check_input_args_for_file_finding`.
    :param tracking_scale_metres2: Same.
    :param data_source: Same.
    :param unix_time_sec: Valid time.
    :param spc_date_string: [used only if data_source == "segmotion"]
        SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If the file is missing and
        `raise_error_if_missing = True`, this method will error out.  If the
        file is missing and `raise_error_if_missing = False`, will return the
        *expected* path.
    :return: processed_file_name: Path to processed tracking file.
    :raises: ValueError: if the file is missing and
        `raise_error_if_missing = True`.
    """

    tracking_scale_metres2 = _check_input_args_for_file_finding(
        top_processed_dir_name=top_processed_dir_name,
        tracking_scale_metres2=tracking_scale_metres2,
        data_source=data_source,
        raise_error_if_missing=raise_error_if_missing)

    if data_source == tracking_utils.SEGMOTION_SOURCE_ID:
        date_string = spc_date_string
    else:
        date_string = time_conversion.time_to_spc_date_string(unix_time_sec)

    processed_file_name = (
        '{0:s}/{1:s}/{2:s}/scale_{3:d}m2/{4:s}_{5:s}_{6:s}{7:s}'
    ).format(
        top_processed_dir_name, date_string[:4], date_string,
        tracking_scale_metres2, PREFIX_FOR_PATHLESS_FILE_NAMES, data_source,
        time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT_IN_FILE_NAMES),
        FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(processed_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            processed_file_name)
        raise ValueError(error_string)

    return processed_file_name

def find_file(
        top_prediction_dir_name, first_init_time_unix_sec,
        last_init_time_unix_sec, gridded, raise_error_if_missing=False):
    """Finds gridded or ungridded prediction file.

    :param top_prediction_dir_name: Name of top-level directory with
        prediction files.
    :param first_init_time_unix_sec: First initial time in file.  The
        "initial time" is the time of the storm object for which the
        prediction is being made.  This is different from the valid-time
        window (the time range for which the prediction is valid).
    :param last_init_time_unix_sec: Last initial time in file.
    :param gridded: Boolean flag.  If True, will look for gridded file.  If
        False, will look for ungridded file.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: prediction_file_name: Path to prediction file.  If file is
        missing and `raise_error_if_missing = False`, this will be the
        expected path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    # TODO(thunderhoser): Put lead time in file names.

    error_checking.assert_is_string(top_prediction_dir_name)
    error_checking.assert_is_integer(first_init_time_unix_sec)
    error_checking.assert_is_integer(last_init_time_unix_sec)
    error_checking.assert_is_geq(
        last_init_time_unix_sec, first_init_time_unix_sec)
    error_checking.assert_is_boolean(gridded)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        first_init_time_unix_sec)

    prediction_file_name = (
        '{0:s}/{1:s}/{2:s}/{3:s}_predictions_{4:s}_{5:s}{6:s}'
    ).format(
        top_prediction_dir_name, spc_date_string[:4], spc_date_string,
        'gridded' if gridded else 'ungridded',
        time_conversion.unix_sec_to_string(
            first_init_time_unix_sec, FILE_NAME_TIME_FORMAT),
        time_conversion.unix_sec_to_string(
            last_init_time_unix_sec, FILE_NAME_TIME_FORMAT),
        '.p' if gridded else '.nc'
    )

    if raise_error_if_missing and not os.path.isfile(prediction_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            prediction_file_name)
        raise ValueError(error_string)

    return prediction_file_name

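# A minimal usage sketch for the prediction-file finder above.  Inputs are
# hypothetical.  The file name encodes both the first and last initial times,
# plus a "gridded"/"ungridded" prefix and a format-dependent extension
# (.p for gridded Pickle files, .nc for ungridded NetCDF files).
def _example_find_prediction_file():
    """Sketch only: expected path for an ungridded prediction file."""
    prediction_file_name = find_file(
        top_prediction_dir_name='/data/predictions',  # hypothetical
        first_init_time_unix_sec=1309543200,  # 1800 UTC 1 Jul 2011
        last_init_time_unix_sec=1309546800,   # 1900 UTC 1 Jul 2011
        gridded=False, raise_error_if_missing=False)

    print(prediction_file_name)
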
def find_file(top_tracking_dir_name, tracking_scale_metres2, source_name,
              valid_time_unix_sec, spc_date_string=None,
              raise_error_if_missing=True):
    """Finds tracking file.

    This file should contain polygons, velocities, and other properties for
    one time step.

    :param top_tracking_dir_name: See doc for `_check_file_finding_args`.
    :param tracking_scale_metres2: Same.
    :param source_name: Same.
    :param valid_time_unix_sec: Valid time.
    :param spc_date_string: [used only if data source is segmotion]
        SPC date (format "yyyymmdd").
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: tracking_file_name: Path to tracking file.  If file is missing
        and `raise_error_if_missing = False`, this will be the *expected*
        path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    tracking_scale_metres2 = _check_file_finding_args(
        top_tracking_dir_name=top_tracking_dir_name,
        tracking_scale_metres2=tracking_scale_metres2,
        source_name=source_name,
        raise_error_if_missing=raise_error_if_missing)

    if source_name == tracking_utils.SEGMOTION_NAME:
        date_string = spc_date_string
    else:
        date_string = time_conversion.time_to_spc_date_string(
            valid_time_unix_sec)

    directory_name = '{0:s}/{1:s}/{2:s}/scale_{3:d}m2'.format(
        top_tracking_dir_name, date_string[:4], date_string,
        tracking_scale_metres2)

    tracking_file_name = '{0:s}/{1:s}_{2:s}_{3:s}{4:s}'.format(
        directory_name, FILE_NAME_PREFIX, source_name,
        time_conversion.unix_sec_to_string(
            valid_time_unix_sec, FILE_NAME_TIME_FORMAT),
        FILE_EXTENSION)

    if raise_error_if_missing and not os.path.isfile(tracking_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            tracking_file_name)
        raise ValueError(error_string)

    return tracking_file_name

def find_classification_file(top_directory_name, valid_time_unix_sec,
                             desire_zipped, allow_zipped_or_unzipped,
                             raise_error_if_missing=True):
    """Finds file with echo classifications.

    :param top_directory_name: Name of top-level directory.
    :param valid_time_unix_sec: Valid time.
    :param desire_zipped: Boolean flag.  If True, will look for zipped file
        first.  If False, will look for unzipped file first.
    :param allow_zipped_or_unzipped: Boolean flag.  If True and the first
        file is not found, will look for the second.
    :param raise_error_if_missing: Boolean flag.  If file is missing and
        `raise_error_if_missing = True`, this method will error out.
    :return: classification_file_name: Path to classification file.  If file
        is missing and `raise_error_if_missing = False`, this will be the
        *expected* path.
    :raises: ValueError: if file is missing and
        `raise_error_if_missing = True`.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(desire_zipped)
    error_checking.assert_is_boolean(allow_zipped_or_unzipped)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)

    classification_file_name = (
        '{0:s}/{1:s}/{2:s}/echo_classification_{3:s}.nc'
    ).format(
        top_directory_name, spc_date_string[:4], spc_date_string,
        time_conversion.unix_sec_to_string(valid_time_unix_sec, TIME_FORMAT)
    )

    if desire_zipped:
        classification_file_name += '.gz'

    if (allow_zipped_or_unzipped
            and not os.path.isfile(classification_file_name)):
        if desire_zipped:
            classification_file_name = classification_file_name[:-3]
        else:
            classification_file_name += '.gz'

    if raise_error_if_missing and not os.path.isfile(
            classification_file_name):
        error_string = 'Cannot find file. Expected at: "{0:s}"'.format(
            classification_file_name)
        raise ValueError(error_string)

    return classification_file_name

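# A minimal usage sketch for find_classification_file.  Inputs are
# hypothetical.  With desire_zipped=True and allow_zipped_or_unzipped=True,
# the ".nc.gz" path is preferred but the bare ".nc" path is returned if only
# that one exists on disk.
def _example_find_classification_file():
    """Sketch only: prefers the gzipped file, falls back on unzipped."""
    classification_file_name = find_classification_file(
        top_directory_name='/data/echo_classification',  # hypothetical
        valid_time_unix_sec=1309543200,  # 1800 UTC 1 Jul 2011
        desire_zipped=True, allow_zipped_or_unzipped=True,
        raise_error_if_missing=False)

    print(classification_file_name)
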
def _run(storm_metafile_name, warning_dir_name):
    """Finds which storms are linked to an NWS tornado warning.

    This is effectively the main method.

    :param storm_metafile_name: See documentation at top of file.
    :param warning_dir_name: Same.
    """

    print('Reading storm metadata from: "{0:s}"...'.format(
        storm_metafile_name))
    full_storm_id_strings, valid_times_unix_sec = (
        tracking_io.read_ids_and_times(storm_metafile_name)
    )
    secondary_id_strings = (
        temporal_tracking.full_to_partial_ids(full_storm_id_strings)[-1]
    )

    # Also check the SPC dates before and after each storm time.
    these_times_unix_sec = numpy.concatenate((
        valid_times_unix_sec,
        valid_times_unix_sec - NUM_SECONDS_PER_DAY,
        valid_times_unix_sec + NUM_SECONDS_PER_DAY
    ))

    spc_date_strings = [
        time_conversion.time_to_spc_date_string(t)
        for t in these_times_unix_sec
    ]
    spc_date_strings = numpy.unique(numpy.array(spc_date_strings))

    linked_secondary_id_strings = []

    for this_spc_date_string in spc_date_strings:
        this_file_name = '{0:s}/tornado_warnings_{1:s}.p'.format(
            warning_dir_name, this_spc_date_string)

        print('Reading warnings from: "{0:s}"...'.format(this_file_name))
        with open(this_file_name, 'rb') as this_file_handle:
            this_warning_table = pickle.load(this_file_handle)

        this_num_warnings = len(this_warning_table.index)

        for k in range(this_num_warnings):
            linked_secondary_id_strings += (
                this_warning_table[LINKED_SECONDARY_IDS_KEY].values[k]
            )

    print(SEPARATOR_STRING)

    storm_warned_flags = numpy.array(
        [s in linked_secondary_id_strings for s in secondary_id_strings],
        dtype=bool
    )

    print((
        '{0:d} of {1:d} storm objects are linked to an NWS tornado warning!'
    ).format(
        numpy.sum(storm_warned_flags), len(storm_warned_flags)
    ))

def find_local_polygon_file(unix_time_sec=None, spc_date_unix_sec=None,
                            top_raw_directory_name=None,
                            tracking_scale_metres2=None,
                            raise_error_if_missing=True):
    """Finds polygon file on local machine.

    This file should contain storm outlines (polygons) for one time step and
    one tracking scale.

    :param unix_time_sec: Valid time.
    :param spc_date_unix_sec: SPC date.
    :param top_raw_directory_name: Name of top-level directory with raw
        segmotion files.
    :param tracking_scale_metres2: Tracking scale.
    :param raise_error_if_missing: Boolean flag.  If True and file is
        missing, this method will raise an error.
    :return: polygon_file_name: Path to polygon file.  If
        raise_error_if_missing = False and file is missing, this will be the
        *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is
        missing.
    """

    error_checking.assert_is_string(top_raw_directory_name)
    error_checking.assert_is_greater(tracking_scale_metres2, 0.)
    error_checking.assert_is_boolean(raise_error_if_missing)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    directory_name = '{0:s}/{1:s}'.format(
        top_raw_directory_name,
        _get_relative_polygon_dir_physical_scale(
            spc_date_string, tracking_scale_metres2))

    pathless_file_name = _get_pathless_polygon_file_name(
        unix_time_sec, zipped=True)
    polygon_file_name = '{0:s}/{1:s}'.format(
        directory_name, pathless_file_name)

    # If the zipped file is missing, fall back on the unzipped file before
    # erroring out.
    if raise_error_if_missing and not os.path.isfile(polygon_file_name):
        pathless_file_name = _get_pathless_polygon_file_name(
            unix_time_sec, zipped=False)
        polygon_file_name = '{0:s}/{1:s}'.format(
            directory_name, pathless_file_name)

        if not os.path.isfile(polygon_file_name):
            raise ValueError(
                'Cannot find polygon file. Expected at location: ' +
                polygon_file_name)

    return polygon_file_name

def unzip_1day_tar_file(tar_file_name, spc_date_unix_sec=None,
                        top_target_directory_name=None,
                        scales_to_extract_metres2=None):
    """Unzips tar file with segmotion output for one SPC date.

    :param tar_file_name: Path to input file.
    :param spc_date_unix_sec: SPC date.
    :param top_target_directory_name: Name of top-level output directory.
    :param scales_to_extract_metres2: 1-D numpy array of tracking scales to
        extract.
    :return: target_directory_name: Path to output directory.  This will be
        "<top_target_directory_name>/<yyyymmdd>", where <yyyymmdd> is the SPC
        date.
    """

    error_checking.assert_file_exists(tar_file_name)
    error_checking.assert_is_greater_numpy_array(scales_to_extract_metres2, 0)
    error_checking.assert_is_integer_numpy_array(scales_to_extract_metres2)
    error_checking.assert_is_numpy_array(
        scales_to_extract_metres2, num_dimensions=1)

    num_scales_to_extract = len(scales_to_extract_metres2)
    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)

    directory_names_to_unzip = []

    for j in range(num_scales_to_extract):
        this_relative_stats_dir_name = _get_relative_stats_dir_physical_scale(
            spc_date_string, scales_to_extract_metres2[j])
        this_relative_polygon_dir_name = (
            _get_relative_polygon_dir_physical_scale(
                spc_date_string, scales_to_extract_metres2[j])
        )

        directory_names_to_unzip.append(
            this_relative_stats_dir_name.replace(spc_date_string + '/', ''))
        directory_names_to_unzip.append(
            this_relative_polygon_dir_name.replace(spc_date_string + '/', ''))

    target_directory_name = '{0:s}/{1:s}'.format(
        top_target_directory_name, spc_date_string)

    unzipping.unzip_tar(
        tar_file_name, target_directory_name=target_directory_name,
        file_and_dir_names_to_unzip=directory_names_to_unzip)

    return target_directory_name

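# A minimal usage sketch for unzip_1day_tar_file.  The tar path, output
# directory, and tracking scale are hypothetical; the function extracts only
# the statistics and polygon subdirectories for the requested scales.
def _example_unzip_1day_tar_file():
    """Sketch only: unzips segmotion output for one SPC date."""
    target_directory_name = unzip_1day_tar_file(
        tar_file_name='/data/segmotion_raw/20110701.tar',  # hypothetical
        spc_date_unix_sec=1309543200,  # in SPC date "20110701"
        top_target_directory_name='/data/segmotion_unzipped',
        scales_to_extract_metres2=numpy.array([50000000], dtype=int))

    print(target_directory_name)  # "/data/segmotion_unzipped/20110701"
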
def _read_storm_locations_one_time(top_tracking_dir_name, valid_time_unix_sec,
                                   desired_full_id_strings):
    """Reads storm locations at one time.

    K = number of storm objects desired

    :param top_tracking_dir_name: See documentation at top of file.
    :param valid_time_unix_sec: Valid time.
    :param desired_full_id_strings: length-K list of full storm IDs.
        Locations will be read for these storms only.
    :return: desired_latitudes_deg: length-K numpy array of latitudes (deg N).
    :return: desired_longitudes_deg: length-K numpy array of longitudes
        (deg E).
    """

    spc_date_string = time_conversion.time_to_spc_date_string(
        valid_time_unix_sec)
    desired_times_unix_sec = numpy.full(
        len(desired_full_id_strings), valid_time_unix_sec, dtype=int
    )

    tracking_file_name = tracking_io.find_file(
        top_tracking_dir_name=top_tracking_dir_name,
        tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
        source_name=tracking_utils.SEGMOTION_NAME,
        valid_time_unix_sec=valid_time_unix_sec,
        spc_date_string=spc_date_string, raise_error_if_missing=True)

    print('Reading storm locations from: "{0:s}"...'.format(
        tracking_file_name))
    storm_object_table = tracking_io.read_file(tracking_file_name)

    desired_indices = tracking_utils.find_storm_objects(
        all_id_strings=storm_object_table[
            tracking_utils.FULL_ID_COLUMN].values.tolist(),
        all_times_unix_sec=storm_object_table[
            tracking_utils.VALID_TIME_COLUMN].values,
        id_strings_to_keep=desired_full_id_strings,
        times_to_keep_unix_sec=desired_times_unix_sec, allow_missing=False)

    desired_latitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LATITUDE_COLUMN].values[desired_indices]
    desired_longitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LONGITUDE_COLUMN].values[desired_indices]

    return desired_latitudes_deg, desired_longitudes_deg

def _link_winds_one_period(tracking_file_names, top_wind_dir_name,
                           top_output_dir_name):
    """Links wind observations to storms for one continuous period.

    :param tracking_file_names: 1-D list of paths to tracking files.  Each
        will be read by `storm_tracking_io.read_processed_file`.
    :param top_wind_dir_name: See documentation at top of file.
    :param top_output_dir_name: Same.
    """

    storm_to_winds_table, metadata_dict = linkage.link_storms_to_winds(
        tracking_file_names=tracking_file_names,
        top_wind_directory_name=top_wind_dir_name)
    print(SEPARATOR_STRING)

    spc_date_string_by_storm_object = [
        time_conversion.time_to_spc_date_string(t)
        for t in storm_to_winds_table[tracking_utils.VALID_TIME_COLUMN].values
    ]

    unique_spc_date_strings, orig_to_unique_indices = numpy.unique(
        numpy.array(spc_date_string_by_storm_object), return_inverse=True)

    for i in range(len(unique_spc_date_strings)):
        this_output_file_name = linkage.find_linkage_file(
            top_directory_name=top_output_dir_name,
            event_type_string=linkage.WIND_EVENT_STRING,
            spc_date_string=unique_spc_date_strings[i],
            raise_error_if_missing=False)

        print('Writing linkages to: "{0:s}"...'.format(
            this_output_file_name))
        these_storm_object_rows = numpy.where(orig_to_unique_indices == i)[0]

        linkage.write_linkage_file(
            pickle_file_name=this_output_file_name,
            storm_to_events_table=storm_to_winds_table.iloc[
                these_storm_object_rows],
            metadata_dict=metadata_dict)

    print(SEPARATOR_STRING)

def find_unsampled_file_one_time(unix_time_sec=None, spc_date_unix_sec=None,
                                 top_directory_name=None,
                                 raise_error_if_missing=True):
    """Locates file with unsampled feature vectors for one time step.

    :param unix_time_sec: Time step (valid time).
    :param spc_date_unix_sec: SPC (Storm Prediction Center) date.
    :param top_directory_name: Name of top-level directory with feature
        files.
    :param raise_error_if_missing: Boolean flag.  If True and file is
        missing, this method will raise an error.
    :return: unsampled_file_name: Path to file with unsampled feature vectors
        for one time step.  If raise_error_if_missing = False and file is
        missing, this will be the *expected* path.
    :raises: ValueError: if raise_error_if_missing = True and file is
        missing.
    """

    error_checking.assert_is_string(top_directory_name)
    error_checking.assert_is_boolean(raise_error_if_missing)

    pathless_file_name = '{0:s}_{1:s}{2:s}'.format(
        FEATURE_FILE_PREFIX,
        time_conversion.unix_sec_to_string(
            unix_time_sec, TIME_FORMAT_IN_FILE_NAMES),
        FEATURE_FILE_EXTENSION)

    unsampled_file_name = '{0:s}/{1:s}/{2:s}'.format(
        top_directory_name,
        time_conversion.time_to_spc_date_string(spc_date_unix_sec),
        pathless_file_name)

    if raise_error_if_missing and not os.path.isfile(unsampled_file_name):
        raise ValueError(
            'Cannot find file with feature vectors. Expected at location: ' +
            unsampled_file_name)

    return unsampled_file_name

def _run(top_orig_tracking_dir_name, top_new_tracking_dir_name,
         first_spc_date_string, last_spc_date_string, output_file_name):
    """Plots storms that were removed by remove_storms_outside_conus.py.

    This is effectively the main method.

    :param top_orig_tracking_dir_name: See documentation at top of file.
    :param top_new_tracking_dir_name: Same.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param output_file_name: Same.
    """

    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

    spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    orig_tracking_file_names = []

    for d in spc_date_strings:
        orig_tracking_file_names += tracking_io.find_files_one_spc_date(
            top_tracking_dir_name=top_orig_tracking_dir_name,
            tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
            source_name=tracking_utils.SEGMOTION_NAME,
            spc_date_string=d, raise_error_if_missing=False
        )[0]

    valid_times_unix_sec = numpy.array(
        [tracking_io.file_name_to_time(f) for f in orig_tracking_file_names],
        dtype=int)

    new_tracking_file_names = [
        tracking_io.find_file(
            top_tracking_dir_name=top_new_tracking_dir_name,
            tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
            source_name=tracking_utils.SEGMOTION_NAME,
            valid_time_unix_sec=t,
            spc_date_string=time_conversion.time_to_spc_date_string(t),
            raise_error_if_missing=True
        ) for t in valid_times_unix_sec
    ]

    orig_storm_object_table = tracking_io.read_many_files(
        orig_tracking_file_names)
    print(SEPARATOR_STRING)

    new_storm_object_table = tracking_io.read_many_files(
        new_tracking_file_names)
    print(SEPARATOR_STRING)

    orig_storm_id_strings = (
        orig_storm_object_table[tracking_utils.FULL_ID_COLUMN].values.tolist()
    )
    orig_storm_times_unix_sec = (
        orig_storm_object_table[tracking_utils.VALID_TIME_COLUMN].values
    )
    new_storm_id_strings = (
        new_storm_object_table[tracking_utils.FULL_ID_COLUMN].values.tolist()
    )
    new_storm_times_unix_sec = (
        new_storm_object_table[tracking_utils.VALID_TIME_COLUMN].values
    )

    num_orig_storm_objects = len(orig_storm_object_table.index)
    orig_kept_flags = numpy.full(num_orig_storm_objects, False, dtype=bool)

    these_indices = tracking_utils.find_storm_objects(
        all_id_strings=orig_storm_id_strings,
        all_times_unix_sec=orig_storm_times_unix_sec,
        id_strings_to_keep=new_storm_id_strings,
        times_to_keep_unix_sec=new_storm_times_unix_sec, allow_missing=False)

    orig_kept_flags[these_indices] = True
    orig_removed_indices = numpy.where(numpy.invert(orig_kept_flags))[0]

    print('{0:d} of {1:d} storm objects were outside CONUS.'.format(
        len(orig_removed_indices), num_orig_storm_objects
    ))

    removed_storm_object_table = orig_storm_object_table.iloc[
        orig_removed_indices]

    removed_latitudes_deg = removed_storm_object_table[
        tracking_utils.CENTROID_LATITUDE_COLUMN].values
    removed_longitudes_deg = removed_storm_object_table[
        tracking_utils.CENTROID_LONGITUDE_COLUMN].values

    figure_object, axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=numpy.min(removed_latitudes_deg) - 1.,
            max_latitude_deg=numpy.max(removed_latitudes_deg) + 1.,
            min_longitude_deg=numpy.min(removed_longitudes_deg) - 1.,
            max_longitude_deg=numpy.max(removed_longitudes_deg) + 1.,
            resolution_string='i')
    )

    plotting_utils.plot_coastlines(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)
    plotting_utils.plot_countries(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)
    plotting_utils.plot_states_and_provinces(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)
    plotting_utils.plot_parallels(
        basemap_object=basemap_object, axes_object=axes_object,
        num_parallels=NUM_PARALLELS)
    plotting_utils.plot_meridians(
        basemap_object=basemap_object, axes_object=axes_object,
        num_meridians=NUM_MERIDIANS)

    conus_latitudes_deg, conus_longitudes_deg = (
        conus_boundary.read_from_netcdf()
    )
    conus_latitudes_deg, conus_longitudes_deg = conus_boundary.erode_boundary(
        latitudes_deg=conus_latitudes_deg,
        longitudes_deg=conus_longitudes_deg,
        erosion_distance_metres=EROSION_DISTANCE_METRES)

    axes_object.plot(
        conus_longitudes_deg, conus_latitudes_deg, color=LINE_COLOUR,
        linestyle='solid', linewidth=LINE_WIDTH)

    axes_object.plot(
        removed_longitudes_deg, removed_latitudes_deg, linestyle='None',
        marker=MARKER_TYPE, markersize=MARKER_SIZE, markeredgewidth=0,
        markerfacecolor=MARKER_COLOUR, markeredgecolor=MARKER_COLOUR)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(
        output_file_name, dpi=FIGURE_RESOLUTION_DPI, pad_inches=0,
        bbox_inches='tight')
    pyplot.close(figure_object)

def _read_target_values(top_target_dir_name, storm_activations,
                        activation_metadata_dict):
    """Reads target value for each storm object.

    E = number of examples (storm objects)

    :param top_target_dir_name: See documentation at top of file.
    :param storm_activations: length-E numpy array of activations.
    :param activation_metadata_dict: Dictionary returned by
        `model_activation.read_file`.
    :return: target_dict: Dictionary with the following keys.
    target_dict['full_id_strings']: length-E list of full storm IDs.
    target_dict['storm_times_unix_sec']: length-E numpy array of storm times.
    target_dict['storm_activations']: length-E numpy array of model
        activations.
    target_dict['storm_target_values']: length-E numpy array of target values.

    :raises: ValueError: if the target variable is multiclass and not
        binarized.
    """

    # Convert input args.
    full_id_strings = activation_metadata_dict[model_activation.FULL_IDS_KEY]
    storm_times_unix_sec = activation_metadata_dict[
        model_activation.STORM_TIMES_KEY]

    storm_spc_date_strings_numpy = numpy.array([
        time_conversion.time_to_spc_date_string(t)
        for t in storm_times_unix_sec
    ], dtype=object)

    unique_spc_date_strings_numpy = numpy.unique(storm_spc_date_strings_numpy)

    # Read metadata for machine-learning model.
    model_file_name = activation_metadata_dict[
        model_activation.MODEL_FILE_NAME_KEY]
    model_metadata_file_name = '{0:s}/model_metadata.p'.format(
        os.path.split(model_file_name)[0])

    print('Reading metadata from: "{0:s}"...'.format(
        model_metadata_file_name))
    model_metadata_dict = cnn.read_model_metadata(model_metadata_file_name)
    training_option_dict = model_metadata_dict[cnn.TRAINING_OPTION_DICT_KEY]

    target_name = training_option_dict[trainval_io.TARGET_NAME_KEY]
    num_classes = target_val_utils.target_name_to_num_classes(
        target_name=target_name, include_dead_storms=False)

    binarize_target = (
        training_option_dict[trainval_io.BINARIZE_TARGET_KEY]
        and num_classes > 2
    )

    if num_classes > 2 and not binarize_target:
        error_string = (
            'The target variable ("{0:s}") is multiclass, which this script '
            'cannot handle.'
        ).format(target_name)
        raise ValueError(error_string)

    event_type_string = target_val_utils.target_name_to_params(target_name)[
        target_val_utils.EVENT_TYPE_KEY]

    # Read target values.
    storm_target_values = numpy.array([], dtype=int)
    id_sort_indices = numpy.array([], dtype=int)
    num_spc_dates = len(unique_spc_date_strings_numpy)

    for i in range(num_spc_dates):
        this_target_file_name = target_val_utils.find_target_file(
            top_directory_name=top_target_dir_name,
            event_type_string=event_type_string,
            spc_date_string=unique_spc_date_strings_numpy[i])

        print('Reading data from: "{0:s}"...'.format(this_target_file_name))
        this_target_value_dict = target_val_utils.read_target_values(
            netcdf_file_name=this_target_file_name,
            target_names=[target_name])

        these_indices = numpy.where(
            storm_spc_date_strings_numpy == unique_spc_date_strings_numpy[i]
        )[0]

        id_sort_indices = numpy.concatenate((
            id_sort_indices, these_indices
        ))

        these_indices = tracking_utils.find_storm_objects(
            all_id_strings=this_target_value_dict[
                target_val_utils.FULL_IDS_KEY],
            all_times_unix_sec=this_target_value_dict[
                target_val_utils.VALID_TIMES_KEY],
            id_strings_to_keep=[full_id_strings[k] for k in these_indices],
            times_to_keep_unix_sec=storm_times_unix_sec[these_indices])

        if len(these_indices) == 0:
            continue

        these_target_values = this_target_value_dict[
            target_val_utils.TARGET_MATRIX_KEY][these_indices, :]
        these_target_values = numpy.reshape(
            these_target_values, these_target_values.size)

        storm_target_values = numpy.concatenate((
            storm_target_values, these_target_values
        ))

    good_indices = numpy.where(
        storm_target_values != target_val_utils.INVALID_STORM_INTEGER
    )[0]

    storm_target_values = storm_target_values[good_indices]
    id_sort_indices = id_sort_indices[good_indices]

    if binarize_target:
        storm_target_values = (
            storm_target_values == num_classes - 1
        ).astype(int)

    return {
        FULL_IDS_KEY: [full_id_strings[k] for k in id_sort_indices],
        STORM_TIMES_KEY: storm_times_unix_sec[id_sort_indices],
        STORM_ACTIVATIONS_KEY: storm_activations[id_sort_indices],
        TARGET_VALUES_KEY: storm_target_values
    }

def get_storm_based_radar_stats_myrorss_or_mrms(
        storm_object_table, top_radar_dir_name,
        radar_metadata_dict_for_tracking,
        statistic_names=DEFAULT_STATISTIC_NAMES,
        percentile_levels=DEFAULT_PERCENTILE_LEVELS,
        radar_field_names=DEFAULT_FIELDS_FOR_MYRORSS_AND_MRMS,
        reflectivity_heights_m_asl=None,
        radar_source=radar_utils.MYRORSS_SOURCE_ID,
        dilate_azimuthal_shear=False,
        dilation_half_width_in_pixels=dilation.DEFAULT_HALF_WIDTH,
        dilation_percentile_level=DEFAULT_DILATION_PERCENTILE_LEVEL):
    """Computes radar statistics for each storm object.

    In this case, radar data must be from MYRORSS or MRMS.

    N = number of storm objects
    P = number of field/height pairs
    S = number of statistics (percentile- and non-percentile-based)

    :param storm_object_table: See documentation for
        `get_storm_based_radar_stats_gridrad`.
    :param top_radar_dir_name: See doc for
        `get_storm_based_radar_stats_gridrad`.
    :param radar_metadata_dict_for_tracking: Dictionary created by
        `myrorss_and_mrms_io.read_metadata_from_raw_file`, describing radar
        grid used to create storm objects.
    :param statistic_names: 1-D list of non-percentile-based statistics.
    :param percentile_levels: 1-D numpy array of percentile levels.
    :param radar_field_names: 1-D list of radar fields for which stats will
        be computed.
    :param reflectivity_heights_m_asl: 1-D numpy array of heights (metres
        above sea level) for the field "reflectivity_dbz".  If
        "reflectivity_dbz" is not in `radar_field_names`, you can leave this
        as None.
    :param radar_source: Source of radar data (either "myrorss" or "mrms").
    :param dilate_azimuthal_shear: Boolean flag.  If False, azimuthal-shear
        stats will be based only on values inside the storm object.  If True,
        azimuthal-shear fields will be dilated, so azimuthal-shear stats will
        be based on values inside and near the storm object.  This is useful
        because sometimes large az-shear values occur just outside the storm
        object.
    :param dilation_half_width_in_pixels: See documentation for
        `dilation.dilate_2d_matrix`.
    :param dilation_percentile_level: See documentation for
        `dilation.dilate_2d_matrix`.
    :return: storm_object_statistic_table: pandas DataFrame with 2 + S * P
        columns.  The last S * P columns are one for each
        statistic-field-height tuple.  Names of these columns are determined
        by `radar_field_and_statistic_to_column_name` and
        `radar_field_and_percentile_to_column_name`.  The first 2 columns are
        listed below.
    storm_object_statistic_table.full_id_string: Storm ID (taken from input
        table).
    storm_object_statistic_table.unix_time_sec: Valid time (taken from input
        table).
    """

    error_checking.assert_is_boolean(dilate_azimuthal_shear)
    percentile_levels = _check_statistic_params(
        statistic_names, percentile_levels)

    # Find radar files.
    spc_date_strings = (
        storm_object_table[tracking_utils.SPC_DATE_COLUMN].values.tolist()
    )

    file_dictionary = myrorss_and_mrms_io.find_many_raw_files(
        desired_times_unix_sec=storm_object_table[
            tracking_utils.VALID_TIME_COLUMN].values.astype(int),
        spc_date_strings=spc_date_strings, data_source=radar_source,
        field_names=radar_field_names,
        top_directory_name=top_radar_dir_name,
        reflectivity_heights_m_asl=reflectivity_heights_m_asl)

    radar_file_name_matrix = file_dictionary[
        myrorss_and_mrms_io.RADAR_FILE_NAMES_KEY]
    radar_field_name_by_pair = file_dictionary[
        myrorss_and_mrms_io.FIELD_NAME_BY_PAIR_KEY]
    radar_height_by_pair_m_asl = file_dictionary[
        myrorss_and_mrms_io.HEIGHT_BY_PAIR_KEY]
    valid_times_unix_sec = file_dictionary[
        myrorss_and_mrms_io.UNIQUE_TIMES_KEY]

    valid_spc_date_strings = [
        time_conversion.time_to_spc_date_string(t)
        for t in file_dictionary[
            myrorss_and_mrms_io.SPC_DATES_AT_UNIQUE_TIMES_KEY]
    ]

    # Initialize output.
    num_field_height_pairs = len(radar_field_name_by_pair)
    num_valid_times = len(valid_times_unix_sec)
    num_statistics = len(statistic_names)
    num_percentiles = len(percentile_levels)
    num_storm_objects = len(storm_object_table.index)

    statistic_matrix = numpy.full(
        (num_storm_objects, num_field_height_pairs, num_statistics),
        numpy.nan)
    percentile_matrix = numpy.full(
        (num_storm_objects, num_field_height_pairs, num_percentiles),
        numpy.nan)

    valid_time_strings = [
        time_conversion.unix_sec_to_string(t, DEFAULT_TIME_FORMAT)
        for t in valid_times_unix_sec
    ]

    for j in range(num_field_height_pairs):
        for i in range(num_valid_times):
            if radar_file_name_matrix[i, j] is None:
                continue

            print((
                'Computing stats for "{0:s}" at {1:d} metres ASL and '
                '{2:s}...'
            ).format(
                radar_field_name_by_pair[j],
                int(numpy.round(radar_height_by_pair_m_asl[j])),
                valid_time_strings[i]
            ))

            this_metadata_dict = (
                myrorss_and_mrms_io.read_metadata_from_raw_file(
                    radar_file_name_matrix[i, j], data_source=radar_source)
            )

            if radar_metadata_dict_for_tracking is None:
                this_storm_to_grid_points_table = storm_object_table[
                    STORM_OBJECT_TO_GRID_PTS_COLUMNS]
            else:
                this_storm_to_grid_points_table = (
                    get_grid_points_in_storm_objects(
                        storm_object_table=storm_object_table,
                        orig_grid_metadata_dict=
                        radar_metadata_dict_for_tracking,
                        new_grid_metadata_dict=this_metadata_dict)
                )

            # Read data for [j]th field/height pair at [i]th time step.
            sparse_grid_table_this_field_height = (
                myrorss_and_mrms_io.read_data_from_sparse_grid_file(
                    radar_file_name_matrix[i, j],
                    field_name_orig=this_metadata_dict[
                        myrorss_and_mrms_io.FIELD_NAME_COLUMN_ORIG],
                    data_source=radar_source,
                    sentinel_values=this_metadata_dict[
                        radar_utils.SENTINEL_VALUE_COLUMN])
            )

            radar_matrix_this_field_height = radar_s2f.sparse_to_full_grid(
                sparse_grid_table_this_field_height, this_metadata_dict)[0]

            if (dilate_azimuthal_shear and radar_field_name_by_pair[j] in
                    AZIMUTHAL_SHEAR_FIELD_NAMES):
                print('Dilating azimuthal-shear field...')
                radar_matrix_this_field_height = dilation.dilate_2d_matrix(
                    radar_matrix_this_field_height,
                    percentile_level=dilation_percentile_level,
                    half_width_in_pixels=dilation_half_width_in_pixels,
                    take_largest_absolute_value=True)

            radar_matrix_this_field_height[
                numpy.isnan(radar_matrix_this_field_height)] = 0.

            # Find storm objects at [i]th valid time.
            these_storm_flags = numpy.logical_and(
                storm_object_table[tracking_utils.VALID_TIME_COLUMN].values
                == valid_times_unix_sec[i],
                storm_object_table[tracking_utils.SPC_DATE_COLUMN].values
                == valid_spc_date_strings[i]
            )
            these_storm_indices = numpy.where(these_storm_flags)[0]

            # Extract storm-based radar stats for [j]th field/height pair at
            # [i]th time step.
            for this_storm_index in these_storm_indices:
                radar_values_this_storm = extract_radar_grid_points(
                    radar_matrix_this_field_height,
                    row_indices=this_storm_to_grid_points_table[
                        tracking_utils.ROWS_IN_STORM_COLUMN
                    ].values[this_storm_index].astype(int),
                    column_indices=this_storm_to_grid_points_table[
                        tracking_utils.COLUMNS_IN_STORM_COLUMN
                    ].values[this_storm_index].astype(int)
                )

                (statistic_matrix[this_storm_index, j, :],
                 percentile_matrix[this_storm_index, j, :]) = (
                     get_spatial_statistics(
                         radar_values_this_storm,
                         statistic_names=statistic_names,
                         percentile_levels=percentile_levels)
                 )

    # Create pandas DataFrame.
    storm_object_statistic_dict = {}

    for j in range(num_field_height_pairs):
        for k in range(num_statistics):
            this_column_name = radar_field_and_statistic_to_column_name(
                radar_field_name=radar_field_name_by_pair[j],
                radar_height_m_asl=radar_height_by_pair_m_asl[j],
                statistic_name=statistic_names[k])

            storm_object_statistic_dict.update(
                {this_column_name: statistic_matrix[:, j, k]})

        for k in range(num_percentiles):
            this_column_name = radar_field_and_percentile_to_column_name(
                radar_field_name=radar_field_name_by_pair[j],
                radar_height_m_asl=radar_height_by_pair_m_asl[j],
                percentile_level=percentile_levels[k])

            storm_object_statistic_dict.update(
                {this_column_name: percentile_matrix[:, j, k]})

    storm_object_statistic_table = pandas.DataFrame.from_dict(
        storm_object_statistic_dict)

    return pandas.concat([
        storm_object_table[STORM_COLUMNS_TO_KEEP],
        storm_object_statistic_table
    ], axis=1)

def find_many_raw_files(
        desired_times_unix_sec, spc_date_strings, data_source, field_names,
        top_directory_name, reflectivity_heights_m_asl=None,
        max_time_offset_for_az_shear_sec=
        DEFAULT_MAX_TIME_OFFSET_FOR_AZ_SHEAR_SEC,
        max_time_offset_for_non_shear_sec=
        DEFAULT_MAX_TIME_OFFSET_FOR_NON_SHEAR_SEC):
    """Finds raw file for each field/height pair and time step.

    N = number of input times
    T = number of unique input times
    F = number of field/height pairs

    :param desired_times_unix_sec: length-N numpy array with desired valid
        times.
    :param spc_date_strings: length-N list of corresponding SPC dates (format
        "yyyymmdd").
    :param data_source: Data source ("myrorss" or "mrms").
    :param field_names: 1-D list of field names.
    :param top_directory_name: Name of top-level directory with radar data
        from the given source.
    :param reflectivity_heights_m_asl: 1-D numpy array of heights (metres
        above sea level) for the field "reflectivity_dbz".  If
        "reflectivity_dbz" is not in `field_names`, leave this as None.
    :param max_time_offset_for_az_shear_sec: Max time offset (between desired
        and actual valid time) for azimuthal-shear fields.
    :param max_time_offset_for_non_shear_sec: Max time offset (between
        desired and actual valid time) for non-azimuthal-shear fields.
    :return: file_dictionary: Dictionary with the following keys.
    file_dictionary['radar_file_name_matrix']: T-by-F numpy array of paths to
        raw files.
    file_dictionary['unique_times_unix_sec']: length-T numpy array of unique
        valid times.
    file_dictionary['spc_date_strings_for_unique_times']: length-T numpy
        array of corresponding SPC dates.
    file_dictionary['field_name_by_pair']: length-F list of field names.
    file_dictionary['height_by_pair_m_asl']: length-F numpy array of heights
        (metres above sea level).
    """

    field_name_by_pair, height_by_pair_m_asl = (
        myrorss_and_mrms_utils.fields_and_refl_heights_to_pairs(
            field_names=field_names, data_source=data_source,
            refl_heights_m_asl=reflectivity_heights_m_asl)
    )
    num_fields = len(field_name_by_pair)

    error_checking.assert_is_integer_numpy_array(desired_times_unix_sec)
    error_checking.assert_is_numpy_array(
        desired_times_unix_sec, num_dimensions=1)
    num_times = len(desired_times_unix_sec)

    error_checking.assert_is_string_list(spc_date_strings)
    error_checking.assert_is_numpy_array(
        numpy.array(spc_date_strings),
        exact_dimensions=numpy.array([num_times]))

    spc_dates_unix_sec = numpy.array([
        time_conversion.spc_date_string_to_unix_sec(s)
        for s in spc_date_strings
    ])

    time_matrix = numpy.hstack((
        numpy.reshape(desired_times_unix_sec, (num_times, 1)),
        numpy.reshape(spc_dates_unix_sec, (num_times, 1))
    ))

    # Remove duplicate (valid time, SPC date) rows.  This replaces the old
    # `numpy.vstack` over a set of tuples, which is deprecated in recent
    # numpy versions.
    unique_time_matrix = numpy.unique(time_matrix, axis=0).astype(int)

    unique_times_unix_sec = unique_time_matrix[:, 0]
    spc_dates_at_unique_times_unix_sec = unique_time_matrix[:, 1]

    sort_indices = numpy.argsort(unique_times_unix_sec)
    unique_times_unix_sec = unique_times_unix_sec[sort_indices]
    spc_dates_at_unique_times_unix_sec = spc_dates_at_unique_times_unix_sec[
        sort_indices]

    num_unique_times = len(unique_times_unix_sec)
    radar_file_name_matrix = numpy.full(
        (num_unique_times, num_fields), '', dtype=object)

    for i in range(num_unique_times):
        this_spc_date_string = time_conversion.time_to_spc_date_string(
            spc_dates_at_unique_times_unix_sec[i])

        for j in range(num_fields):
            if field_name_by_pair[j] in AZIMUTHAL_SHEAR_FIELD_NAMES:
                this_max_time_offset_sec = max_time_offset_for_az_shear_sec
                this_raise_error_flag = False
            else:
                this_max_time_offset_sec = max_time_offset_for_non_shear_sec
                this_raise_error_flag = True

            if this_max_time_offset_sec == 0:
                radar_file_name_matrix[i, j] = find_raw_file(
                    unix_time_sec=unique_times_unix_sec[i],
                    spc_date_string=this_spc_date_string,
                    field_name=field_name_by_pair[j],
                    data_source=data_source,
                    top_directory_name=top_directory_name,
                    height_m_asl=height_by_pair_m_asl[j],
                    raise_error_if_missing=this_raise_error_flag)
            else:
                radar_file_name_matrix[i, j] = find_raw_file_inexact_time(
                    desired_time_unix_sec=unique_times_unix_sec[i],
                    spc_date_string=this_spc_date_string,
                    field_name=field_name_by_pair[j],
                    data_source=data_source,
                    top_directory_name=top_directory_name,
                    height_m_asl=height_by_pair_m_asl[j],
                    max_time_offset_sec=this_max_time_offset_sec,
                    raise_error_if_missing=this_raise_error_flag)

            if radar_file_name_matrix[i, j] is None:
                this_time_string = time_conversion.unix_sec_to_string(
                    unique_times_unix_sec[i], TIME_FORMAT_FOR_LOG_MESSAGES)

                warning_string = (
                    'Cannot find file for "{0:s}" at {1:d} metres ASL and '
                    '{2:s}.'
                ).format(
                    field_name_by_pair[j], int(height_by_pair_m_asl[j]),
                    this_time_string
                )
                warnings.warn(warning_string)

    return {
        RADAR_FILE_NAMES_KEY: radar_file_name_matrix,
        UNIQUE_TIMES_KEY: unique_times_unix_sec,
        SPC_DATES_AT_UNIQUE_TIMES_KEY: spc_dates_at_unique_times_unix_sec,
        FIELD_NAME_BY_PAIR_KEY: field_name_by_pair,
        HEIGHT_BY_PAIR_KEY: numpy.round(height_by_pair_m_asl).astype(int)
    }

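# A minimal usage sketch for find_many_raw_files.  Directory and height are
# hypothetical; the field name "reflectivity_dbz" and data source "myrorss"
# come from the docstring above.  The returned dictionary is keyed by the
# module-level constants used in the return statement (RADAR_FILE_NAMES_KEY
# etc.), with one file per unique time and field/height pair.
def _example_find_many_raw_files():
    """Sketch only: locates low-level reflectivity files for two times."""
    file_dictionary = find_many_raw_files(
        desired_times_unix_sec=numpy.array(
            [1309543200, 1309543500], dtype=int),  # hypothetical times
        spc_date_strings=['20110701', '20110701'],
        data_source='myrorss',
        field_names=['reflectivity_dbz'],
        top_directory_name='/data/myrorss',  # hypothetical
        reflectivity_heights_m_asl=numpy.array([250.]))

    radar_file_name_matrix = file_dictionary[RADAR_FILE_NAMES_KEY]
    print(radar_file_name_matrix.shape)  # (num unique times, num pairs)
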
def _convert_to_myrorss_format(top_gridrad_dir_name, top_myrorss_dir_name,
                               top_ruc_dir_name, top_rap_dir_name,
                               output_field_name):
    """Converts GridRad data to MYRORSS format.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param top_myrorss_dir_name: Same.
    :param top_ruc_dir_name: Same.
    :param top_rap_dir_name: Same.
    :param output_field_name: Same.
    """

    gridrad_file_names = _find_gridrad_files(top_gridrad_dir_name)

    last_hour_string = 'NaN'
    target_height_matrix_m_asl = None

    for this_gridrad_file_name in gridrad_file_names:
        this_metadata_dict = gridrad_io.read_metadata_from_full_grid_file(
            this_gridrad_file_name)
        this_time_unix_sec = this_metadata_dict[radar_utils.UNIX_TIME_COLUMN]

        (this_refl_matrix_dbz, these_grid_point_heights_m_asl,
         these_grid_point_latitudes_deg, these_grid_point_longitudes_deg
        ) = gridrad_io.read_field_from_full_grid_file(
            netcdf_file_name=this_gridrad_file_name,
            field_name=INPUT_FIELD_NAME, metadata_dict=this_metadata_dict)

        if output_field_name == radar_utils.REFL_M10CELSIUS_NAME:
            this_hour_string = time_conversion.unix_sec_to_string(
                this_time_unix_sec, TIME_FORMAT_HOUR)

            # Interpolate the critical-temperature surface from the NWP model
            # at most once per hour.
            if this_hour_string != last_hour_string:
                if this_time_unix_sec >= RAP_RUC_CUTOFF_TIME_UNIX_SEC:
                    this_model_name = nwp_model_utils.RAP_MODEL_NAME
                    this_top_model_dir_name = top_rap_dir_name
                else:
                    this_model_name = nwp_model_utils.RUC_MODEL_NAME
                    this_top_model_dir_name = top_ruc_dir_name

                target_height_matrix_m_asl = (
                    gridrad_utils.interp_temperature_surface_from_nwp(
                        radar_grid_point_latitudes_deg=
                        these_grid_point_latitudes_deg,
                        radar_grid_point_longitudes_deg=
                        these_grid_point_longitudes_deg,
                        radar_time_unix_sec=this_time_unix_sec,
                        critical_temperature_kelvins=
                        TEMPERATURE_LEVEL_KELVINS,
                        model_name=this_model_name, use_all_grids=False,
                        grid_id=nwp_model_utils.NAME_OF_130GRID,
                        top_grib_directory_name=this_top_model_dir_name)
                )

                last_hour_string = copy.deepcopy(this_hour_string)

            this_output_matrix = gridrad_utils.interp_reflectivity_to_heights(
                reflectivity_matrix_dbz=this_refl_matrix_dbz,
                grid_point_heights_m_asl=these_grid_point_heights_m_asl,
                target_height_matrix_m_asl=target_height_matrix_m_asl)

        elif output_field_name == radar_utils.REFL_COLUMN_MAX_NAME:
            this_output_matrix = gridrad_utils.get_column_max_reflectivity(
                this_refl_matrix_dbz)
        else:
            this_output_matrix = gridrad_utils.get_echo_tops(
                reflectivity_matrix_dbz=this_refl_matrix_dbz,
                grid_point_heights_m_asl=these_grid_point_heights_m_asl,
                critical_reflectivity_dbz=
                radar_utils.field_name_to_echo_top_refl(output_field_name))

        this_spc_date_string = time_conversion.time_to_spc_date_string(
            this_time_unix_sec)

        this_myrorss_file_name = myrorss_and_mrms_io.find_raw_file(
            unix_time_sec=this_time_unix_sec,
            spc_date_string=this_spc_date_string,
            field_name=output_field_name,
            data_source=radar_utils.MYRORSS_SOURCE_ID,
            top_directory_name=top_myrorss_dir_name,
            raise_error_if_missing=False)

        this_myrorss_file_name = this_myrorss_file_name.replace('.gz', '')

        print('Writing "{0:s}" to MYRORSS file: "{1:s}"...'.format(
            output_field_name, this_myrorss_file_name
        ))
        myrorss_and_mrms_io.write_field_to_myrorss_file(
            field_matrix=this_output_matrix,
            netcdf_file_name=this_myrorss_file_name,
            field_name=output_field_name, metadata_dict=this_metadata_dict)

def _read_new_target_values(top_target_dir_name, new_target_name,
                            full_storm_id_strings, storm_times_unix_sec,
                            orig_target_values):
    """Reads new target values (for upgraded minimum EF rating).

    E = number of examples (storm objects)

    :param top_target_dir_name: See documentation at top of file.
    :param new_target_name: Name of new target variable (with upgraded
        minimum EF rating).
    :param full_storm_id_strings: length-E list of storm IDs.
    :param storm_times_unix_sec: length-E numpy array of valid times.
    :param orig_target_values: length-E numpy array of original target values
        (for original minimum EF rating), all integers in 0...1.
    :return: new_target_values: length-E numpy array of new target values
        (integers in -1...1).  -1 means that increasing the minimum EF rating
        flipped the value from 1 to 0.
    """

    storm_spc_date_strings = numpy.array([
        time_conversion.time_to_spc_date_string(t)
        for t in storm_times_unix_sec
    ])

    unique_spc_date_strings = numpy.unique(storm_spc_date_strings)

    event_type_string = target_val_utils.target_name_to_params(
        new_target_name)[target_val_utils.EVENT_TYPE_KEY]

    num_spc_dates = len(unique_spc_date_strings)
    num_storm_objects = len(full_storm_id_strings)
    new_target_values = numpy.full(num_storm_objects, numpy.nan)

    for i in range(num_spc_dates):
        this_target_file_name = target_val_utils.find_target_file(
            top_directory_name=top_target_dir_name,
            event_type_string=event_type_string,
            spc_date_string=unique_spc_date_strings[i])

        print('Reading data from: "{0:s}"...'.format(this_target_file_name))
        this_target_value_dict = target_val_utils.read_target_values(
            netcdf_file_name=this_target_file_name,
            target_names=[new_target_name])

        these_storm_indices = numpy.where(
            storm_spc_date_strings == unique_spc_date_strings[i]
        )[0]

        these_target_indices = tracking_utils.find_storm_objects(
            all_id_strings=this_target_value_dict[
                target_val_utils.FULL_IDS_KEY],
            all_times_unix_sec=this_target_value_dict[
                target_val_utils.VALID_TIMES_KEY],
            id_strings_to_keep=[
                full_storm_id_strings[k] for k in these_storm_indices
            ],
            times_to_keep_unix_sec=storm_times_unix_sec[these_storm_indices],
            allow_missing=False)

        new_target_values[these_storm_indices] = this_target_value_dict[
            target_val_utils.TARGET_MATRIX_KEY][these_target_indices, 0]

    assert not numpy.any(numpy.isnan(new_target_values))
    new_target_values = numpy.round(new_target_values).astype(int)

    bad_indices = numpy.where(new_target_values != orig_target_values)[0]

    print((
        '\n{0:d} of {1:d} new target values do not match the original value.'
    ).format(
        len(bad_indices), num_storm_objects
    ))

    new_target_values[bad_indices] = -1
    return new_target_values

def read_polygons_from_netcdf(netcdf_file_name, metadata_dict=None,
                              spc_date_unix_sec=None,
                              tracking_start_time_unix_sec=None,
                              tracking_end_time_unix_sec=None,
                              raise_error_if_fails=True):
    """Reads storm polygons (outlines of storm cells) from NetCDF file.

    P = number of grid points in storm cell (different for each storm cell)
    V = number of vertices in storm polygon (different for each storm cell)

    If file cannot be opened, returns None.

    :param netcdf_file_name: Path to input file.
    :param metadata_dict: Dictionary with metadata for NetCDF file, created by
        `radar_io.read_metadata_from_raw_file`.
    :param spc_date_unix_sec: SPC date in Unix format.
    :param tracking_start_time_unix_sec: Start time for tracking period.  This
        can be found by `get_start_end_times_for_spc_date`.
    :param tracking_end_time_unix_sec: End time for tracking period.  This can
        be found by `get_start_end_times_for_spc_date`.
    :param raise_error_if_fails: Boolean flag.  If True and file cannot be
        opened, this method will raise an error.
    :return: polygon_table: If file cannot be opened and
        raise_error_if_fails = False, this is None.  Otherwise, it is a pandas
        DataFrame with the following columns.
    polygon_table.storm_id: String ID for storm cell.
    polygon_table.unix_time_sec: Time in Unix format.
    polygon_table.spc_date_unix_sec: SPC date in Unix format.
    polygon_table.tracking_start_time_unix_sec: Start time for tracking
        period.
    polygon_table.tracking_end_time_unix_sec: End time for tracking period.
    polygon_table.centroid_lat_deg: Latitude at centroid of storm cell
        (deg N).
    polygon_table.centroid_lng_deg: Longitude at centroid of storm cell
        (deg E).
    polygon_table.grid_point_latitudes_deg: length-P numpy array with
        latitudes (deg N) of grid points in storm cell.
    polygon_table.grid_point_longitudes_deg: length-P numpy array with
        longitudes (deg E) of grid points in storm cell.
    polygon_table.grid_point_rows: length-P numpy array with row indices (all
        integers) of grid points in storm cell.
    polygon_table.grid_point_columns: length-P numpy array with column indices
        (all integers) of grid points in storm cell.
    polygon_table.polygon_object_latlng: Instance of
        `shapely.geometry.Polygon` with vertices in lat-long coordinates.
    polygon_table.polygon_object_rowcol: Instance of
        `shapely.geometry.Polygon` with vertices in row-column coordinates.
    """

    error_checking.assert_file_exists(netcdf_file_name)
    error_checking.assert_is_integer(spc_date_unix_sec)
    error_checking.assert_is_not_nan(spc_date_unix_sec)
    error_checking.assert_is_integer(tracking_start_time_unix_sec)
    error_checking.assert_is_not_nan(tracking_start_time_unix_sec)
    error_checking.assert_is_integer(tracking_end_time_unix_sec)
    error_checking.assert_is_not_nan(tracking_end_time_unix_sec)

    netcdf_dataset = netcdf_io.open_netcdf(
        netcdf_file_name, raise_error_if_fails)
    if netcdf_dataset is None:
        return None

    storm_id_var_name = metadata_dict[radar_io.FIELD_NAME_COLUMN]
    storm_id_var_name_orig = metadata_dict[radar_io.FIELD_NAME_COLUMN_ORIG]
    num_values = len(netcdf_dataset.variables[radar_io.GRID_ROW_COLUMN_ORIG])

    if num_values == 0:
        sparse_grid_dict = {
            radar_io.GRID_ROW_COLUMN: numpy.array([], dtype=int),
            radar_io.GRID_COLUMN_COLUMN: numpy.array([], dtype=int),
            radar_io.NUM_GRID_CELL_COLUMN: numpy.array([], dtype=int),
            storm_id_var_name: numpy.array([], dtype=int)
        }
    else:
        sparse_grid_dict = {
            radar_io.GRID_ROW_COLUMN:
                netcdf_dataset.variables[radar_io.GRID_ROW_COLUMN_ORIG][:],
            radar_io.GRID_COLUMN_COLUMN:
                netcdf_dataset.variables[radar_io.GRID_COLUMN_COLUMN_ORIG][:],
            radar_io.NUM_GRID_CELL_COLUMN:
                netcdf_dataset.variables[
                    radar_io.NUM_GRID_CELL_COLUMN_ORIG][:],
            storm_id_var_name:
                netcdf_dataset.variables[storm_id_var_name_orig][:]
        }

    netcdf_dataset.close()
    sparse_grid_table = pandas.DataFrame.from_dict(sparse_grid_dict)

    numeric_storm_id_matrix, _, _ = radar_s2f.sparse_to_full_grid(
        sparse_grid_table, metadata_dict)
    polygon_table = _storm_id_matrix_to_coord_lists(numeric_storm_id_matrix)

    num_storms = len(polygon_table.index)
    unix_times_sec = numpy.full(
        num_storms, metadata_dict[radar_io.UNIX_TIME_COLUMN], dtype=int)
    spc_dates_unix_sec = numpy.full(num_storms, spc_date_unix_sec, dtype=int)
    tracking_start_times_unix_sec = numpy.full(
        num_storms, tracking_start_time_unix_sec, dtype=int)
    tracking_end_times_unix_sec = numpy.full(
        num_storms, tracking_end_time_unix_sec, dtype=int)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    storm_ids = _append_spc_date_to_storm_ids(
        polygon_table[tracking_io.STORM_ID_COLUMN].values, spc_date_string)

    # NaN placeholders; the real values are filled in by the loop below.
    simple_array = numpy.full(num_storms, numpy.nan)
    object_array = numpy.full(num_storms, numpy.nan, dtype=object)
    nested_array = polygon_table[[
        tracking_io.STORM_ID_COLUMN, tracking_io.STORM_ID_COLUMN
    ]].values.tolist()

    argument_dict = {
        tracking_io.STORM_ID_COLUMN: storm_ids,
        tracking_io.TIME_COLUMN: unix_times_sec,
        tracking_io.SPC_DATE_COLUMN: spc_dates_unix_sec,
        tracking_io.TRACKING_START_TIME_COLUMN:
            tracking_start_times_unix_sec,
        tracking_io.TRACKING_END_TIME_COLUMN: tracking_end_times_unix_sec,
        tracking_io.CENTROID_LAT_COLUMN: simple_array,
        tracking_io.CENTROID_LNG_COLUMN: simple_array,
        tracking_io.GRID_POINT_LAT_COLUMN: nested_array,
        tracking_io.GRID_POINT_LNG_COLUMN: nested_array,
        tracking_io.POLYGON_OBJECT_LATLNG_COLUMN: object_array,
        tracking_io.POLYGON_OBJECT_ROWCOL_COLUMN: object_array
    }
    polygon_table = polygon_table.assign(**argument_dict)

    for i in range(num_storms):
        these_vertex_rows, these_vertex_columns = (
            polygons.grid_points_in_poly_to_vertices(
                polygon_table[tracking_io.GRID_POINT_ROW_COLUMN].values[i],
                polygon_table[
                    tracking_io.GRID_POINT_COLUMN_COLUMN].values[i]))

        (polygon_table[tracking_io.GRID_POINT_ROW_COLUMN].values[i],
         polygon_table[tracking_io.GRID_POINT_COLUMN_COLUMN].values[i]) = (
             polygons.simple_polygon_to_grid_points(
                 these_vertex_rows, these_vertex_columns))

        (polygon_table[tracking_io.GRID_POINT_LAT_COLUMN].values[i],
         polygon_table[tracking_io.GRID_POINT_LNG_COLUMN].values[i]) = (
             radar_io.rowcol_to_latlng(
                 polygon_table[tracking_io.GRID_POINT_ROW_COLUMN].values[i],
                 polygon_table[
                     tracking_io.GRID_POINT_COLUMN_COLUMN].values[i],
                 nw_grid_point_lat_deg=metadata_dict[
                     radar_io.NW_GRID_POINT_LAT_COLUMN],
                 nw_grid_point_lng_deg=metadata_dict[
                     radar_io.NW_GRID_POINT_LNG_COLUMN],
                 lat_spacing_deg=metadata_dict[radar_io.LAT_SPACING_COLUMN],
                 lng_spacing_deg=metadata_dict[radar_io.LNG_SPACING_COLUMN]))

        these_vertex_lat_deg, these_vertex_lng_deg = (
            radar_io.rowcol_to_latlng(
                these_vertex_rows, these_vertex_columns,
                nw_grid_point_lat_deg=metadata_dict[
                    radar_io.NW_GRID_POINT_LAT_COLUMN],
                nw_grid_point_lng_deg=metadata_dict[
                    radar_io.NW_GRID_POINT_LNG_COLUMN],
                lat_spacing_deg=metadata_dict[radar_io.LAT_SPACING_COLUMN],
                lng_spacing_deg=metadata_dict[radar_io.LNG_SPACING_COLUMN]))

        (polygon_table[tracking_io.CENTROID_LAT_COLUMN].values[i],
         polygon_table[tracking_io.CENTROID_LNG_COLUMN].values[i]) = (
             polygons.get_latlng_centroid(
                 these_vertex_lat_deg, these_vertex_lng_deg))

        polygon_table[tracking_io.POLYGON_OBJECT_ROWCOL_COLUMN].values[i] = (
            polygons.vertex_arrays_to_polygon_object(
                these_vertex_columns, these_vertex_rows))
        polygon_table[tracking_io.POLYGON_OBJECT_LATLNG_COLUMN].values[i] = (
            polygons.vertex_arrays_to_polygon_object(
                these_vertex_lng_deg, these_vertex_lat_deg))

    return polygon_table
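
# Hypothetical usage sketch (not from the original module): reading polygons
# for one segmotion file.  `get_start_end_times_for_spc_date` is the helper
# referenced in the docstring above; it is assumed here to take the SPC date
# (Unix seconds) and return a (start, end) tuple.
def _example_read_polygons(netcdf_file_name, metadata_dict,
                           spc_date_unix_sec):
    """Reads polygons for one time step and reports the number of storms."""

    start_time_unix_sec, end_time_unix_sec = (
        get_start_end_times_for_spc_date(spc_date_unix_sec))

    polygon_table = read_polygons_from_netcdf(
        netcdf_file_name, metadata_dict=metadata_dict,
        spc_date_unix_sec=spc_date_unix_sec,
        tracking_start_time_unix_sec=start_time_unix_sec,
        tracking_end_time_unix_sec=end_time_unix_sec,
        raise_error_if_fails=False)

    if polygon_table is None:
        print('Could not open file: "{0:s}"'.format(netcdf_file_name))
        return None

    print('Read {0:d} storm polygons.'.format(len(polygon_table.index)))
    return polygon_table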
def _link_tornadoes_one_period(tracking_file_names, tornado_dir_name,
                               genesis_only, top_output_dir_name):
    """Links tornadoes to storms for one continuous period.

    :param tracking_file_names: 1-D list of paths to tracking files.  Each
        will be read by `storm_tracking_io.read_processed_file`.
    :param tornado_dir_name: See documentation at top of file.
    :param genesis_only: Same.
    :param top_output_dir_name: Same.
    """

    storm_to_tornadoes_table, tornado_to_storm_table, metadata_dict = (
        linkage.link_storms_to_tornadoes(
            tracking_file_names=tracking_file_names,
            tornado_directory_name=tornado_dir_name,
            genesis_only=genesis_only))
    print(SEPARATOR_STRING)

    event_type_string = (
        linkage.TORNADOGENESIS_EVENT_STRING if genesis_only
        else linkage.TORNADO_EVENT_STRING
    )

    spc_date_string_by_storm_object = [
        time_conversion.time_to_spc_date_string(t) for t in
        storm_to_tornadoes_table[tracking_utils.VALID_TIME_COLUMN].values
    ]

    unique_spc_date_strings, orig_to_unique_indices = numpy.unique(
        numpy.array(spc_date_string_by_storm_object), return_inverse=True)

    for i in range(len(unique_spc_date_strings)):
        this_output_file_name = linkage.find_linkage_file(
            top_directory_name=top_output_dir_name,
            event_type_string=event_type_string,
            spc_date_string=unique_spc_date_strings[i],
            raise_error_if_missing=False)

        print('Writing linkages to: "{0:s}"...'.format(
            this_output_file_name))

        these_storm_object_rows = numpy.where(
            orig_to_unique_indices == i)[0]
        these_storm_times_unix_sec = storm_to_tornadoes_table[
            tracking_utils.VALID_TIME_COLUMN].values[these_storm_object_rows]

        this_min_time_unix_sec = (
            numpy.min(these_storm_times_unix_sec) -
            metadata_dict[linkage.MAX_TIME_BEFORE_START_KEY]
        )
        this_max_time_unix_sec = (
            numpy.max(these_storm_times_unix_sec) +
            metadata_dict[linkage.MAX_TIME_AFTER_END_KEY]
        )

        if genesis_only:
            these_event_rows = numpy.where(numpy.logical_and(
                tornado_to_storm_table[linkage.EVENT_TIME_COLUMN].values >=
                this_min_time_unix_sec,
                tornado_to_storm_table[linkage.EVENT_TIME_COLUMN].values <=
                this_max_time_unix_sec
            ))[0]

            this_tornado_to_storm_table = tornado_to_storm_table.iloc[
                these_event_rows]
        else:
            column_dict_old_to_new = {
                linkage.EVENT_TIME_COLUMN: tornado_io.TIME_COLUMN,
                linkage.EVENT_LATITUDE_COLUMN: tornado_io.LATITUDE_COLUMN,
                linkage.EVENT_LONGITUDE_COLUMN: tornado_io.LONGITUDE_COLUMN
            }

            this_tornado_table = tornado_to_storm_table.rename(
                columns=column_dict_old_to_new, inplace=False)
            this_tornado_table = tornado_io.segments_to_tornadoes(
                this_tornado_table)
            this_tornado_table = tornado_io.subset_tornadoes(
                tornado_table=this_tornado_table,
                min_time_unix_sec=this_min_time_unix_sec,
                max_time_unix_sec=this_max_time_unix_sec)

            these_tornado_id_strings = this_tornado_table[
                tornado_io.TORNADO_ID_COLUMN].values

            this_tornado_to_storm_table = tornado_to_storm_table.loc[
                tornado_to_storm_table[tornado_io.TORNADO_ID_COLUMN].isin(
                    these_tornado_id_strings)
            ]

        linkage.write_linkage_file(
            pickle_file_name=this_output_file_name,
            storm_to_events_table=storm_to_tornadoes_table.iloc[
                these_storm_object_rows],
            metadata_dict=metadata_dict,
            tornado_to_storm_table=this_tornado_to_storm_table)

    print(SEPARATOR_STRING)
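
# Hypothetical usage sketch (not from the original module): linking tornado
# occurrence (not just genesis) for one continuous period.  The directory
# names below are made up.
def _example_link_one_period(tracking_file_names):
    """Links tornadoes to storms for one continuous tracking period."""

    _link_tornadoes_one_period(
        tracking_file_names=tracking_file_names,
        tornado_dir_name='/data/tornado_reports',   # hypothetical path
        genesis_only=False,
        top_output_dir_name='/data/linkages')       # hypothetical path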
def find_polygon_files_for_spc_date(spc_date_unix_sec=None,
                                    top_raw_directory_name=None,
                                    tracking_scale_metres2=None,
                                    raise_error_if_missing=True):
    """Finds all polygon files for one SPC date.

    :param spc_date_unix_sec: SPC date.
    :param top_raw_directory_name: Name of top-level directory with raw
        segmotion files.
    :param tracking_scale_metres2: Tracking scale.
    :param raise_error_if_missing: If True and no files can be found, this
        method will raise an error.
    :return: polygon_file_names: 1-D list of paths to polygon files.
    """

    error_checking.assert_is_string(top_raw_directory_name)

    spc_date_string = time_conversion.time_to_spc_date_string(
        spc_date_unix_sec)
    directory_name = '{0:s}/{1:s}'.format(
        top_raw_directory_name,
        _get_relative_polygon_dir_physical_scale(
            spc_date_string, tracking_scale_metres2))

    first_hour_unix_sec = SPC_DATE_START_HOUR * HOURS_TO_SECONDS + (
        time_conversion.string_to_unix_sec(
            spc_date_string, time_conversion.SPC_DATE_FORMAT))
    last_hour_unix_sec = SPC_DATE_END_HOUR * HOURS_TO_SECONDS + (
        time_conversion.string_to_unix_sec(
            spc_date_string, time_conversion.SPC_DATE_FORMAT))

    hours_in_spc_date_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_hour_unix_sec,
        end_time_unix_sec=last_hour_unix_sec,
        time_interval_sec=HOURS_TO_SECONDS, include_endpoint=True)

    polygon_file_names = []

    for this_hour_unix_sec in hours_in_spc_date_unix_sec:
        this_time_string_seconds = time_conversion.unix_sec_to_string(
            this_hour_unix_sec, TIME_FORMAT_IN_FILES)
        this_time_string_hours = time_conversion.unix_sec_to_string(
            this_hour_unix_sec, TIME_FORMAT_IN_FILES_HOUR_ONLY) + '*'

        this_pathless_file_name_zipped = _get_pathless_polygon_file_name(
            this_hour_unix_sec, zipped=True)
        this_pathless_file_pattern_zipped = (
            this_pathless_file_name_zipped.replace(
                this_time_string_seconds, this_time_string_hours))
        this_file_pattern_zipped = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_file_pattern_zipped)

        these_polygon_file_names_zipped = glob.glob(this_file_pattern_zipped)
        if these_polygon_file_names_zipped:
            polygon_file_names += these_polygon_file_names_zipped

        this_pathless_file_name_unzipped = _get_pathless_polygon_file_name(
            this_hour_unix_sec, zipped=False)
        this_pathless_file_pattern_unzipped = (
            this_pathless_file_name_unzipped.replace(
                this_time_string_seconds, this_time_string_hours))
        this_file_pattern_unzipped = '{0:s}/{1:s}'.format(
            directory_name, this_pathless_file_pattern_unzipped)

        these_polygon_file_names_unzipped = glob.glob(
            this_file_pattern_unzipped)

        for this_file_name_unzipped in these_polygon_file_names_unzipped:
            this_file_name_zipped = (
                this_file_name_unzipped + GZIP_FILE_EXTENSION)
            if this_file_name_zipped in polygon_file_names:
                continue

            polygon_file_names.append(this_file_name_unzipped)

    if raise_error_if_missing and not polygon_file_names:
        raise ValueError(
            'Cannot find any polygon files in directory: ' + directory_name)

    polygon_file_names.sort()
    return polygon_file_names
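
# Hypothetical usage sketch (not from the original module): finding polygon
# files for the SPC date of 1 June 2011.  The directory name and tracking
# scale below are made up.
def _example_find_polygon_files():
    """Finds all polygon files for one SPC date, without raising on misses."""

    spc_date_unix_sec = time_conversion.string_to_unix_sec(
        '20110601', time_conversion.SPC_DATE_FORMAT)

    return find_polygon_files_for_spc_date(
        spc_date_unix_sec=spc_date_unix_sec,
        top_raw_directory_name='/data/segmotion_raw',   # hypothetical path
        tracking_scale_metres2=314159265,               # hypothetical scale
        raise_error_if_missing=False)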
def _run(top_gridrad_dir_name, first_spc_date_string, last_spc_date_string,
         colour_map_name, grid_spacing_metres, output_file_name):
    """Plots GridRad domains.

    This is effectively the main method.

    :param top_gridrad_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param colour_map_name: Same.
    :param grid_spacing_metres: Same.
    :param output_file_name: Same.
    """

    colour_map_object = pyplot.get_cmap(colour_map_name)
    file_system_utils.mkdir_recursive_if_necessary(
        file_name=output_file_name)

    first_time_unix_sec = time_conversion.get_start_of_spc_date(
        first_spc_date_string)
    last_time_unix_sec = time_conversion.get_end_of_spc_date(
        last_spc_date_string)

    valid_times_unix_sec = time_periods.range_and_interval_to_list(
        start_time_unix_sec=first_time_unix_sec,
        end_time_unix_sec=last_time_unix_sec,
        time_interval_sec=TIME_INTERVAL_SEC, include_endpoint=True)

    valid_spc_date_strings = [
        time_conversion.time_to_spc_date_string(t)
        for t in valid_times_unix_sec
    ]

    domain_min_latitudes_deg = []
    domain_max_latitudes_deg = []
    domain_min_longitudes_deg = []
    domain_max_longitudes_deg = []

    prev_domain_limits_deg = numpy.full(4, numpy.nan)
    prev_spc_date_string = 'foo'
    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        this_gridrad_file_name = gridrad_io.find_file(
            unix_time_sec=valid_times_unix_sec[i],
            top_directory_name=top_gridrad_dir_name,
            raise_error_if_missing=False)

        if not os.path.isfile(this_gridrad_file_name):
            continue

        these_domain_limits_deg = _get_domain_one_file(
            this_gridrad_file_name)

        same_domain = (
            valid_spc_date_strings[i] == prev_spc_date_string and
            numpy.allclose(these_domain_limits_deg, prev_domain_limits_deg,
                           TOLERANCE)
        )

        if same_domain:
            continue

        prev_domain_limits_deg = these_domain_limits_deg + 0.
        prev_spc_date_string = valid_spc_date_strings[i]

        domain_min_latitudes_deg.append(these_domain_limits_deg[0])
        domain_max_latitudes_deg.append(these_domain_limits_deg[1])
        domain_min_longitudes_deg.append(these_domain_limits_deg[2])
        domain_max_longitudes_deg.append(these_domain_limits_deg[3])

    print(SEPARATOR_STRING)

    domain_min_latitudes_deg = numpy.array(domain_min_latitudes_deg)
    domain_max_latitudes_deg = numpy.array(domain_max_latitudes_deg)
    domain_min_longitudes_deg = numpy.array(domain_min_longitudes_deg)
    domain_max_longitudes_deg = numpy.array(domain_max_longitudes_deg)
    num_domains = len(domain_min_latitudes_deg)

    grid_metadata_dict = grids.create_equidistant_grid(
        min_latitude_deg=OVERALL_MIN_LATITUDE_DEG,
        max_latitude_deg=OVERALL_MAX_LATITUDE_DEG,
        min_longitude_deg=OVERALL_MIN_LONGITUDE_DEG,
        max_longitude_deg=OVERALL_MAX_LONGITUDE_DEG,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres, azimuthal=False)

    unique_x_coords_metres = grid_metadata_dict[grids.X_COORDS_KEY]
    unique_y_coords_metres = grid_metadata_dict[grids.Y_COORDS_KEY]
    projection_object = grid_metadata_dict[grids.PROJECTION_KEY]

    x_coord_matrix_metres, y_coord_matrix_metres = (
        grids.xy_vectors_to_matrices(
            x_unique_metres=unique_x_coords_metres,
            y_unique_metres=unique_y_coords_metres))

    latitude_matrix_deg, longitude_matrix_deg = (
        projections.project_xy_to_latlng(
            x_coords_metres=x_coord_matrix_metres,
            y_coords_metres=y_coord_matrix_metres,
            projection_object=projection_object))

    num_grid_rows = latitude_matrix_deg.shape[0]
    num_grid_columns = latitude_matrix_deg.shape[1]
    num_days_matrix = numpy.full((num_grid_rows, num_grid_columns), 0)

    for i in range(num_domains):
        if numpy.mod(i, 10) == 0:
            print((
                'Have found grid points in {0:d} of {1:d} domains...'
            ).format(i, num_domains))

        this_lat_flag_matrix = numpy.logical_and(
            latitude_matrix_deg >= domain_min_latitudes_deg[i],
            latitude_matrix_deg <= domain_max_latitudes_deg[i])
        this_lng_flag_matrix = numpy.logical_and(
            longitude_matrix_deg >= domain_min_longitudes_deg[i],
            longitude_matrix_deg <= domain_max_longitudes_deg[i])

        num_days_matrix += numpy.logical_and(
            this_lat_flag_matrix, this_lng_flag_matrix
        ).astype(int)

    print(SEPARATOR_STRING)

    figure_object, axes_object = _plot_data(
        num_days_matrix=num_days_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=colour_map_object)

    plotting_utils.label_axes(axes_object=axes_object, label_string='(c)')

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(
        output_file_name, dpi=FIGURE_RESOLUTION_DPI, pad_inches=0,
        bbox_inches='tight')
    pyplot.close(figure_object)
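
# Toy sketch of the domain-counting logic above (not part of the original
# script): for a small 2-D grid of lat-long coordinates, count how many
# made-up domains contain each grid point.  Only numpy is needed.
def _example_count_domains():
    """Counts overlapping domains at each grid point of a toy grid."""

    latitudes_deg = numpy.linspace(25., 50., num=6)
    longitudes_deg = numpy.linspace(265., 290., num=6)
    longitude_matrix_deg, latitude_matrix_deg = numpy.meshgrid(
        longitudes_deg, latitudes_deg)

    # Each row is one domain: [min lat, max lat, min long, max long].
    domain_limit_matrix_deg = numpy.array([
        [30., 40., 270., 280.],
        [35., 45., 275., 285.]
    ])

    num_days_matrix = numpy.full(latitude_matrix_deg.shape, 0)

    for this_row in domain_limit_matrix_deg:
        this_lat_flag_matrix = numpy.logical_and(
            latitude_matrix_deg >= this_row[0],
            latitude_matrix_deg <= this_row[1])
        this_lng_flag_matrix = numpy.logical_and(
            longitude_matrix_deg >= this_row[2],
            longitude_matrix_deg <= this_row[3])

        num_days_matrix += numpy.logical_and(
            this_lat_flag_matrix, this_lng_flag_matrix
        ).astype(int)

    return num_days_matrix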
def _shuffle_data_with_smart_io(storm_object_table=None, file_dict=None,
                                working_spc_date_unix_sec=None,
                                read_from_intermediate=None):
    """Shuffles data with smart IO.

    Specifically, this method ensures that only SPC dates (k - 1)...(k + 1)
    are in memory, where k is the date currently being worked on.

    :param storm_object_table: pandas DataFrame with columns documented in
        `_write_intermediate_results`.
    :param file_dict: See documentation for `find_files_for_smart_io`.
    :param working_spc_date_unix_sec: Next SPC date to work on.
    :param read_from_intermediate: Boolean flag.  If True, will read from
        intermediate files.  If False, will read from input files.
    :return: storm_object_table: pandas DataFrame with columns documented in
        `_write_intermediate_results`.
    """

    working_spc_date_index = numpy.where(
        file_dict[SPC_DATES_KEY] == working_spc_date_unix_sec)[0][0]
    num_spc_dates = len(file_dict[SPC_DATES_KEY])

    if working_spc_date_index == 0:
        read_spc_date_indices = numpy.array([0, 1], dtype=int)
        write_spc_date_indices = numpy.array(
            [num_spc_dates - 2, num_spc_dates - 1], dtype=int)
        clear_table = True
    elif working_spc_date_index == num_spc_dates - 1:
        read_spc_date_indices = numpy.array([], dtype=int)
        write_spc_date_indices = numpy.array([num_spc_dates - 3], dtype=int)
        clear_table = False
    else:
        read_spc_date_indices = numpy.array(
            [working_spc_date_index + 1], dtype=int)
        write_spc_date_indices = numpy.array(
            [working_spc_date_index - 2], dtype=int)
        clear_table = False

    # Keep only indices that actually exist.
    read_spc_date_indices = read_spc_date_indices[read_spc_date_indices >= 0]
    read_spc_date_indices = read_spc_date_indices[
        read_spc_date_indices < num_spc_dates]
    write_spc_date_indices = write_spc_date_indices[
        write_spc_date_indices >= 0]
    write_spc_date_indices = write_spc_date_indices[
        write_spc_date_indices < num_spc_dates]

    if storm_object_table is not None:
        for this_index in write_spc_date_indices:
            this_spc_date_unix_sec = file_dict[SPC_DATES_KEY][this_index]
            this_spc_date_string = time_conversion.time_to_spc_date_string(
                this_spc_date_unix_sec)

            this_spc_date_indices = numpy.where(
                storm_object_table[tracking_io.SPC_DATE_COLUMN].values ==
                this_spc_date_unix_sec)[0]

            this_temp_file_name = file_dict[TEMP_FILE_NAMES_KEY][this_index]
            print('Writing intermediate data for {0:s}: {1:s}...'.format(
                this_spc_date_string, this_temp_file_name))

            _write_intermediate_results(
                storm_object_table.iloc[this_spc_date_indices],
                this_temp_file_name)
            storm_object_table.drop(
                storm_object_table.index[this_spc_date_indices], axis=0,
                inplace=True)

    if clear_table:
        storm_object_table = None

    for this_index in read_spc_date_indices:
        this_spc_date_unix_sec = file_dict[SPC_DATES_KEY][this_index]
        this_spc_date_string = time_conversion.time_to_spc_date_string(
            this_spc_date_unix_sec)

        if read_from_intermediate:
            this_temp_file_name = file_dict[TEMP_FILE_NAMES_KEY][this_index]
            print('Reading intermediate data for {0:s}: {1:s}...'.format(
                this_spc_date_string, this_temp_file_name))

            this_storm_object_table = _read_intermediate_results(
                this_temp_file_name)
        else:
            this_storm_object_table = best_tracks.read_input_storm_objects(
                file_dict[INPUT_FILE_NAMES_KEY][this_index],
                keep_spc_date=True)

            these_centroid_x_metres, these_centroid_y_metres = (
                projections.project_latlng_to_xy(
                    this_storm_object_table[
                        tracking_io.CENTROID_LAT_COLUMN].values,
                    this_storm_object_table[
                        tracking_io.CENTROID_LNG_COLUMN].values,
                    projection_object=PROJECTION_OBJECT,
                    false_easting_metres=0., false_northing_metres=0.))

            argument_dict = {
                best_tracks.CENTROID_X_COLUMN: these_centroid_x_metres,
                best_tracks.CENTROID_Y_COLUMN: these_centroid_y_metres
            }
            this_storm_object_table = this_storm_object_table.assign(
                **argument_dict)
            this_storm_object_table.drop(
                [tracking_io.CENTROID_LAT_COLUMN,
                 tracking_io.CENTROID_LNG_COLUMN],
                axis=1, inplace=True)

        if storm_object_table is None:
            storm_object_table = copy.deepcopy(this_storm_object_table)
        else:
            this_storm_object_table, _ = this_storm_object_table.align(
                storm_object_table, axis=1)
            storm_object_table = pandas.concat(
                [storm_object_table, this_storm_object_table], axis=0,
                ignore_index=True)

    return storm_object_table
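
# Toy sketch of the smart-IO window above (not part of the original module):
# for each working date k, dates (k - 1)...(k + 1) stay in memory, the date
# two behind is written to an intermediate file, and the date ahead is read
# in.  This version does index arithmetic only; no files are touched.
def _example_smart_io_windows(num_spc_dates):
    """Prints which SPC-date indices are read/written at each step."""

    for k in range(num_spc_dates):
        if k == 0:
            read_indices = [0, 1]
            write_indices = [num_spc_dates - 2, num_spc_dates - 1]
        elif k == num_spc_dates - 1:
            read_indices = []
            write_indices = [num_spc_dates - 3]
        else:
            read_indices = [k + 1]
            write_indices = [k - 2]

        # Keep only indices that actually exist, as the real method does.
        read_indices = [i for i in read_indices if 0 <= i < num_spc_dates]
        write_indices = [i for i in write_indices if 0 <= i < num_spc_dates]

        print('Date {0:d}: read {1:s}, write {2:s}'.format(
            k, str(read_indices), str(write_indices)))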
def _run(top_tracking_dir_name, first_spc_date_string, last_spc_date_string,
         storm_colour, storm_opacity, include_secondary_ids,
         min_plot_latitude_deg, max_plot_latitude_deg,
         min_plot_longitude_deg, max_plot_longitude_deg,
         top_myrorss_dir_name, radar_field_name, radar_height_m_asl,
         output_dir_name):
    """Plots storm outlines (along with IDs) at each time step.

    This is effectively the main method.

    :param top_tracking_dir_name: See documentation at top of file.
    :param first_spc_date_string: Same.
    :param last_spc_date_string: Same.
    :param storm_colour: Same.
    :param storm_opacity: Same.
    :param include_secondary_ids: Same.
    :param min_plot_latitude_deg: Same.
    :param max_plot_latitude_deg: Same.
    :param min_plot_longitude_deg: Same.
    :param max_plot_longitude_deg: Same.
    :param top_myrorss_dir_name: Same.
    :param radar_field_name: Same.
    :param radar_height_m_asl: Same.
    :param output_dir_name: Same.
    """

    if top_myrorss_dir_name in ['', 'None']:
        top_myrorss_dir_name = None
    if radar_field_name != radar_utils.REFL_NAME:
        radar_height_m_asl = None

    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)

    spc_date_strings = time_conversion.get_spc_dates_in_range(
        first_spc_date_string=first_spc_date_string,
        last_spc_date_string=last_spc_date_string)

    tracking_file_names = []

    for this_spc_date_string in spc_date_strings:
        tracking_file_names += tracking_io.find_files_one_spc_date(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
            source_name=DUMMY_SOURCE_NAME,
            spc_date_string=this_spc_date_string,
            raise_error_if_missing=False
        )[0]

    storm_object_table = tracking_io.read_many_files(tracking_file_names)
    print(SEPARATOR_STRING)

    latitude_limits_deg, longitude_limits_deg = _get_plotting_limits(
        min_plot_latitude_deg=min_plot_latitude_deg,
        max_plot_latitude_deg=max_plot_latitude_deg,
        min_plot_longitude_deg=min_plot_longitude_deg,
        max_plot_longitude_deg=max_plot_longitude_deg,
        storm_object_table=storm_object_table)

    min_plot_latitude_deg = latitude_limits_deg[0]
    max_plot_latitude_deg = latitude_limits_deg[1]
    min_plot_longitude_deg = longitude_limits_deg[0]
    max_plot_longitude_deg = longitude_limits_deg[1]

    valid_times_unix_sec = numpy.unique(
        storm_object_table[tracking_utils.VALID_TIME_COLUMN].values)
    num_times = len(valid_times_unix_sec)

    for i in range(num_times):
        these_current_rows = numpy.where(
            storm_object_table[tracking_utils.VALID_TIME_COLUMN].values ==
            valid_times_unix_sec[i]
        )[0]

        these_current_subrows = _filter_storm_objects_latlng(
            storm_object_table=storm_object_table.iloc[these_current_rows],
            min_latitude_deg=min_plot_latitude_deg,
            max_latitude_deg=max_plot_latitude_deg,
            min_longitude_deg=min_plot_longitude_deg,
            max_longitude_deg=max_plot_longitude_deg)

        if len(these_current_subrows) == 0:
            continue

        these_current_rows = these_current_rows[these_current_subrows]

        this_storm_object_table = _find_relevant_storm_objects(
            storm_object_table=storm_object_table,
            current_rows=these_current_rows)

        these_latlng_rows = _filter_storm_objects_latlng(
            storm_object_table=this_storm_object_table,
            min_latitude_deg=min_plot_latitude_deg,
            max_latitude_deg=max_plot_latitude_deg,
            min_longitude_deg=min_plot_longitude_deg,
            max_longitude_deg=max_plot_longitude_deg)

        if top_myrorss_dir_name is None:
            this_radar_matrix = None
            these_radar_latitudes_deg = None
            these_radar_longitudes_deg = None
        else:
            this_myrorss_file_name = myrorss_and_mrms_io.find_raw_file(
                top_directory_name=top_myrorss_dir_name,
                unix_time_sec=valid_times_unix_sec[i],
                spc_date_string=time_conversion.time_to_spc_date_string(
                    valid_times_unix_sec[i]),
                field_name=radar_field_name,
                data_source=radar_utils.MYRORSS_SOURCE_ID,
                height_m_asl=radar_height_m_asl,
                raise_error_if_missing=True)

            print('Reading data from: "{0:s}"...'.format(
                this_myrorss_file_name))

            this_metadata_dict = (
                myrorss_and_mrms_io.read_metadata_from_raw_file(
                    netcdf_file_name=this_myrorss_file_name,
                    data_source=radar_utils.MYRORSS_SOURCE_ID))

            this_sparse_grid_table = (
                myrorss_and_mrms_io.read_data_from_sparse_grid_file(
                    netcdf_file_name=this_myrorss_file_name,
                    field_name_orig=this_metadata_dict[
                        myrorss_and_mrms_io.FIELD_NAME_COLUMN_ORIG],
                    data_source=radar_utils.MYRORSS_SOURCE_ID,
                    sentinel_values=this_metadata_dict[
                        radar_utils.SENTINEL_VALUE_COLUMN]))

            (this_radar_matrix, these_radar_latitudes_deg,
             these_radar_longitudes_deg
            ) = radar_s2f.sparse_to_full_grid(
                sparse_grid_table=this_sparse_grid_table,
                metadata_dict=this_metadata_dict)

            this_radar_matrix = numpy.flipud(this_radar_matrix)
            these_radar_latitudes_deg = these_radar_latitudes_deg[::-1]

        _, this_axes_object, this_basemap_object = (
            plotting_utils.create_equidist_cylindrical_map(
                min_latitude_deg=min_plot_latitude_deg,
                max_latitude_deg=max_plot_latitude_deg,
                min_longitude_deg=min_plot_longitude_deg,
                max_longitude_deg=max_plot_longitude_deg,
                resolution_string='i'))

        _plot_storm_outlines_one_time(
            storm_object_table=this_storm_object_table.iloc[
                these_latlng_rows],
            valid_time_unix_sec=valid_times_unix_sec[i],
            axes_object=this_axes_object,
            basemap_object=this_basemap_object, storm_colour=storm_colour,
            storm_opacity=storm_opacity,
            include_secondary_ids=include_secondary_ids,
            output_dir_name=output_dir_name,
            radar_matrix=this_radar_matrix,
            radar_field_name=radar_field_name,
            radar_latitudes_deg=these_radar_latitudes_deg,
            radar_longitudes_deg=these_radar_longitudes_deg)
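
# Hypothetical usage sketch (not part of the original script): plotting storm
# outlines for one SPC date without a MYRORSS underlay.  Paths, colour, and
# lat-long limits are made up; passing 'None' for the MYRORSS directory
# triggers the no-radar branch above.
def _example_plot_storm_outlines():
    """Plots storm outlines for the SPC date of 1 June 2011."""

    _run(top_tracking_dir_name='/data/tracking',        # hypothetical path
         first_spc_date_string='20110601',
         last_spc_date_string='20110601',
         storm_colour=numpy.full(3, 0.), storm_opacity=0.5,
         include_secondary_ids=False,
         min_plot_latitude_deg=30., max_plot_latitude_deg=40.,
         min_plot_longitude_deg=265., max_plot_longitude_deg=275.,
         top_myrorss_dir_name='None',
         radar_field_name=radar_utils.REFL_NAME, radar_height_m_asl=3000,
         output_dir_name='/output/storm_outlines')      # hypothetical path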
def _plot_tornado_and_radar(top_myrorss_dir_name, radar_field_name,
                            radar_height_m_asl, spc_date_string,
                            tornado_table, tornado_row, output_file_name):
    """Plots one unlinked tornado with radar field.

    :param top_myrorss_dir_name: See documentation at top of file.
    :param radar_field_name: Same.
    :param radar_height_m_asl: Same.
    :param spc_date_string: SPC date for linkage file (format "yyyymmdd").
    :param tornado_table: pandas DataFrame created by
        `linkage._read_input_tornado_reports`.
    :param tornado_row: Will plot only tornado in [j]th row of table, where
        j = `tornado_row`.
    :param output_file_name: Path to output file.  Figure will be saved here.
    """

    tornado_time_unix_sec = tornado_table[
        linkage.EVENT_TIME_COLUMN].values[tornado_row]

    radar_time_unix_sec = number_rounding.round_to_nearest(
        tornado_time_unix_sec, RADAR_TIME_INTERVAL_SEC)
    radar_spc_date_string = time_conversion.time_to_spc_date_string(
        radar_time_unix_sec)

    radar_file_name = myrorss_and_mrms_io.find_raw_file(
        top_directory_name=top_myrorss_dir_name,
        spc_date_string=radar_spc_date_string,
        unix_time_sec=radar_time_unix_sec,
        data_source=radar_utils.MYRORSS_SOURCE_ID,
        field_name=radar_field_name, height_m_asl=radar_height_m_asl,
        raise_error_if_missing=spc_date_string == radar_spc_date_string)

    if not os.path.isfile(radar_file_name):
        first_radar_time_unix_sec = number_rounding.ceiling_to_nearest(
            time_conversion.get_start_of_spc_date(spc_date_string),
            RADAR_TIME_INTERVAL_SEC)
        last_radar_time_unix_sec = number_rounding.floor_to_nearest(
            time_conversion.get_end_of_spc_date(spc_date_string),
            RADAR_TIME_INTERVAL_SEC)

        radar_time_unix_sec = max(
            [radar_time_unix_sec, first_radar_time_unix_sec])
        radar_time_unix_sec = min(
            [radar_time_unix_sec, last_radar_time_unix_sec])

        radar_file_name = myrorss_and_mrms_io.find_raw_file(
            top_directory_name=top_myrorss_dir_name,
            spc_date_string=spc_date_string,
            unix_time_sec=radar_time_unix_sec,
            data_source=radar_utils.MYRORSS_SOURCE_ID,
            field_name=radar_field_name, height_m_asl=radar_height_m_asl,
            raise_error_if_missing=True)

    radar_metadata_dict = myrorss_and_mrms_io.read_metadata_from_raw_file(
        netcdf_file_name=radar_file_name,
        data_source=radar_utils.MYRORSS_SOURCE_ID)

    sparse_grid_table = myrorss_and_mrms_io.read_data_from_sparse_grid_file(
        netcdf_file_name=radar_file_name,
        field_name_orig=radar_metadata_dict[
            myrorss_and_mrms_io.FIELD_NAME_COLUMN_ORIG],
        data_source=radar_utils.MYRORSS_SOURCE_ID,
        sentinel_values=radar_metadata_dict[
            radar_utils.SENTINEL_VALUE_COLUMN])

    radar_matrix, grid_point_latitudes_deg, grid_point_longitudes_deg = (
        radar_s2f.sparse_to_full_grid(
            sparse_grid_table=sparse_grid_table,
            metadata_dict=radar_metadata_dict))

    radar_matrix = numpy.flip(radar_matrix, axis=0)
    grid_point_latitudes_deg = grid_point_latitudes_deg[::-1]

    axes_object, basemap_object = (
        plotting_utils.create_equidist_cylindrical_map(
            min_latitude_deg=numpy.min(grid_point_latitudes_deg),
            max_latitude_deg=numpy.max(grid_point_latitudes_deg),
            min_longitude_deg=numpy.min(grid_point_longitudes_deg),
            max_longitude_deg=numpy.max(grid_point_longitudes_deg),
            resolution_string='i')[1:]
    )

    plotting_utils.plot_coastlines(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)
    plotting_utils.plot_countries(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)
    plotting_utils.plot_states_and_provinces(
        basemap_object=basemap_object, axes_object=axes_object,
        line_colour=BORDER_COLOUR)
    plotting_utils.plot_parallels(
        basemap_object=basemap_object, axes_object=axes_object,
        num_parallels=NUM_PARALLELS)
    plotting_utils.plot_meridians(
        basemap_object=basemap_object, axes_object=axes_object,
        num_meridians=NUM_MERIDIANS)

    radar_plotting.plot_latlng_grid(
        field_matrix=radar_matrix, field_name=radar_field_name,
        axes_object=axes_object,
        min_grid_point_latitude_deg=numpy.min(grid_point_latitudes_deg),
        min_grid_point_longitude_deg=numpy.min(grid_point_longitudes_deg),
        latitude_spacing_deg=numpy.diff(grid_point_latitudes_deg[:2])[0],
        longitude_spacing_deg=numpy.diff(grid_point_longitudes_deg[:2])[0])

    tornado_latitude_deg = tornado_table[
        linkage.EVENT_LATITUDE_COLUMN].values[tornado_row]
    tornado_longitude_deg = tornado_table[
        linkage.EVENT_LONGITUDE_COLUMN].values[tornado_row]

    axes_object.plot(
        tornado_longitude_deg, tornado_latitude_deg, linestyle='None',
        marker=TORNADO_MARKER_TYPE, markersize=TORNADO_MARKER_SIZE,
        markeredgewidth=TORNADO_MARKER_EDGE_WIDTH,
        markerfacecolor=plotting_utils.colour_from_numpy_to_tuple(
            TORNADO_MARKER_COLOUR),
        markeredgecolor=plotting_utils.colour_from_numpy_to_tuple(
            TORNADO_MARKER_COLOUR))

    tornado_time_string = time_conversion.unix_sec_to_string(
        tornado_time_unix_sec, TIME_FORMAT)

    title_string = (
        'Unlinked tornado at {0:s}, {1:.2f} deg N, {2:.2f} deg E'
    ).format(tornado_time_string, tornado_latitude_deg,
             tornado_longitude_deg)
    pyplot.title(title_string, fontsize=TITLE_FONT_SIZE)

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    pyplot.savefig(output_file_name, dpi=FIGURE_RESOLUTION_DPI)
    pyplot.close()

    imagemagick_utils.trim_whitespace(
        input_file_name=output_file_name, output_file_name=output_file_name)
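
# Toy sketch of the time-snapping logic above (not part of the original
# script): round a tornado time to the nearest radar time step, then clamp it
# into the radar window for the given SPC date.  Uses only functions already
# called by `_plot_tornado_and_radar`.
def _example_snap_radar_time(tornado_time_unix_sec, spc_date_string):
    """Snaps tornado time to nearest valid radar time on one SPC date."""

    radar_time_unix_sec = number_rounding.round_to_nearest(
        tornado_time_unix_sec, RADAR_TIME_INTERVAL_SEC)

    first_radar_time_unix_sec = number_rounding.ceiling_to_nearest(
        time_conversion.get_start_of_spc_date(spc_date_string),
        RADAR_TIME_INTERVAL_SEC)
    last_radar_time_unix_sec = number_rounding.floor_to_nearest(
        time_conversion.get_end_of_spc_date(spc_date_string),
        RADAR_TIME_INTERVAL_SEC)

    return numpy.clip(radar_time_unix_sec, first_radar_time_unix_sec,
                      last_radar_time_unix_sec)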