Example #1
import logging
import os

# util and pb2nc_wrapper are assumed to be provided by the METplus test
# modules these examples were extracted from.


def test_pb_info_no_subdir():
    # Verify that the prepbufr file information is correctly
    # curated into a list of named tuples. Testing on data that
    # is not separated into ymd dated subdirs. Test against the
    # 20170601 data file: prepbufr.gdas.2017060100
    pb = pb2nc_wrapper()
    # Make sure we are dealing with the GDAS data
    pb.pb_dict['PREPBUFR_FILE_REGEX'] = 'prepbufr.gdas.(2[0-9]{9})'
    pb.pb_dict['NC_FILE_TMPL'] = 'prepbufr.gdas.{valid?fmt=%Y%m%d%H}.nc'
    num_expected_files = 117
    data_dir = '/d1/METplus_Mallory/data/prepbufr/gdas'
    file_regex = 'prepbufr.gdas.2[0-9]{9}'
    logger = logging.getLogger("test_log")
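    # util.get_files is assumed to walk data_dir and return the full paths
    # of the files whose names match file_regex.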
    pb_files = util.get_files(data_dir, file_regex, logger)
    time_method = 'valid'

    test_file = os.path.join(data_dir, 'prepbufr.gdas.2017060100')
    pb_info_list = []
    for pb_file in pb_files:
        pb_info = pb.retrieve_pb_time_info(pb_file, time_method)
        pb_info_list.append(pb_info)

    actual_full_filepaths = []
    # Fail if the number of files is not what was expected
    assert len(pb_info_list) == num_expected_files

    for pb_info in pb_info_list:
        actual_full_filepaths.append(pb_info.full_filepath)

    # Fail if the expected file was not found
    assert test_file in actual_full_filepaths
Example #2
import logging
import os
import re

import pytest

# util and pb2nc_wrapper are assumed to be provided by the METplus test
# modules these examples were extracted from.


def test_pb_info_with_subdir():
    # Verify that the prepbufr file information is correctly
    # curated into a list of named tuples. Testing on data that
    # is separated into ymd dated subdirs. Perform test on only
    # one subdirectory's worth of data.
    pytest.skip('Function no longer used')
    # Make sure we are dealing with the GDAS data
    pb = pb2nc_wrapper()
    pb.c_dict['PREPBUFR_FILE_REGEX'] =\
        'nam.t([0-9]{2})z.prepbufr.tm([0-9]{2})'
    pb.c_dict['NC_FILE_TMPL'] =\
        'prepbufr.{valid?fmt=%Y%m%d%H}.t{cycle?fmt=%HH}z.nc'
    expected_file_subdir =\
        '/d1/METplus_Mallory/data/prepbufr/nam/nam.20170615'
    expected_files = [
        'nam.t00z.prepbufr.tm00', 'nam.t00z.prepbufr.tm03',
        'nam.t06z.prepbufr.tm00', 'nam.t06z.prepbufr.tm03',
        'nam.t12z.prepbufr.tm00', 'nam.t12z.prepbufr.tm03',
        'nam.t18z.prepbufr.tm00', 'nam.t18z.prepbufr.tm03'
    ]
    # expected_ymd = '20170615'
    num_expected_files = 8
    expected_full_filepaths = []
    for expected_file in expected_files:
        expected_full_filepaths.append(
            os.path.join(expected_file_subdir, expected_file))

    # Get the ymd of the first subdirectory
    subdir = '/d1/METplus_Mallory/data/prepbufr/nam/nam.20170615'
    ymd_match = re.match(r'.*(2[0-9]{7}).*', subdir)
    ymd = ymd_match.group(1)
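    # The ymd string is later passed to retrieve_pb_time_info, which is
    # assumed to combine it with the cycle/offset parsed from the filename.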
    file_regex = 'nam.t([0-9]{2})z.prepbufr.tm([0-9]{2})'
    logger = logging.getLogger("temp_log")
    pb_files = util.get_files(subdir, file_regex, logger)
    time_method = 'valid'
    all_pb_info = []
    for pb_file in pb_files:
        pb_info = pb.retrieve_pb_time_info(pb_file, time_method, ymd)
        all_pb_info.append(pb_info)

    # Fail if there is not one entry for each file
    assert len(all_pb_info) == num_expected_files

    # Fail if any expected file is missing from the results
    for expected_full_filepath in expected_full_filepaths:
        assert expected_full_filepath in pb_files
Example #3
    def run_at_time(self, input_dict):
        """!Get TC-paris data then regrid tiles centered on the storm.

        Get TC-pairs track data and GFS model data, do any necessary
        processing then regrid the forecast and analysis files to a
        30 x 30 degree tile centered on the storm.
        Args:
            input_dict:  Time dictionary
        Returns:

            None: invokes regrid_data_plane to create a netCDF file from two
                    extratropical storm track files.
        """
        time_info = time_util.ti_calculate(input_dict)
        init_time = time_info['init_fmt']

        # get the process id to be used to identify the output
        # amongst different users and runs.
        cur_pid = str(os.getpid())
        tmp_dir = os.path.join(self.config.getdir('TMP_DIR'), cur_pid)
        self.logger.info("Begin extract tiles")

        cur_init = init_time[0:8] + "_" + init_time[8:10]

        # Check that there are tc_pairs data which are used as input
        if util.is_dir_empty(self.tc_pairs_dir):
            self.logger.error("No tc pairs data found at {}"\
                              .format(self.tc_pairs_dir))
            sys.exit(1)

        # Create the name of the filter file we need to find.  If
        # the file doesn't exist, then run TC_STAT
        filter_filename = "filter_" + cur_init + ".tcst"
        filter_name = os.path.join(self.filtered_out_dir, cur_init,
                                   filter_filename)

        if util.file_exists(filter_name) and not self.overwrite_flag:
            self.logger.debug("Filter file exists, using Track data file: {}"\
                              .format(filter_name))
        else:
            # Create the storm track by applying the
            # filter options defined in the config/param file.
            # Use TcStatWrapper to build up the tc_stat command and invoke
            # the MET tool tc_stat to perform the filtering.
            tiles_list = util.get_files(self.tc_pairs_dir, ".*tcst",
                                        self.logger)
            tiles_list_str = ' '.join(tiles_list)

            tcs = TcStatWrapper(self.config, self.logger)
            tcs.build_tc_stat(self.filtered_out_dir, cur_init, tiles_list_str,
                              self.addl_filter_opts)

            # Remove any empty files and directories that can occur
            # from filtering.
            util.prune_empty(filter_name, self.logger)

        # Now get unique storm ids from the filter file,
        # filter_yyyymmdd_hh.tcst
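        # get_storm_ids is assumed to parse the filter file and return the
        # sorted, unique storm ids found in it.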
        sorted_storm_ids = util.get_storm_ids(filter_name, self.logger)

        # Check for empty sorted_storm_ids, if empty,
        # continue to the next time.
        if not sorted_storm_ids:
            # No storms found for init time, cur_init
            msg = "No storms were found for {} ...continue to next in list"\
              .format(cur_init)
            self.logger.debug(msg)
            return

        # Process each storm in the sorted_storm_ids list
        # Iterate over each filter file in the output directory and
        # search for the presence of the storm id.  Store this
        # corresponding row of data into a temporary file in the
        # /tmp/<pid> directory.
        for cur_storm in sorted_storm_ids:
            storm_output_dir = os.path.join(self.filtered_out_dir, cur_init,
                                            cur_storm)
            with open(filter_name, "r") as filter_file:
                header = filter_file.readline()
            util.mkdir_p(storm_output_dir)
            util.mkdir_p(tmp_dir)
            tmp_filename = "filter_" + cur_init + "_" + cur_storm
            full_tmp_filename = os.path.join(tmp_dir, tmp_filename)

            storm_match_list = util.grep(cur_storm, filter_name)
            with open(full_tmp_filename, "a+") as tmp_file:
                # copy over header information
                tmp_file.write(header)
                for storm_match in storm_match_list:
                    tmp_file.write(storm_match)

            # Perform regridding of the forecast and analysis files
            # to an n X n degree tile centered on the storm (dimensions
            # are indicated in the config/param file).
            feature_util.retrieve_and_regrid(full_tmp_filename, cur_init,
                                             cur_storm, self.filtered_out_dir,
                                             self.config)

        # end of for cur_storm

        # Remove any empty files and directories in the extract_tiles output
        # directory
        util.prune_empty(self.filtered_out_dir, self.logger)

        # Clean up the tmp directory if it exists
        if os.path.isdir(tmp_dir):
            util.rmtree(tmp_dir)
Example #4
    def get_pb_files_by_time(self):
        """! Identify the prepbufr files that are within the specified time
             window and the specified time intervals between initialization
             times.

             Args:

             Returns:
                 files_of_interest : A list of the full filepaths
                                     corresponding to the files of interest
                                     (i.e. those files that are within the
                                     specified start and end times, and the
                                     appropriate interval times)

        """
        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.

        # Used for logging.
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name
        self.logger.info("INFO|:" + cur_function + '|' + cur_filename + '| ' +
                         "Filtering prepbufr files based on time (init or "
                         "valid).")

        # Create a list of times that are expected, based on
        # the begin, end, and interval date/times.
        dates_needed = []
        interval_time_str = self.pb_dict['INTERVAL_TIME']
        interval_time = int(interval_time_str) * self.HOURS_TO_SECONDS
        start_date_unix = self.convert_date_strings_to_unix_times(
            self.pb_dict['START_DATE'])
        end_date_unix = self.convert_date_strings_to_unix_times(
            self.pb_dict['END_DATE'])
        current_start_date = start_date_unix
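        # Walk from the start time to the end time in steps of
        # interval_time, collecting each expected time.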
        while start_date_unix <= current_start_date <= end_date_unix:
            dates_needed.append(current_start_date)
            current_start_date = current_start_date + interval_time

        # Iterate through the input prepbufr directories to determine the
        # init or valid times.
        # Get a list of all the sub-directories and files under the
        # PREPBUFR_DATA_DIR/PREPBUFR_MODEL_DIR_NAME
        dir_to_search = os.path.join(self.pb_dict['PREPBUFR_DATA_DIR'],
                                     self.pb_dict['PREPBUFR_MODEL_DIR_NAME'])
        pb_subdirs_list = util.get_dirs(dir_to_search)

        # Determine whether times are to be retrieved based on init times:
        # ymd + cycle hour or valid times: ymd + (cycle hour - offset)
        # initialize the time flag
        if self.pb_dict['TIME_METHOD'].lower() == 'by_init':
            time_flag = 'init'
        elif self.pb_dict['TIME_METHOD'].lower() == 'by_valid':
            time_flag = 'valid'
        else:
            # unsupported time method
            self.logger.error('ERROR|:' + cur_function + '|' + cur_filename +
                              ' Unrecognized time method, only BY_INIT or ' +
                              'BY_VALID are supported. Check the ' +
                              'TIME_METHOD setting in your configuration ' +
                              'file.')
            sys.exit(1)

        # Some prepbufr files are organized into YMD subdirectories with
        # the cycle and offset (fhr) times incorporated into their filenames.
        #
        # There are also prepbufr files that are not separated by YMD and
        # instead have the YMDh incorporated into their filenames.  Provide
        # support for both cases.
        files_within_time_criteria = []
        if pb_subdirs_list:
            for pb_subdir in pb_subdirs_list:
                # Retrieve the YMD from the subdirectory name
                dir_regex = self.pb_dict['PREPBUFR_DIR_REGEX']
                regex_search = re.compile(dir_regex)
                match = re.match(regex_search, pb_subdir)
                if match:
                    ymd = match.group(1)
                    regex_file = self.pb_dict['PREPBUFR_FILE_REGEX']
                    pb_files_list = util.get_files(pb_subdir, regex_file,
                                                   self.logger)
                    if pb_files_list:
                        # Calculate the init or valid time for each file, then
                        # determine if this is found in the list of times of
                        # interest, dates_needed[].
                        for pb_file in pb_files_list:
                            # Create the time information for this file, based
                            # on init time or valid time, as indicated by the
                            # time_flag.
                            pb_file_time_info = \
                                self.retrieve_pb_time_info(pb_file,
                                                           time_flag,
                                                           ymd)
                            if pb_file_time_info.pb_unix_time in dates_needed:
                                files_within_time_criteria.append(
                                    pb_file_time_info.full_filepath)

                    else:
                        # No files in subdirectory, continue to next
                        # subdirectory
                        self.logger.info('INFO:|' + cur_function + '|' +
                                         cur_filename + '| No files found '
                                         'in current subdirectory: ' +
                                         pb_subdir + ', continue checking '
                                         'the next available subdirectory '
                                         'for files.')
                        continue

        else:
            # No subdirectories, only files; get the list of files to
            # process.  These files have the YMD incorporated in the
            # filename.
            pb_files_list = util.get_files(dir_to_search,
                                           self.pb_dict['PREPBUFR_FILE_REGEX'],
                                           self.logger)
            if not pb_files_list:
                self.logger.error('ERROR:|' + cur_function + '|' +
                                  cur_filename + ' No files were found.  '
                                  'Check the path to '
                                  'the prepbufr '
                                  'data directory in your '
                                  'configuration file.')
                sys.exit(1)
            else:
                for pb_file in pb_files_list:
                    pb_file_time_info = \
                        self.retrieve_pb_time_info(pb_file,
                                                   time_flag)
                    if pb_file_time_info.pb_unix_time in dates_needed:
                        files_within_time_criteria.append(
                            pb_file_time_info.full_filepath)

        return files_within_time_criteria
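As a quick illustration of the time-window logic above, the while loop is equivalent to stepping through Unix times at the configured interval (a minimal sketch; the Unix times below are hand-computed for 20170601_00 through 20170602_00 UTC with INTERVAL_TIME = 6 hours).

start_date_unix = 1496275200   # 2017-06-01 00:00:00 UTC
end_date_unix = 1496361600     # 2017-06-02 00:00:00 UTC
interval_time = 6 * 3600

# Equivalent to the while loop above: collect every expected time from
# start to end, inclusive, in interval_time steps.
dates_needed = list(range(start_date_unix, end_date_unix + 1, interval_time))
# -> five times: 00z, 06z, 12z, 18z on 20170601 and 00z on 20170602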
Example #5
    def retrieve_data(self):
        """! Retrieve data from track files and return the min and max lon.
            Returns:
               None
        """
        self.logger.debug("Begin retrieving data...")
        all_tracks_list = []

        # Store the data in the track list.
        if os.path.isdir(self.input_data):
            self.logger.debug("Generate plot for all files in the directory" +
                              self.input_data)
            # Get the list of all files (full file path) in this directory
            all_init_files = util.get_files(self.input_data, ".*.tcst",
                                            self.logger)

            for init_file in all_init_files:
                # Ignore empty files
                if os.stat(init_file).st_size == 0:
                    self.logger.info("Empty file")
                    continue

                # logger.info("Consider all files under directory" +
                #  init_file + " with " + " init time (ymd): " +
                # self.init_date + " and lead time (hh):" + self.lead_hr)
                with open(init_file, 'r') as infile:
                    self.logger.debug("Parsing file " + init_file)

                    # Extract information from the header, which is
                    # the first line.
                    header = infile.readline()
                    # print("header: {}".format(header))
                    column_indices = self.get_columns_and_indices(header)

                    # For the remaining lines of this file,
                    # retrieve information from each row:
                    # lon, lat, init time, lead hour, valid time,
                    # model name, mslp, and basin.
                    # NOTE: Some of these aren't used until we fully
                    # emulate Guang Ping's plots.
                    for line in infile:
                        track_dict = {}
                        col = line.split()
                        lat = col[column_indices['ALAT']]
                        lon = col[column_indices['ALON']]
                        init_time = col[column_indices['INIT']]
                        fcst_lead_hh = \
                            str(col[column_indices['LEAD']]).zfill(3)
                        model_name = col[column_indices['AMODEL']]
                        valid_time = col[column_indices['VALID']]
                        storm_id = col[column_indices['STORM_ID']]

                        # Not needed until support for regional plots
                        # is implemented.
                        # mslp = col[column_indices['AMSLP']]
                        # basin = col[column_indices['BASIN']]

                        # Check for NA values in lon and lat, skip to
                        # next line in file if 'NA' is encountered.
                        if lon == 'NA' or lat == 'NA':
                            continue
                        track_dict['lon'] = float(lon)
                        track_dict['lat'] = float(lat)

                        # If the lead hour is 'NA', skip to next line.
                        # The track data was very likely generated with
                        # TC-Stat set to match-pairs set to True.
                        lead_hr = self.extract_lead_hr(fcst_lead_hh)
                        if lead_hr == 'NA':
                            continue

                        # Check that the init date, init hour
                        # and model name are what the user requested.
                        init_ymd, init_hh = \
                            self.extract_date_and_time_from_init(init_time)

                        if init_ymd == self.init_date and\
                                init_hh == self.init_hr:
                            if model_name == self.model:
                                # Check for the requested model,
                                # if the model matches the requested
                                # model name, then we have all the
                                # necessary information to continue.
                                # Store all data in dictionary
                                track_dict['fcst_lead_hh'] = fcst_lead_hh
                                track_dict['init_time'] = init_time
                                track_dict['model_name'] = model_name
                                track_dict['valid_time'] = valid_time
                                track_dict['storm_id'] = storm_id

                                # Identify the 'first' point of the
                                # storm track.  If the storm id is novel, then
                                # retrieve the date and hh from the valid time
                                if storm_id in self.unique_storm_id:
                                    track_dict['first_point'] = False
                                    track_dict['valid_dd'] = ''
                                    track_dict['valid_hh'] = ''
                                else:
                                    self.unique_storm_id.add(storm_id)
                                    # Since this is the first storm_id with
                                    # a valid value for lat and lon (ie not
                                    # 'NA'), this is the first track point
                                    # in the storm track and will be
                                    # labelled with the corresponding
                                    # date/hh z on the plot.
                                    valid_match = \
                                        re.match(r'[0-9]{6}([0-9]{2})_' +
                                                 '([0-9]{2})[0-9]{4}',
                                                 track_dict['valid_time'])
                                    if valid_match:
                                        valid_dd = valid_match.group(1)
                                        valid_hh = valid_match.group(2)
                                    else:
                                        # Shouldn't get here if this is
                                        # the first point of the track.
                                        valid_dd = ''
                                        valid_hh = ''
                                    track_dict['first_point'] = True
                                    track_dict['valid_dd'] = valid_dd
                                    track_dict['valid_hh'] = valid_hh

                                # Identify points based on valid time (hh).
                                # Useful for plotting later on.
                                valid_match = \
                                    re.match(r'[0-9]{8}_([0-9]{2})[0-9]{4}',
                                             track_dict['valid_time'])
                                if valid_match:
                                    # Since we are only interested in 00,
                                    # 06, 12, and 18 hr times...
                                    valid_hh = valid_match.group(1)
                                else:
                                    # Default so the lead_group check below
                                    # falls through to the '' group.
                                    valid_hh = ''

                                if valid_hh == '00' or valid_hh == '12':
                                    track_dict['lead_group'] = '0'
                                elif valid_hh == '06' or valid_hh == '18':
                                    track_dict['lead_group'] = '6'
                                else:
                                    # To gracefully handle any hours other
                                    # than 0, 6, 12, or 18
                                    track_dict['lead_group'] = ''

                                all_tracks_list.append(track_dict)

                                # For future work, support for MSLP when
                                # generating regional plots-
                                # implementation goes here...
                                # Finishing up, do any cleaning up,
                                # logging, etc.
                                self.logger.info("INFO: All criteria met, " +
                                                 "saving track data init " +
                                                 track_dict['init_time'] +
                                                 " lead " +
                                                 track_dict['fcst_lead_hh'] +
                                                 " lon " +
                                                 str(track_dict['lon']) +
                                                 " lat " +
                                                 str(track_dict['lat']))

                            else:
                                # Not the requested model, move to next
                                # row of data
                                continue

                        else:
                            # Not the requested init ymd move to next
                            # row of data
                            continue

                # Now separate the data based on storm id.
                for cur_unique in self.unique_storm_id:
                    cur_storm_list = []
                    for cur_line in all_tracks_list:
                        if cur_line['storm_id'] == cur_unique:
                            cur_storm_list.append(cur_line)
                        else:
                            # Continue to next line in all_tracks_list
                            continue

                    # Create the storm_id_dict, which is the data
                    # structure used to separate the storm data based on
                    # storm id.
                    self.storm_id_dict[cur_unique] = cur_storm_list

        else:
            self.logger.error("Input directory expected, check " +
                              "configuration file and try again.")
            sys.exit(1)
Example #6
    def run_all_times(self):
        """! Builds the command for invoking tcmpr.R plot script.

             Args:

             Returns:

        """
        base_cmds_list = [' Rscript ', self.tcmpr_script, ' -lookin ']
        base_cmds = ''.join(base_cmds_list)
        self.logger.debug("base_cmds " + base_cmds)
        cmds_list = []

        self.logger.debug("DEBUG: TCMPR input " + self.input_data)
        self.logger.debug("DEBUG: TCMPR config file " + self.plot_config_file)
        self.logger.debug("DEBUG: output " + self.output_base_dir)

        # Create a list of all the "optional" options and flags.
        optionals_list = self.retrieve_optionals()

        # Create the output base directory
        util.mkdir_p(self.output_base_dir)

        # If input data is a file, create a single command and invoke R script.
        if os.path.isfile(self.input_data):
            self.logger.debug("Currently plotting " + self.input_data)
            cmds_list.append(base_cmds)
            cmds_list.append(self.input_data)

            # Special treatment of the "optional" output_base_dir option
            # because we are supporting the plotting of multiple tcst files
            # in a directory.
            optionals = ''
            if self.output_base_dir:
                # dated_output_dir = self.create_output_subdir(self.input_data)
                optionals_list.append(' -outdir ')
                # optionals_list.append(dated_output_dir)
                optionals_list.append(self.output_base_dir)
                optionals = ''.join(optionals_list)

            if optionals:
                cmds_list.append(optionals)
                # Due to the way cmds_list was created, join it all in to
                # one string and than split that in to a list, so element [0]
                # is 'Rscript', instead of 'Rscript self.tcmpr_script -lookin'
                cmds_list = ''.join(cmds_list).split()
                # cmd = batchexe('sh')['-c',''.join(cmds_list)] > '/dev/null'
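                # produtil's batchexe builds the command object here; the
                # '>' redirection is assumed to send the R script's stdout
                # to /dev/null.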
                cmd = batchexe(cmds_list[0])[cmds_list[1:]] > '/dev/null'
                self.logger.debug("DEBUG: Command run " + cmd.to_shell())
                self.logger.info("INFO: Generating requested plots for " +
                                 self.input_data)

                # pylint:disable=unnecessary-pass
                # If a tc file is empty, continue to the next, thus the pass
                # isn't unnecessary.
                try:
                    checkrun(cmd)
                except produtil.run.ExitStatusException as ese:
                    self.logger.warn("WARN: plot_tcmpr.R returned non-zero"
                                     " exit status, "
                                     "tcst file may be missing data, "
                                     "continuing: " + ese)

                    # Remove the empty directory
                    if not os.listdir(self.output_base_dir):
                        os.rmdir(self.output_base_dir)
                    pass

        # If the input data is a directory, create a list of all the
        # files in the directory and invoke the R script for this list
        # of files.
        if os.path.isdir(self.input_data):
            self.logger.debug("plot all files in directory " + self.input_data)
            cmds_list = []
            all_tcst_files_list = util.get_files(self.input_data, ".*.tcst",
                                                 self.logger)
            all_tcst_files = ' '.join(all_tcst_files_list)
            self.logger.debug("num of files " + str(len(all_tcst_files)))
            # Append the mandatory -lookin option to the base command.
            cmds_list.append(base_cmds)
            cmds_list.append(all_tcst_files)
            # dated_output_dir = self.create_output_subdir(self.output_plot)
            dated_output_dir = self.output_base_dir
            if self.output_base_dir:
                cmds_list.append(' -outdir ')
                util.mkdir_p(self.output_base_dir)
                cmds_list.append(self.output_base_dir)
                self.logger.debug("DEBUG: Creating dated output dir " +
                                  dated_output_dir)

            if optionals_list:
                remaining_options = ''.join(optionals_list)
                cmds_list.append(remaining_options)

            # Due to the way cmds_list was created, join it all in to
            # one string and than split that in to a list, so element [0]
            # is 'Rscript', instead of 'Rscript self.tcmpr_script -lookin'
            cmds_list = ''.join(cmds_list).split()
            cmd = batchexe(cmds_list[0])[cmds_list[1:]] > '/dev/null'
            self.logger.debug("DEBUG:  Command run " + cmd.to_shell())

            # pylint:disable=unnecessary-pass
            # If a tc file is empty, continue to the next, thus the pass
            # isn't unnecessary.
            try:
                checkrun(cmd)
            except produtil.run.ExitStatusException as ese:
                # If the tcst file is empty (with the exception of the
                #  header), or there is some other problem, then
                # plot_tcmpr.R will return with a non-zero exit status of 1
                self.logger.warn("WARN: plot_tcmpr.R returned non-zero"
                                 " exit status, tcst file may be missing"
                                 " data... continuing: " + str(ese))
                # Remove the empty directory
                # Remove the empty directory
                if not os.listdir(dated_output_dir):
                    os.rmdir(dated_output_dir)

                pass
            # Reset empty cmds_list to prepare for next tcst file.
            cmds_list = []

        self.logger.info("INFO: Plotting complete")
Example #7
    def run_all_times(self):
        """! Builds the command for invoking tcmpr.R plot script.

             Args:

             Returns:

        """

        self.logger.debug("TCMPR input " + self.input_data)
        self.logger.debug("TCMPR config file " + self.plot_config_file)
        self.logger.debug("output " + self.output_base_dir)

        # Create a dictionary of all the "optional" options and flags.
        cmds_dict = self.retrieve_optionals()

        # Create the TCMPR output base directory, where the final plots
        # will be saved.
        util.mkdir_p(self.output_base_dir)

        # If input data is a file, create a single command and invoke R script.
        if os.path.isfile(self.input_data):
            self.logger.debug("Currently plotting " + self.input_data)
            cmds_dict['-lookin'] = self.input_data

            # Special treatment of the "optional" output_base_dir option
            # because we are supporting the plotting of multiple tcst files
            # in a directory.
            if self.output_base_dir:
                # dated_output_dir = self.create_output_subdir(self.input_data)
                cmds_dict['-outdir'] = self.output_base_dir

            # Generate the list, where the -args are separated by their
            # values.
            full_cmd_list = ['Rscript', self.tcmpr_script]
            for key, value in cmds_dict.items():
                full_cmd_list.append(key)
                full_cmd_list.append(value)

            # Separate the 'Rscript' portion from the args, to conform to
            # produtil's exe syntax.
            cmd = exe(full_cmd_list[0])[full_cmd_list[1:]] > '/dev/null'
            self.logger.debug("Command run " + cmd.to_shell())
            self.logger.info("Generating requested plots for " +
                             self.input_data)
            # pylint:disable=unnecessary-pass
            # If a tc file is empty, continue to the next, thus the pass
            # isn't unnecessary.
            try:
                checkrun(cmd)
            except produtil.run.ExitStatusException as ese:
                self.logger.warn("plot_tcmpr.R returned non-zero"
                                 " exit status, "
                                 "tcst file may be missing data, "
                                 "continuing: " + repr(ese))

        # If the input data is a directory, create a list of all the
        # files in the directory and invoke the R script for this list
        # of files.
        elif os.path.isdir(self.input_data):
            self.logger.debug("plot all files in directory " + self.input_data)
            cmds_dict = self.retrieve_optionals()
            all_tcst_files_list = util.get_files(self.input_data, ".*.tcst",
                                                 self.logger)
            all_tcst_files = ' '.join(all_tcst_files_list)
            self.logger.debug("num of files " + str(len(all_tcst_files)))
            # Append the mandatory -lookin option to the base command.
            cmds_dict['-lookin'] = all_tcst_files
            if self.output_base_dir:
                cmds_dict['-outdir'] = self.output_base_dir
                self.logger.debug("Creating dated output dir " +
                                  self.output_base_dir)

            # Create the full_cmd_list from the keys and values of the
            # cmds_dict and then form one command list.
            full_cmd_list = list()
            full_cmd_list.append("Rscript")
            full_cmd_list.append(self.tcmpr_script)
            for key, value in cmds_dict.items():
                full_cmd_list.append(key)
                if key == '-lookin':
                    # treat the list of dirs in -lookin differently,
                    # append each individual directory to replicate original
                    # implementation's behavior of splitting the commands
                    # by whitespace and assigning each command to an item
                    # in a list.
                    for tcst_file in all_tcst_files_list:
                        full_cmd_list.append(tcst_file)
                elif key == '-plot':
                    # plot types list is also appended as a single string,
                    # delimited by ','.
                    full_cmd_list.append(','.join(value))
                elif key == '-dep':
                    # dependent variables list items are appended
                    # as one string.  Convert the list into a string
                    # delimited by ','.
                    full_cmd_list.append(','.join(value))

                else:
                    full_cmd_list.append(value)

            # Separate the 'Rscript' portion from the args, to conform to
            # produtil's exe syntax.
            cmd = exe(full_cmd_list[0])[full_cmd_list[1:]] > '/dev/null'

            # This can be a very long command if the user has
            # indicated a directory.  Only log this if necessary.
            # self.logger.debug("DEBUG:  Command run " + cmd.to_shell())
            # cmd_str = ' '.join(full_cmd_list)
            # cmd_list = 'Rscript ' + cmd_str
            # self.logger.debug('TCMPR Command run: ' + cmd_str)

            # Now run the command via produtil
            try:
                checkrun(cmd)
            except produtil.run.ExitStatusException as ese:
                # If the tcst file is empty (with the exception of the
                #  header), or there is some other problem, then
                # plot_tcmpr.R will return with a non-zero exit status of 1
                self.logger.error("plot_tcmpr.R returned non-zero"
                                  " exit status, tcst file may be missing"
                                  " data... continuing: " + str(ese))
                sys.exit(1)
        else:
            self.logger.error("Expected input is neither a file nor directory,"
                              "exiting...")
            sys.exit(1)

        self.logger.info("Plotting complete")
Example #8
    def create_input_file_info(self, file_type):
        """! Consolidate all the relevant information on the input files
             such as full filepath, date (ymd or ymdh), and cycle and offset
             times (if applicable/available from filename), and the valid time.


             Args:
                 file_type   - either "fcst" (model) or "obs"
             Returns:
                consolidated_file_info - a list of named tuples containing
                                         information useful for determining
                                         the valid time of the file:
                                         full_filepath, date (ymd or ymdh),
                                         and offset/fhr if
                                         available/applicable.
        """
        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.

        # Used for logging.
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name
        self.logger.info("INFO|:" + cur_function + '|' + cur_filename + '| ' +
                         "Creating file information for model/fcst or obs...")

        # Get a list of all the model/fcst files
        dir_to_search = self.ps_dict['FCST_INPUT_DIR']
        fcst_file_regex = self.ps_dict['FCST_INPUT_FILE_REGEX']
        all_fcst_files = util.get_files(dir_to_search, fcst_file_regex,
                                        self.logger)

        # Get a list of all the obs files
        dir_to_search = self.ps_dict['OBS_INPUT_DIR']
        obs_file_regex = self.ps_dict['OBS_INPUT_FILE_REGEX']
        all_obs_files = util.get_files(dir_to_search, obs_file_regex,
                                       self.logger)

        # Initialize the output list
        consolidated_file_info = []

        # Determine which files are within the valid time window.
        # Whenever there is more than one fcst file with the same valid time,
        # keep it, because we want to perform verification for all fcst/model
        # forecast hours.
        time_method = self.ps_dict['TIME_METHOD']
        valid_start = self.ps_dict['START_DATE']
        valid_end = self.ps_dict['END_DATE']

        fhr_start = self.ps_dict['FCST_HR_START']
        fhr_end = self.ps_dict['FCST_HR_END']
        fhr_interval = self.ps_dict['FCST_HR_INTERVAL']

        fhr_start_secs = int(fhr_start) * self.HOURS_TO_SECONDS
        fhr_end_secs = int(fhr_end) * self.HOURS_TO_SECONDS
        last_fhr = fhr_end_secs + 1
        fhr_interval_secs = int(fhr_interval) * self.HOURS_TO_SECONDS
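        # Convert the configured valid-time window endpoints to Unix times.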
        date_start = self.convert_date_strings_to_unix_times(str(valid_start))
        date_end = self.convert_date_strings_to_unix_times(str(valid_end))
        all_valid_times = []
        all_dates = []
        all_fhrs = []
        for cur_fhr in range(fhr_start_secs, last_fhr, fhr_interval_secs):
            all_fhrs.append(cur_fhr)

        # Create lists of dates and valid times (both as Unix times in
        # seconds) to represent all the times of interest.
        if time_method == 'BY_VALID':
            for cur_date in range(date_start, date_end, fhr_interval_secs):
                for cur_fhr in range(fhr_start_secs, last_fhr,
                                     fhr_interval_secs):
                    cur_init_time = cur_date - cur_fhr
                    if cur_init_time not in all_dates:
                        all_dates.append(cur_init_time)
                all_valid_times.append(cur_date)

        if time_method == 'BY_INIT':  # original code from Minna
            for cur_date in range(date_start, date_end, fhr_interval_secs):
                for cur_fhr in range(fhr_start_secs, last_fhr,
                                     fhr_interval_secs):
                    cur_valid_time = cur_date + cur_fhr
                    if cur_valid_time not in all_valid_times:
                        all_valid_times.append(cur_valid_time)
                all_dates.append(cur_date)

        InputFileInfo = namedtuple(
            'InputFileInfo', 'full_filepath, date, '
            'valid_time, cycle_or_fcst')
        if file_type == "fcst":
            # Get the information for the fcst/model file
            if all_fcst_files:
                fcst_input_regex = self.ps_dict['FCST_INPUT_FILE_REGEX']
                regex_match = re.compile(fcst_input_regex)
                for fcst_file in all_fcst_files:
                    match = re.match(regex_match, fcst_file)
                    time_info_tuple = \
                        self.get_time_info_from_file(match)

                    # Determine if this file's valid time is one of the
                    # valid times of interest and corresponds to the
                    # expected forecast hour (based on forecast hour start
                    # and forecast hour interval).  If so, consolidate the
                    # time info into the InputFileInfo tuple.
                    if time_info_tuple.date in all_dates and \
                            time_info_tuple.cycle_or_fcst in all_fhrs:
                        input_file_info = \
                            InputFileInfo(fcst_file, time_info_tuple.date,
                                          time_info_tuple.valid,
                                          time_info_tuple.cycle_or_fcst)
                        consolidated_file_info.append(input_file_info)
            else:
                self.logger.error('ERROR:|' + cur_function + '|' +
                                  cur_filename + '| No fcst files found in '
                                  'the specified input directory. '
                                  'Please verify that data '
                                  'files are present and the '
                                  'input directory path in '
                                  'the config file is correct.')
        else:
            # Get the relevant information for the obs file
            if all_obs_files:
                obs_input_regex = self.ps_dict['OBS_INPUT_FILE_REGEX']
                regex_match = re.compile(obs_input_regex)
                for obs_file in all_obs_files:
                    match = re.match(regex_match, obs_file)
                    time_info_tuple = self.get_time_info_from_file(match)

                    # Determine if this file's valid time is one of the
                    # valid times of interest.  If so, consolidate the
                    # time info into the InputFileInfo tuple.  Obs files
                    # may or may not have a cycle time (e.g. no cycle
                    # time: prepbufr.gdas.2017061500.nc vs. cycle time:
                    # prepbufr.nam.20170611.t00z.tm00.nc), so we need to
                    # check whether the cycle_or_fcst tuple value is None:
                    if time_info_tuple.cycle_or_fcst is None:
                        if time_info_tuple.valid in all_valid_times:
                            input_file_info = \
                                InputFileInfo(obs_file, time_info_tuple.date,
                                              time_info_tuple.valid,
                                              time_info_tuple.cycle_or_fcst)
                            consolidated_file_info.append(input_file_info)
                    else:
                        if time_info_tuple.valid in all_valid_times and \
                                time_info_tuple.cycle_or_fcst in all_fhrs:
                            input_file_info = \
                                InputFileInfo(obs_file, time_info_tuple.date,
                                              time_info_tuple.valid,
                                              time_info_tuple.cycle_or_fcst)
                            consolidated_file_info.append(input_file_info)

            else:
                self.logger.error('ERROR:|' + cur_function + '|' +
                                  cur_filename + '| No obs files found in '
                                  'the specified input directory. '
                                  'Please verify that data '
                                  'files are present and the '
                                  'input directory path in '
                                  'the config file is correct.')
        return consolidated_file_info
Example #9
    def run_all_times(self):
        """! Invoke the series analysis script based on
            the init time in the format YYYYMMDD_hh

            Args:

            Returns:
                None:  Creates graphical plots of storm tracks
        """
        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.

        # Used for logging.
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name
        self.logger.info("Starting series analysis by init time")

        # Set up the environment variable to be used in the Series Analysis
        #   Config file (SERIES_ANALYSIS_BY_INIT_CONFIG_FILE)
        # Used to set cnt  value in output_stats in
        # "SERIES_ANALYSIS_BY_INIT_CONFIG_FILE"
        # Need to do some pre-processing so that Python will use " and not '
        #  because currently MET doesn't support single-quotes
        tmp_stat_string = str(self.stat_list)
        tmp_stat_string = tmp_stat_string.replace("\'", "\"")

        # For example, we want tmp_stat_string to look like
        #   '["TOTAL","FBAR"]', NOT "['TOTAL','FBAR']"
        os.environ['STAT_LIST'] = tmp_stat_string
        self.add_env_var('STAT_LIST', tmp_stat_string)

        series_filter_opts = \
            self.config.getstr('config', 'SERIES_ANALYSIS_FILTER_OPTS')

        if self.regrid_with_met_tool:
            # Regridding via MET Tool regrid_data_plane.
            fcst_tile_regex = self.config.getstr('regex_pattern',
                                                 'FCST_NC_TILE_REGEX')
            anly_tile_regex = self.config.getstr('regex_pattern',
                                                 'ANLY_NC_TILE_REGEX')
        else:
            # Regridding via wgrib2 tool.
            fcst_tile_regex = self.config.getstr('regex_pattern',
                                                 'FCST_TILE_REGEX')
            anly_tile_regex = self.config.getstr('regex_pattern',
                                                 'ANLY_TILE_REGEX')
        # Initialize the tile_dir to point to the extract_tiles_dir.
        # And retrieve a list of init times based on the data available in
        # the extract tiles directory.
        tile_dir = self.extract_tiles_dir
        init_times = util.get_updated_init_times(tile_dir, self.logger)

        # Check for input tile data.
        try:
            util.check_for_tiles(tile_dir, fcst_tile_regex, anly_tile_regex,
                                 self.logger)
        except OSError:
            msg = ("Missing n x m tile files.  " +
                   "Extract tiles needs to be run")
            self.logger.error(msg)

        # If applicable, apply any filtering via tc_stat, as indicated in the
        # parameter/config file.
        tmp_dir = os.path.join(self.config.getdir('TMP_DIR'), str(os.getpid()))
        if series_filter_opts:
            self.apply_series_filters(tile_dir, init_times,
                                      self.series_filtered_out_dir,
                                      self.filter_opts, tmp_dir)

            # Clean up any empty files and directories that could arise as
            # a result of filtering
            util.prune_empty(self.series_filtered_out_dir, self.logger)

            # Get the list of all the files that were created as a result
            # of applying the filter options.
            # First, make sure that the series filter output
            # directory isn't empty.  If it is, then no files fell within
            # the filter criteria.
            if os.listdir(self.series_filtered_out_dir):
                # The series filter directory has data, use this directory as
                # input for series analysis.
                tile_dir = self.series_filtered_out_dir

                # Generate the tmp_anly and tmp_fcst files used to validate
                # filtering and for troubleshooting
                # The tmp_fcst and tmp_anly ASCII files contain the
                # list of files that meet the filter criteria.
                filtered_dirs_list = util.get_files(tile_dir, ".*.",
                                                    self.logger)
                util.create_filter_tmp_files(filtered_dirs_list,
                                             self.series_filtered_out_dir,
                                             self.logger)

            else:
                # Applying the filter produced no results.  Rather than
                # stopping, continue by using the files from extract_
                # tiles as input.
                msg = ("Applied series filter options, no results..." +
                       "using extract tiles data for series analysis input.")
                self.logger.debug(msg)
                tile_dir = self.extract_tiles_dir

        else:
            # No additional filtering was requested.
            # Use the data in the extract tiles directory
            # as input for series analysis.
            tile_dir = self.extract_tiles_dir

        # Create FCST and ANLY ASCII files based on init time and storm id.
        # These are passed to the -fcst and -obs arguments of the MET
        # tool series_analysis.
        # First, get an updated list of init times,
        # since filtering can reduce the number of init times.
        sorted_filter_init = self.get_ascii_storm_files_list(tile_dir)

        # Clean up any remaining empty files and dirs
        util.prune_empty(self.series_out_dir, self.logger)
        self.logger.debug("Finished creating FCST and ANLY ASCII files, and " +
                          "cleaning empty files and dirs")

        # Build up the arguments to and then run the MET tool series_analysis.
        self.build_and_run_series_request(sorted_filter_init, tile_dir)

        # Generate plots
        # Check for .nc files in output_dir first; if these are absent,
        # then there is a problem.
        if self.is_netcdf_created():
            self.generate_plots(sorted_filter_init, tile_dir)
        else:
            self.logger.error("No NetCDF files were created by"
                              " series_analysis, exiting...")
            sys.exit(errno.ENODATA)
        self.logger.info("Finished series analysis by init time")
Example #10
    def get_ascii_storm_files_list(self, tile_dir):
        """! Creates the list of ASCII files that contain the storm id and init
             times.  The list is used to create an ASCII file which will be
             used as the option to the -obs or -fcst flag to the MET
             series_analysis tool.
             Args:
                   @param tile_dir:  The directory where input files reside.
             Returns:
                   sorted_filter_init:  A list of the sorted directories
                                        corresponding to the init times after
                                        filtering has been applied.  If
                                        filtering produced no results, this
                                        is the list of files created from
                                        running extract_tiles.
        """

        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.
        # For logging
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name

        filter_init_times = util.get_updated_init_times(tile_dir, self.logger)
        sorted_filter_init = sorted(filter_init_times)

        for cur_init in sorted_filter_init:
            # Get all the storm ids for storm track pairs that
            # correspond to this init time.
            storm_list = self.get_storms_for_init(cur_init, tile_dir)
            if not storm_list:
                # No storms for this init time,
                # check next init time in list
                continue
            else:
                for cur_storm in storm_list:
                    # First get the filenames for the gridded forecast and
                    # analysis (n deg x m deg tiles that were created by
                    # extract_tiles). These files are aggregated by
                    # init time and storm id.
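                    # Default to GRIB2 tile names; the NetCDF patterns below
                    # are used when regridding was done with the MET tool.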
                    anly_grid_regex = ".*ANLY_TILE_F.*grb2"
                    fcst_grid_regex = ".*FCST_TILE_F.*grb2"

                    if self.regrid_with_met_tool:
                        anly_grid_regex = ".*ANLY_TILE_F.*nc"
                        fcst_grid_regex = ".*FCST_TILE_F.*nc"

                    anly_grid_files = util.get_files(tile_dir, anly_grid_regex,
                                                     self.logger)
                    fcst_grid_files = util.get_files(tile_dir, fcst_grid_regex,
                                                     self.logger)

                    # Now do some checking to make sure we aren't
                    # missing either the forecast or
                    # analysis files, if so log the error and proceed to next
                    # storm in the list.
                    if not anly_grid_files or not fcst_grid_files:
                        # No gridded analysis or forecast
                        # files found, continue
                        self.logger.info("no gridded analysis or forecast " +
                                         "file found, continue to next storm")
                        continue

                    # Now create the FCST and ANLY ASCII files based on
                    # cur_init and cur_storm:
                    self.create_fcst_anly_to_ascii_file(
                        fcst_grid_files, cur_init, cur_storm,
                        self.fcst_ascii_file_prefix)
                    self.create_fcst_anly_to_ascii_file(
                        anly_grid_files, cur_init, cur_storm,
                        self.anly_ascii_file_prefix)
                    util.prune_empty(self.series_out_dir, self.logger)
        return sorted_filter_init
Example #11
    def get_fcst_file_info(self, dir_to_search, cur_init, cur_storm):
        """! Get the number of all the gridded forecast n x m tile
            files for a given storm id and init time
            (that were created by extract_tiles). Determine the filename of the
            first and last files.  This information is used to create
            the title value to the -title opt in plot_data_plane.

            Args:
            @param dir_to_search: The directory of the gridded files of
                                  interest.
            @param cur_init:  The init time of interest.
            @param cur_storm:  The storm id of interest.

            Returns:
            num, beg, end:  A tuple representing the number of
                            forecast tile files, and the first and
                            last file.

                            sys.exit(1) otherwise
        """

        # pylint:disable=protected-access
        # Need to call sys._getframe() to get the filename and method/func
        # for logging information.
        # For logging
        cur_filename = sys._getframe().f_code.co_filename
        cur_function = sys._getframe().f_code.co_name

        # Get a sorted list of the forecast tile files for the init
        # time of interest for all the storm ids and return the
        # forecast hour corresponding to the first and last file.
        # base_dir_to_search = os.path.join(output_dir, cur_init)
        gridded_dir = os.path.join(dir_to_search, cur_init, cur_storm)
        search_regex = ".*FCST_TILE.*.grb2"

        if self.regrid_with_met_tool:
            search_regex = ".*FCST_TILE.*.nc"

        files_of_interest = util.get_files(gridded_dir, search_regex,
                                           self.logger)
        if not files_of_interest:
            msg = ("exiting, no files found for init time of interest"
                   " and directory: " + dir_to_search)
            self.logger.error(msg)
            sys.exit(1)

        sorted_files = sorted(files_of_interest)

        first = sorted_files[0]
        last = sorted_files[-1]

        # Extract the forecast hour from the first and last
        # filenames.
        match_beg = re.search(".*FCST_TILE_(F[0-9]{3}).*.grb2", first)
        match_end = re.search(".*FCST_TILE_(F[0-9]{3}).*.grb2", last)
        if self.regrid_with_met_tool:
            match_beg = re.search(".*FCST_TILE_(F[0-9]{3}).*.nc", first)
            match_end = re.search(".*FCST_TILE_(F[0-9]{3}).*.nc", last)
        if match_beg:
            beg = match_beg.group(1)
        else:
            msg = ("Unexpected file format encountered, exiting...")
            self.logger.error(msg)
            sys.exit(1)
        if match_end:
            end = match_end.group(1)
        else:
            msg = ("Unexpected file format encountered, exiting...")
            self.logger.error(msg)
            sys.exit(1)

        # Get the number of forecast tile files
        num = len(sorted_files)

        return num, beg, end