def getLowestForecastFile(self, valid_time, dtype, template):
    """!Find the file with the lowest forecast hour that matches a valid time

    Args:
        @param valid_time valid time to search
        @param dtype data type (FCST or OBS) used to look up config values
        @param template filename template to fill with time information
        @rtype string
        @return Path to file with the lowest forecast hour, or None if no
            file was found within the [MIN_FORECAST, MAX_FORECAST] range
    """
    # start at the minimum forecast hour and increment until a valid
    # file is found or the maximum forecast hour is exceeded
    min_forecast = self.c_dict[dtype + '_MIN_FORECAST']
    max_forecast = self.c_dict[dtype + '_MAX_FORECAST']
    forecast_lead = min_forecast
    while forecast_lead <= max_forecast:
        input_dict = {}
        input_dict['valid'] = valid_time
        input_dict['lead_hours'] = forecast_lead
        time_info = time_util.ti_calculate(input_dict)
        fSts = sts.StringSub(self.logger, template, **time_info)
        search_file = os.path.join(self.input_dir,
                                   fSts.do_string_sub())
        # preprocess_file returns None if the file does not exist
        search_file = util.preprocess_file(
            search_file, self.c_dict[dtype + '_INPUT_DATATYPE'],
            self.config)
        if search_file is not None:
            return search_file
        forecast_lead += 1
    return None
def test_preprocess_file_unzipped():
    """!preprocess_file should return the path to an uncompressed
        file unchanged."""
    conf = ConfigWrapper(metplus_config(), None)
    default_stage = os.path.join(conf.getdir('OUTPUT_BASE'), "stage")
    stage_dir = conf.getdir('STAGING_DIR', default_stage)
    filepath = conf.getdir('METPLUS_BASE') + \
        "/internal_tests/data/zip/testfile4.txt"
    outpath = util.preprocess_file(filepath, None, conf)
    # path is returned as-is and the file exists on disk
    assert outpath == filepath and os.path.exists(outpath)
def test_preprocess_file_zip():
    """!preprocess_file should extract a .zip input into the
        staging directory and return the staged path."""
    conf = metplus_config()
    default_stage = os.path.join(conf.getdir('OUTPUT_BASE'), "stage")
    stage_dir = conf.getdir('STAGING_DIR', default_stage)
    filepath = conf.getdir('METPLUS_BASE') + \
        "/internal_tests/data/zip/testfile3.txt.zip"
    # staged copy mirrors the original path under the staging dir
    stagepath = stage_dir + conf.getdir('METPLUS_BASE') + \
        "/internal_tests/data/zip/testfile3.txt"
    outpath = util.preprocess_file(filepath, None, conf)
    assert outpath == stagepath and os.path.exists(outpath)
def find_input_file(self, in_template, search_time, search_accum, data_src):
    """!Build the full path to an input file from a filename template and
        preprocess it (uncompress/stage) if needed.

    Args:
        @param in_template filename template to fill in
        @param search_time valid time used to fill the template
        @param search_accum accumulation in hours (converted to seconds
            for the level template tag)
        @param data_src data type (FCST or OBS) used to look up the
            input datatype from the config dictionary
        @return path to the preprocessed file, or None if it does not exist
    """
    string_sub = sts.StringSub(self.logger,
                               in_template,
                               valid=search_time,
                               level=int(search_accum) * 3600)
    full_path = os.path.join(self.input_dir, string_sub.do_string_sub())
    data_type = self.c_dict[data_src + '_INPUT_DATATYPE']
    return util.preprocess_file(full_path, data_type, self.config)
def get_daily_file(self, time_info, accum, data_src, file_template):
    """!Pull accumulation out of file that contains a full day of data

    Args:
        @param time_info dictionary containing timing information
        @param accum accumulation to extract from file
        @param data_src type of data (FCST or OBS)
        @param file_template filename template to search
    @rtype bool
    @return True if file was added to output list, False if not
    """
    data_interval = self.c_dict[data_src + '_DATA_INTERVAL']
    times_per_file = self.c_dict[data_src + '_TIMES_PER_FILE']
    search_file = None
    # loop from valid_time back through data interval * times per file
    # until an existing file is found
    for i in range(0, times_per_file + 1):
        search_time = time_info['valid'] - datetime.timedelta(
            hours=(i * data_interval))
        # check if file exists
        dSts = sts.StringSub(self.logger,
                             file_template,
                             valid=search_time)
        search_file = os.path.join(self.input_dir,
                                   dSts.do_string_sub())
        search_file = util.preprocess_file(
            search_file,
            self.c_dict[data_src + '_INPUT_DATATYPE'],
            self.config)
        if search_file is not None:
            break

    if search_file is None:
        return False

    diff = time_info['valid'] - search_time

    # index of the desired accumulation within the daily file.
    # Specifying integer division // Python 3,
    # assuming that was the intent in Python 2.
    lead = int((diff.days * 24) // (data_interval))
    lead += int((diff).seconds // (data_interval * 3600)) - 1

    # calling config.conf version of getter so default value is not
    # set in log and final conf because it is unnecessary
    fname = self.config.conf.getstr(
        'config',
        data_src + '_PCP_COMBINE_' + str(accum) + '_FIELD_NAME', '')
    if fname == '':
        self.logger.error(
            'NetCDF field name was not set in config: {}'.format(
                data_src + '_PCP_COMBINE_' + str(accum) + '_FIELD_NAME'))
        return False

    addon = "'name=\"" + fname + "\"; level=\"(" + \
            str(lead) + ",*,*)\";'"
    self.add_input_file(search_file, addon)
    return True
def setup_subtract_method(self, time_info, var_info, rl):
    """!Setup pcp_combine to subtract two files to build desired
        accumulation

    Args:
        @param time_info object containing timing information
        @param var_info object containing variable information
        @param rl data type (FCST or OBS)
        @rtype string
        @return path to output file, or None if an input file is missing
    """
    self.clear()
    in_dir, in_template = self.get_dir_and_template(rl, 'INPUT')
    out_dir, out_template = self.get_dir_and_template(rl, 'OUTPUT')

    # accumulation comes from the level of the matching data type
    accum = var_info['fcst_level'] if rl == 'FCST' else var_info['obs_level']

    # drop the leading level-type character (e.g. 'A06' -> '06') if present
    if accum[0].isalpha():
        accum = accum[1:]

    lead = time_info['lead_hours']
    lead2 = lead - int(accum)

    self.set_method("SUBTRACT")

    # locate the file at the full lead time
    pcpSts1 = sts.StringSub(self.logger,
                            in_template,
                            level=int(accum) * 3600,
                            **time_info)
    file1 = util.preprocess_file(
        os.path.join(in_dir, pcpSts1.do_string_sub()),
        self.c_dict[rl + '_INPUT_DATATYPE'],
        self.config)

    if file1 is None:
        self.logger.error(
            "Could not find file in {} for init time {} and lead {}".
            format(in_dir, time_info['init_fmt'], lead))
        return None

    # locate the file at the earlier lead time with the same init time
    time_info2 = time_util.ti_calculate(
        {'init': time_info['init'], 'lead_hours': lead2})
    pcpSts2 = sts.StringSub(self.logger,
                            in_template,
                            level=int(accum) * 3600,
                            **time_info2)
    file2 = util.preprocess_file(
        os.path.join(in_dir, pcpSts2.do_string_sub()),
        self.c_dict[rl + '_INPUT_DATATYPE'],
        self.config)

    if file2 is None:
        self.logger.error(
            "Could not find file in {} for init time {} and lead {}".
            format(in_dir, time_info2['init_fmt'], lead2))
        return None

    # later accumulation minus earlier accumulation
    self.add_input_file(file1, lead)
    self.add_input_file(file2, lead2)

    outSts = sts.StringSub(self.logger,
                           out_template,
                           level=int(accum) * 3600,
                           **time_info)
    self.outfile = outSts.do_string_sub()
    self.outdir = out_dir

    return self.get_command()
def find_data(self, time_info, var_info, data_type):
    """! Finds the data file to compare
        Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information, or
                None to use level 0
            @param data_type type of data to find (FCST or OBS)
            @rtype string
            @return Returns the path to an observation file (a single
                path, a list of paths when multiple files are allowed,
                or None if nothing was found in the time window)
    """
    # get time info
    valid_time = time_info['valid_fmt']

    if var_info is not None:
        # set level based on input data type
        if data_type.startswith("OBS"):
            v_level = var_info['obs_level']
        else:
            v_level = var_info['fcst_level']

        # separate character from beginning of numeric level value if applicable
        level = util.split_level(v_level)[1]

        # set level to 0 character if it is not a number
        if not level.isdigit():
            level = '0'
    else:
        level = '0'

    template = self.c_dict[data_type + '_INPUT_TEMPLATE']
    data_dir = self.c_dict[data_type + '_INPUT_DIR']

    # if looking for a file with an exact time match:
    if self.c_dict[data_type + '_FILE_WINDOW_BEGIN'] == 0 and \
            self.c_dict[data_type + '_FILE_WINDOW_END'] == 0:
        # perform string substitution
        # (only the first part of a 'N-M' level range is used)
        dsts = sts.StringSub(self.logger,
                             template,
                             level=(int(level.split('-')[0]) * 3600),
                             **time_info)
        filename = dsts.do_string_sub()

        # build full path with data directory and filename
        path = os.path.join(data_dir, filename)

        # check if desired data file exists and if it needs to be preprocessed
        path = util.preprocess_file(
            path, self.c_dict[data_type + '_INPUT_DATATYPE'], self.config)
        return path

    # if looking for a file within a time window:
    # convert valid_time to unix time
    # NOTE(review): strftime("%s") is a platform-specific (glibc)
    # extension that is not part of the C standard -- confirm this
    # code only runs on platforms that support it
    valid_seconds = int(
        datetime.strptime(valid_time, "%Y%m%d%H%M").strftime("%s"))
    # get time of each file, compare to valid time, save best within range
    closest_files = []
    closest_time = 9999999

    # get range of times that will be considered
    valid_range_lower = self.c_dict[data_type + '_FILE_WINDOW_BEGIN']
    valid_range_upper = self.c_dict[data_type + '_FILE_WINDOW_END']
    lower_limit = int(
        datetime.strptime(
            util.shift_time_seconds(valid_time, valid_range_lower),
            "%Y%m%d%H%M").strftime("%s"))
    upper_limit = int(
        datetime.strptime(
            util.shift_time_seconds(valid_time, valid_range_upper),
            "%Y%m%d%H%M").strftime("%s"))

    # step through all files under input directory in sorted order
    # pylint:disable=unused-variable
    # os.walk returns a tuple. Not all returned values are needed.
    for dirpath, dirnames, all_files in os.walk(data_dir):
        for filename in sorted(all_files):
            fullpath = os.path.join(dirpath, filename)

            # remove input data directory to get relative path
            rel_path = fullpath.replace(data_dir + "/", "")

            # extract time information from relative path using template
            file_time_info = util.get_time_from_file(
                self.logger, rel_path, template)
            if file_time_info is not None:
                # get valid time and check if it is within the time range
                file_valid_time = file_time_info['valid'].strftime(
                    "%Y%m%d%H%M")
                # skip if could not extract valid time
                if file_valid_time == '':
                    continue
                file_valid_dt = datetime.strptime(file_valid_time,
                                                  "%Y%m%d%H%M")
                file_valid_seconds = int(file_valid_dt.strftime("%s"))
                # skip if outside time range
                if file_valid_seconds < lower_limit or file_valid_seconds > upper_limit:
                    continue

                # if only 1 file is allowed, check if file is
                # closer to desired valid time than previous match
                if not self.c_dict['ALLOW_MULTIPLE_FILES']:
                    diff = abs(valid_seconds - file_valid_seconds)
                    if diff < closest_time:
                        closest_time = diff
                        # keep only the new best match
                        del closest_files[:]
                        closest_files.append(fullpath)
                # if multiple files are allowed, get all files within range
                else:
                    closest_files.append(fullpath)

    if not closest_files:
        return None

    # check if file(s) needs to be preprocessed before returning the path
    # return single file path if 1 file was found
    if len(closest_files) == 1:
        return util.preprocess_file(
            closest_files[0],
            self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)

    # return list if multiple files are found
    out = []
    for close_file in closest_files:
        outfile = util.preprocess_file(
            close_file, self.c_dict[data_type + '_INPUT_DATATYPE'],
            self.config)
        out.append(outfile)

    return out
def test_preprocess_file_none():
    """!Passing None as the filename should yield None."""
    conf = ConfigWrapper(metplus_config(), None)
    result = util.preprocess_file(None, None, conf)
    assert result is None
def find_model_members(self, time_info):
    """! Finds the model member files to compare
        Args:
            @param time_info dictionary containing timing information
            @rtype string
            @return Returns the path of the written file list of ensemble
                model files, or False if more members were found than
                expected
    """
    model_dir = self.c_dict['FCST_INPUT_DIR']
    # used for filling in missing files to ensure ens_thresh check is accurate
    fake_dir = '/ensemble/member/is/missing'

    # model_template is a list of 1 or more.
    ens_members_path = []

    # get all files that exist
    for ens_member_template in self.c_dict['FCST_INPUT_TEMPLATE']:
        model_ss = sts.StringSub(self.logger, ens_member_template,
                                 **time_info)
        member_file = model_ss.do_string_sub()
        expected_path = os.path.join(model_dir, member_file)

        # if wildcard expression, get all files that match
        if '?' in expected_path:
            wildcard_files = sorted(glob.glob(expected_path))
            self.logger.debug(
                'Ensemble members file pattern: {}'.format(expected_path))
            self.logger.debug('{} members match file pattern'.format(
                str(len(wildcard_files))))

            # add files to list of ensemble members
            for wildcard_file in wildcard_files:
                ens_members_path.append(wildcard_file)
        else:
            # otherwise check if file exists
            expected_path = util.preprocess_file(
                expected_path, self.c_dict['FCST_INPUT_DATATYPE'],
                self.config)

            # if the file exists, add it to the list
            if expected_path is not None:
                ens_members_path.append(expected_path)
            else:
                # add relative path to fake dir and add to list so the
                # member count stays correct for the ens_thresh check
                ens_members_path.append(os.path.join(fake_dir,
                                                     member_file))
                self.logger.warning(
                    'Expected ensemble file {} not found'.format(
                        member_file))

    # if more files found than expected, error and exit
    if len(ens_members_path) > self.c_dict['N_MEMBERS']:
        msg = 'Found more files than expected! ' +\
              'Found {} expected {}. '.format(len(ens_members_path),
                                              self.c_dict['N_MEMBERS']) +\
              'Adjust wildcard expression in [filename_templates] ' +\
              'FCST_ENSEMBLE_STAT_INPUT_TEMPLATE or adjust [config] ' +\
              'ENSEMBLE_STAT_N_MEMBERS. ' +\
              'Files found: {}'.format(ens_members_path)
        self.logger.error(msg)
        self.logger.error(
            "Could not find files in {} for init {} f{} ".format(
                model_dir, time_info['init_fmt'],
                str(time_info['lead_hours'])))
        return False
    # if fewer files found than expected, warn and add fake files
    elif len(ens_members_path) < self.c_dict['N_MEMBERS']:
        msg = 'Found fewer files than expected. ' +\
              'Found {} expected {}.'.format(len(ens_members_path),
                                             self.c_dict['N_MEMBERS'])
        self.logger.warning(msg)
        # add fake files to list to get correct number of files for ens_thresh
        diff = self.c_dict['N_MEMBERS'] - len(ens_members_path)
        self.logger.warning('Adding {} fake files to '.format(str(diff)) +
                            'ensure ens_thresh check is accurate')
        for _ in range(0, diff, 1):
            ens_members_path.append(fake_dir)

    # write file that contains list of ensemble files
    list_filename = time_info['init_fmt'] + '_' + \
        str(time_info['lead_hours']) + '_ensemble.txt'
    return self.write_list_file(list_filename, ens_members_path)
def run_at_time_once(self, time_info, var_info, dtype):
    """! Runs the MET application for a given time and forecast lead
        combination
        Args:
            @param time_info dictionary containing timing information
            @param var_info object containing variable information
            @param dtype data type (FCST or OBS)
            @return True if the app ran or output was skipped, False on
                error; falls through after build() on the success path
                (NOTE(review): implicit None return there — confirm
                callers only test truthiness)
    """
    self.clear()

    # pull the variable name and level for the requested data type
    if dtype == "FCST":
        compare_var = var_info['fcst_name']
        level = var_info['fcst_level']
    else:
        compare_var = var_info['obs_name']
        level = var_info['obs_level']

    # strip the level type character (e.g. 'A03' -> '03')
    level = util.split_level(level)[1]

    # all four dir/template config values must be set - exit if any is empty
    for suffix in ('_INPUT_DIR', '_INPUT_TEMPLATE',
                   '_OUTPUT_DIR', '_OUTPUT_TEMPLATE'):
        if self.c_dict[dtype + suffix] == '':
            self.logger.error(
                'Must set {}_REGRID_DATA_PLANE{}'.format(dtype, suffix) +
                ' in config file')
            exit(1)

    input_dir = self.c_dict[dtype + '_INPUT_DIR']
    input_template = self.c_dict[dtype + '_INPUT_TEMPLATE']
    output_dir = self.c_dict[dtype + '_OUTPUT_DIR']
    output_template = self.c_dict[dtype + '_OUTPUT_TEMPLATE']

    # use 0 in filename templates when the level is not numeric
    if not level.isdigit():
        f_level = '0'
    else:
        f_level = level

    string_sub = sts.StringSub(self.logger,
                               input_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    infile = os.path.join(input_dir, string_sub.do_string_sub())
    infile = util.preprocess_file(
        infile,
        self.config.getstr('config',
                           dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE',
                           ''),
        self.config)
    if infile is not None:
        self.infiles.append(infile)
    else:
        self.logger.error(
            'Could not find input file in {} matching template {}'.format(
                input_dir, input_template))
        return False

    verif_grid = self.c_dict['VERIFICATION_GRID']
    if verif_grid == '':
        self.logger.error('No verification grid specified! ' +
                          'Set REGRID_DATA_PLANE_VERIF_GRID')
        return False

    self.infiles.append(verif_grid)

    string_sub = sts.StringSub(self.logger,
                               output_template,
                               level=(int(f_level) * 3600),
                               **time_info)
    outfile = string_sub.do_string_sub()
    self.set_output_path(os.path.join(output_dir, outfile))

    outpath = self.get_output_path()
    if os.path.exists(outpath) and \
            self.c_dict['SKIP_IF_OUTPUT_EXISTS'] is True:
        # bugfix: the old message told users to set the skip flag to
        # True to process, but skipping happens BECAUSE it is True
        self.logger.debug(
            'Skip writing output file {} because it already '
            'exists. Remove file or change '
            'REGRID_DATA_PLANE_SKIP_IF_OUTPUT_EXISTS to False to process'.
            format(outpath))
        return True

    # NetCDF (or unset) input datatype encodes the level in the field name
    if self.config.getstr('config',
                          dtype + '_REGRID_DATA_PLANE_INPUT_DATATYPE',
                          '') in ['', 'NETCDF']:
        field_name = "{:s}_{:s}".format(compare_var, str(level).zfill(2))
        self.args.append(
            "-field 'name=\"{:s}\"; level=\"(*,*)\";'".format(field_name))
    else:
        field_name = "{:s}".format(compare_var)
        self.args.append("-field 'name=\"{:s}\"; level=\"{:s}\";'".format(
            field_name, level))

    if self.c_dict['METHOD'] != '':
        self.args.append("-method {}".format(self.c_dict['METHOD']))

    self.args.append("-width {}".format(self.c_dict['WIDTH']))

    self.args.append("-name " + field_name)

    cmd = self.get_command()
    if cmd is None:
        self.logger.error("Could not generate command")
        return

    self.build()