Пример #1
0
 def load_obs(self, forecast_hour=6, madis=True, variance=False):
     """
     Load the observation text file valid forecast_hour hours after self.date.

     Parameters
     ----------
     forecast_hour : hours after self.date (handed to ef.dt_str_timedelta)
         that select which observation file is read. Default 6.
     madis : if True, read the combined MADIS file for self.ob_type; if
         False, read the gridded-observation file for self.ens_type.
     variance : only used when madis is False. If True, '_variance' is
         appended to the gridded file name (files that also hold the
         ensemble variance at the ob location).

     Gridded and MADIS observation files must be generated/obtained before
     calling this function.

     Returns
     -------
     list of str : the raw lines of the observation file, exactly as
     returned by readlines().
     """

     # presumably year, month, day and hour strings of the valid time --
     # they are only used below to build the file path and log message
     dty, dtm, dtd, dth = ef.dt_str_timedelta(self.date,forecast_hour)

     ob_str = ef.var_string([self.ob_type])
     # directory where the observations are
     if madis==True:
         obs_file = '/home/disk/hot/stangen/Documents/surface_obs/MADIS/'+dty+dtm+'/combined_'+self.ob_type+'/'+self.ob_type+'_'+dty+dtm+dtd+'_'+dth+'00.txt'
     elif madis==False:
         # old naming: <ob_type>_<date>_<hour>00; new naming: date first,
         # followed by the five grid values stored on the instance
         if self.new_format == False:
             obs_file = '/home/disk/hot/stangen/Documents/gridded_obs/'+self.ens_type+'/'+dty+dtm+'/'+self.ob_type+'/'+self.ob_type+'_'+dty+dtm+dtd+'_'+dth+'00'
         elif self.new_format == True:
             obs_file = '/home/disk/hot/stangen/Documents/gridded_obs/'+self.ens_type+'/'+dty+dtm+'/'+self.ob_type+'/'+dty+dtm+dtd+'_'+dth+'00_'+str(self.l)+'_'+str(self.r)+'_'+str(self.t)+'_'+str(self.b)+'_'+str(self.s)
         if variance==True:
             obs_file += '_variance'
         obs_file += '.txt'
     print('loading '+ob_str+' obs from '+dty+dtm+dtd+'_'+dth+'00')
     # read inside a context manager so the handle is always closed
     with open(obs_file, 'r') as f1:
         obs = f1.readlines()

     return obs
Пример #2
0
def retreive_tigge_data(date):
    """
    Request one TIGGE file for the given initialization time.

    date: object with a strftime method (e.g. a datetime); its year, month,
        day and hour are used to build the target file name.

    Relies on names defined outside this function: ens, surface, var_string,
    levels, tigge_pf_sfc_request and tigge_pf_pl_request. When surface is
    True the surface request is issued, when it is False the pressure-level
    request is issued; for any other value nothing is requested.

    Nothing is returned.
    """
    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')
    save_dir = '/home/disk/hot/stangen/Documents/tigge_ensembles/' + ens + '/' + y + m + '/'

    #Create directories if they don't yet exist. exist_ok avoids the
    #check-then-create race when several retrievals run at the same time.
    os.makedirs(save_dir, exist_ok=True)

    #date formatted as YYYY-MM-DD, used both in the file name and in the request
    date_str = y + '-' + m + '-' + d

    if surface == True:
        target = save_dir + date_str + '_' + h + '_' + ens + '_' + var_string + '_sfc.nc'
        tigge_pf_sfc_request(date_str, h, target)
    elif surface == False:
        levstr = ef.var_string(levels)
        target = save_dir + date_str + '_' + h + '_' + ens + '_' + var_string + '_' + levstr + '_pl.nc'
        tigge_pf_pl_request(date_str, h, target)
Пример #3
0
 def __init__(self,date,ens_type,prior_vrbls,ob_type,update_var=None,post_vrbls=None,
              grid=None, new_format=False, efh=54):
     """
     Store the settings that describe which ensemble/observation files to load.

     date: initialization time; must provide strftime (year, month, day
         and hour strings are cached on the instance).
     ens_type: ensemble identifier, stored as given.
     prior_vrbls: list of prior variable names, converted to a single
         string with ef.var_string.
     ob_type: observation type, stored as given.
     update_var: stored as given; defaults to a new empty list.
     post_vrbls: stored as given; defaults to a new empty list.
     grid: optional sequence whose first five items are stored as
         self.l, self.r, self.t, self.b and self.s. Items that are missing
         are simply not set, so an empty grid is allowed.
     new_format: stored as given.
     efh: end forecast hour; stored as the string '<efh>hrs'.
     """
     self.date = date
     self.y = self.date.strftime('%Y')
     self.m = self.date.strftime('%m')
     self.d = self.date.strftime('%d')
     self.h = self.date.strftime('%H')
     self.ens_type = ens_type
     self.var_string = ef.var_string(prior_vrbls) #convert list of vrbls to string
     # None defaults replace the former shared mutable [] defaults, so each
     # instance gets its own list
     self.post_vrbls = [] if post_vrbls is None else post_vrbls
     self.ob_type = ob_type
     self.update_var = [] if update_var is None else update_var
     #this allows an empty (or short) grid to be input: attributes are set
     #in order until the grid runs out of items
     if grid is None:
         grid = []
     try:
         self.l = grid[0]
         self.r = grid[1]
         self.t = grid[2]
         self.b = grid[3]
         self.s = grid[4]
     except (IndexError, KeyError, TypeError):
         # only "item not available" errors are ignored; anything else
         # (e.g. KeyboardInterrupt) now propagates
         pass
     self.new_format = new_format
     self.efh = str(efh)+'hrs'
Пример #4
0
def run_efa(ob_type, update_var):
    """
    Assimilate observations into the prior ensemble and write the posterior.

    Relies on module-level settings defined outside this function:
    loc_type, localize_radius, date, ensemble_type, variables and
    self_update.

    ob_type: list of observation type names. One Load_Data object is built
        and one observation file is read per entry.
    update_var: passed through unchanged to Load_Data.

    Nothing is returned; the posterior state is written to a netCDF file
    below /home/disk/hot/stangen/Documents/posterior_ensembles/.
    """

    #assign the radius to the localization_radius variable in Observation
    #create a string for saving to specific directories
    if loc_type == 'GC':
        loc_rad = localize_radius
        loc_str = str(loc_rad)
    else:
        loc_rad = None
        loc_str = '_stat_sig'

    #hard-coded switch: no time-lagged ensembles in this version
    time_lag = False

    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')

    observations = []
    for o, o_type in enumerate(ob_type):
        efa = Load_Data(date, ensemble_type, variables, o_type, update_var)
        #only need to load the netCDF once (first time through the obtype loop)
        if o == 0:
            statecls, lats, lons, elevs = efa.load_netcdfs()

        #load in the obs file
        obs = efa.load_obs()

        #loop through each line in the text file (loop through each observation)
        for ob in obs:
            #this gets the observation information from the text file
            ob_dict = mt.get_ob_info(ob)
            #get longitude positive-definite- ie -130 lon is 230 E
            if ob_dict['lon'] < 0:
                ob_dict['lon'] = ob_dict['lon'] + 360
            utctime = datetime.utcfromtimestamp(ob_dict['time'])
            #check elevation of 4 nearest gridpoints to see whether or not to assimilate ob
            TorF = ef.closest_points(ob_dict['lat'], ob_dict['lon'], lats,
                                     lons, ob_dict['elev'], elevs)
            #fill the observation class object with information for assimilation
            obser = Observation(value=ob_dict['ob'],
                                time=utctime,
                                lat=ob_dict['lat'],
                                lon=ob_dict['lon'],
                                obtype=o_type,
                                localize_radius=loc_rad,
                                assimilate_this=TorF,
                                error=1)
            observations.append(obser)
    print('loaded ' + str(len(observations)) + ' obs for assimilation')

    # Put the state class object and observation objects into EnSRF object
    assimilator = EnSRF(statecls, observations, loc=loc_type)

    # Update the prior with EFA- state is the posterior, post_obs isn't used
    state, post_obs = assimilator.update()

    #build the string of where to save the files
    outdir = '/home/disk/hot/stangen/Documents/posterior_ensembles/'
    #are the observations only updating their corresponding variable, or
    #are they updating all variables?
    if self_update == True:
        outdir += 'ob_update_self/'
    elif self_update == False:
        outdir += 'ob_update_all/'
    #directory for the type of localization used
    outdir += 'loc_' + loc_str + '/'
    #if dealing with time-lagged ensembles, add another directory
    if time_lag == True:
        outdir += 'time_lag/'
    #add directories for the ens type and year & month
    outdir += ensemble_type + '/' + y + m + '/'

    #Create output directories if they don't yet exist (race-free)
    os.makedirs(outdir, exist_ok=True)

    outdir_date_ens = outdir + y + 'T2M-' + m + '-' + d + '_' + h + '_' + ensemble_type

    #if we are only updating the variable type that matches the observation type
    if self_update == True:
        for var in state.vars():
            checkfile = outdir_date_ens + '*'
            # If we're writing a single variable and the other variable already exists,
            # append to that file
            existing_file = glob.glob(checkfile)
            #if the other variable already exists
            if existing_file != []:
                print('Appending to existing file!')
                #should only be one file in there, convert from list to string
                existing_path = existing_file[0]
                # append to the existing file
                with Dataset(existing_path, 'a') as dset:
                    print('Writing variable {}'.format(var))
                    dset.createVariable(var, np.float32, (
                        'time',
                        'lat',
                        'lon',
                        'ens',
                    ))
                    dset.variables[var].units = ut.get_units(var)
                    dset.variables[var][:] = state[var].values
                # Build the new name from the existing one without its
                # '.nc' (the file may or may not carry the extension), so
                # the filename no longer specifies a single variable type
                newfile = (existing_path.replace('.nc', '') + '_' +
                           ef.var_string(ob_type) + '.nc')
                print('Done!')

                # Rename the file that actually exists on disk. The old
                # shell 'mv' used the '.nc'-stripped name as its source and
                # therefore failed silently when the file ended in '.nc'.
                os.replace(existing_path, newfile)
            else:

                # If the checkfile does not exist, make a new file
                outfile = outdir_date_ens + '_' + ef.var_string(ob_type)
                #if we are assimilating only one type of observation
                # NOTE(review): obs_type is not defined in this function, so
                # it must exist at module level -- confirm that ob_type was
                # not the intended name
                if len(obs_type) == 1:
                    outfile = outfile + '.nc'
                ef.make_netcdf(state, outfile)

    #if we are updating all variable types with the observation (regardless of its type)
    elif self_update == False:
        outfile = outdir_date_ens + '_' + ef.var_string(ob_type) + '.nc'
        ef.make_netcdf(state, outfile)
Пример #5
0
# Two figure-size values; presumably width and height used by the plotting
# code further down the script (not visible here) -- TODO confirm
figsize1 = 18
figsize2 = 12

#-----------------------------------------------------------------------------

#end forecast hour string, used as part of the prior file name below
efh = '54hrs'

#convert to grid indices, add 1 to the right endpoints so python includes the last index
#w, e, n, s are defined earlier in the script; presumably the west/east/north/south
#bounds in degrees. The factor of 2 gives two indices per degree, counted from
#180W (columns) and 90N (rows).
l = int(abs(-180 - w) * 2)
r = int(abs(-180 - e) * 2) + 1
t = int((90 - n) * 2)
b = int((90 - s) * 2) + 1

#convert lists of variables into strings for loading files
prior_var_str = ef.var_string(prior_var)

#get the times for the forecast (year, month, day, hour strings)
fy = forecast_time.strftime('%Y')
fm = forecast_time.strftime('%m')
fd = forecast_time.strftime('%d')
fh = forecast_time.strftime('%H')

# Filepath of the prior forecast:
# <root>/<ens>/<YYYYMM>/<YYYY-MM-DD>_<HH>_<efh>_<variables>.nc
prior_path = '/home/disk/hot/stangen/Documents/prior_ensembles/' + ens + '/' + fy + fm + '/' + fy + '-' + fm + '-' + fd + '_' + fh + '_' + efh + '_' + prior_var_str + '.nc'

# Load the prior data
print('loading prior netCDF: ' + fy + fm + fd + fh)
with Dataset(prior_path, 'r') as ncdata:
    #print(ncdata.variables)
    times = ncdata.variables['time']
Пример #6
0
#directory the plots are saved to
savedir = '/home/disk/hot/stangen/Documents/EFA/AR/plots/'

#create strings for loading txt file containing the stats
sy = start_date.strftime('%Y')
sm = start_date.strftime('%m')
sd = start_date.strftime('%d')
sh = start_date.strftime('%H')

ey = end_date.strftime('%Y')
em = end_date.strftime('%m')
ed = end_date.strftime('%d')
eh = end_date.strftime('%H')

#date range in the form YYYYMMDDHH-YYYYMMDDHH
datestr=sy+sm+sd+sh+'-'+ey+em+ed+eh

varstr = ef.var_string(assim_obs)

gridstr = ef.var_string(grid)

#AR-specific statistics live in a file with an extra '_ARspecific' suffix
if AR_specific == True:
    filepath = filedir+datestr+'_'+varstr+'_'+gridstr+'_gridobs_ARspecific.txt'
else:
    filepath = filedir+datestr+'_'+varstr+'_'+gridstr+'_gridobs.txt'

#read every line of the stats file; the context manager closes the handle
#(f1 stays bound afterwards, as a closed file)
with open(filepath, 'r') as f1:
    stats = f1.readlines()

stats_dict = {}

#-----------Changed it so that var is now ensemble type, ens is now observation variable type------
Пример #7
0
def run_efa(ob_type, update_var, ob_err_var):
    """
    Assimilate observations into the prior ensemble and write the posterior.

    All of the inputs are lists.

    ob_type: observation type names; one observation file is loaded per
        entry.
    update_var: passed through unchanged to Load_Data.
    ob_err_var: one entry per observation type. An entry starting with
        'ensvar' means "use the ensemble variance stored with each ob",
        optionally followed by a multiplication factor (e.g. 'ensvar2');
        any other entry is converted with float() and used directly as the
        observation error variance.

    Relies on module-level settings defined outside this function:
    localize_radius, loc_type, date, ensemble_type, variables, grid,
    new_format, efh, ob_category, inflation, inflation_str, self_update and
    use_oberrvar.

    Nothing is returned; the posterior state is written to a netCDF file
    below /home/disk/hot/stangen/Documents/posterior_ensembles/.
    """

    #print values used
    print('localization radius: ' + str(localize_radius))
    print('observation error variance: ' + str(ob_err_var))

    #assign the radius to the localization_radius variable in Observation
    #create a string for saving to specific directories
    if loc_type == 'GC':
        localize_type = 'GC'
        loc_rad = localize_radius
        loc_str = str(loc_rad)
    elif loc_type == 'hybrid':
        localize_type = 'GC'
        #set the localization string to be the special loc radius, plus hybrid (2000hybrid)
        #loc_rad itself is chosen per observation inside the loop below
        loc_str = str(localize_radius) + loc_type

    elif loc_type.startswith('statsig'):
        localize_type = loc_type
        loc_rad = localize_radius
        loc_str = str(localize_radius) + loc_type

    #points within a radius of ob will be fully updated by covariance (not localized)
    elif loc_type == 'cutoff':
        localize_type = loc_type
        loc_rad = localize_radius
        loc_str = str(localize_radius) + loc_type

    else:
        #localize_type was previously left unassigned here, which raised
        #UnboundLocalError when the EnSRF object was built; pass the
        #requested type through unchanged
        localize_type = loc_type
        loc_rad = None
        loc_str = '_stat_sig'

    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')

    observations = []

    if loc_type == 'hybrid':
        #need to access 12-hour forecast IVT to determine presence of AR
        efaIVT = Load_Data(date,
                           ensemble_type,
                           variables,
                           'IVT', ['IVT'],
                           grid=grid,
                           new_format=new_format,
                           efh=efh)

        #initialize an instance of the Load_data class (load in data)
        IVT_statecls, lats, lons, elevs = efaIVT.load_netcdfs()
        #get IVT 12 hours into the forecast, at time of observations
        #(time_ind = 2 is forecast hour 12)
        IVT = IVT_statecls.variables['IVT'].values[2, :, :, :]

        #obtain the mean of the ensemble (nlats x nlons)
        ens_mean = IVT.mean(axis=-1)

        #obtain variance of the ensemble (nlats x nlons)
        variance = np.var(IVT, axis=-1, ddof=1)

    #loop through each observation type
    for o, o_type in enumerate(ob_type):

        #if we are wanting to use ensemble variance as ob error variance
        if ob_err_var[o].startswith('ensvar'):
            use_ens_var = True
        else:
            use_ens_var = False

        efa = Load_Data(date,
                        ensemble_type,
                        variables,
                        o_type,
                        update_var,
                        grid=grid,
                        new_format=new_format,
                        efh=efh)
        #only need to load the netCDF once (first time through the obtype loop)
        if o == 0:
            #initialize an instance of the Load_data class (load in data)
            statecls, lats, lons, elevs = efa.load_netcdfs()

        #if we are assimilating MADIS observations
        if ob_category == 'madis':
            #load in the obs file
            obs = efa.load_obs()
        #if we are assimilating next-cycle 0-hour forecast gridded "observations"
        elif ob_category == 'gridded':
            #load in the obs file
            obs = efa.load_obs(forecast_hour=12,
                               madis=False,
                               variance=use_ens_var)

        #loop through each line in the text file (loop through each observation)
        for ob in obs:
            #this gets the observation information from the text file
            ob_dict = mt.get_ob_info(ob, use_ens_var)
            #get longitude positive-definite- ie -130 lon is 230 E
            if ob_dict['lon'] < 0:
                ob_dict['lon'] = ob_dict['lon'] + 360
            #get ob time in datetime format
            utctime = datetime.utcfromtimestamp(ob_dict['time'])
            if ob_category == 'madis':
                #check elevation of 4 nearest gridpoints to see whether or not to assimilate ob-
                #returns true or false
                TorF = ef.closest_points(ob_dict['lat'], ob_dict['lon'], lats,
                                         lons, ob_dict['elev'], elevs)
            elif ob_category == 'gridded':
                #no need to check elevation, since using gridded obs
                TorF = True

            #if we are using ens variance as ob error variance
            if use_ens_var == True:
                #multiply by factor specified at end of 'ensvar'
                mult_factor = ob_err_var[o].replace('ensvar', '')
                if mult_factor == '':
                    mult_factor = 1
                else:
                    mult_factor = float(mult_factor)
                print('multiplication factor: ', mult_factor)
                ob_var = ob_dict['variance'] * mult_factor
            elif use_ens_var == False:
                ob_var = float(ob_err_var[o])

            #if we are using a hybrid localization radius- longer localization
            #radius within the AR, 1000 km outside of it
            #if the lat/lon lie within some AR box, check if the ob point is in an AR-
            #if IVT > 250
            if loc_type == 'hybrid':

                if ob_dict['lon'] >= 140 and ob_dict['lon'] <= 245 and ob_dict[
                        'lat'] >= 33 and ob_dict['lat'] <= 51:

                    #get the ensemble value at the lat/lon pair (ob_variance isn't used)
                    ob_value, ob_variance = ef.closest_points(
                        ob_dict['lat'],
                        ob_dict['lon'],
                        lats,
                        lons,
                        variable=ens_mean,
                        need_interp=True,
                        gen_obs=True,
                        variance=variance)
                    #if ob value is AR and in the grid area, set its localization radius to the input (hybrid) loc_rad
                    if ob_value >= 250:
                        loc_rad = localize_radius
                    else:
                        loc_rad = 1000
                else:
                    #set the default loc_rad to be 1000
                    loc_rad = 1000

            #fill the observation class object with information for assimilation
            obser = Observation(value=ob_dict['ob'],
                                time=utctime,
                                lat=ob_dict['lat'],
                                lon=ob_dict['lon'],
                                obtype=o_type,
                                localize_radius=loc_rad,
                                assimilate_this=TorF,
                                error=ob_var)
            observations.append(obser)

    print('loaded ' + str(len(observations)) + ' obs for assimilation')

    # Put the state class object and observation objects into EnSRF object
    assimilator = EnSRF(statecls,
                        observations,
                        inflation=inflation,
                        loc=localize_type)

    # Update the prior with EFA- state is an EnsembleState object and is the posterior, post_obs isn't used
    state, post_obs = assimilator.update()

    #---build the string of which directory to save the file--------------------------------
    outdir = '/home/disk/hot/stangen/Documents/posterior_ensembles/'
    #what kind of observations are we assimilating?
    outdir += ob_category + '/'

    #are the observations only updating their corresponding variable, or
    #are they updating all variables?
    if self_update == True:
        outdir += 'ob_update_self/'
    elif self_update == False:
        outdir += 'ob_update_all/'
    #directory for inflation used
    outdir += 'inf_' + inflation_str + '/'
    #directory for the type of localization used
    outdir += 'loc_' + loc_str + '/'
    #add directories for the ens type and year & month
    outdir += ensemble_type + '/' + y + m + '/'
    #----------------------------------------------------------------------------------------

    #Create output directories if they don't yet exist (race-free)
    os.makedirs(outdir, exist_ok=True)

    #add initialization date to filename
    outdir_date_ens = outdir + y + '-' + m + '-' + d + '_' + h

    #new format string: add nhrs and if gridded, grid dimensions to filename
    if new_format == True:
        outdir_date_ens += '_' + str(efh) + 'hrs'
        if ob_category == 'gridded':
            outdir_date_ens += '_' + ef.var_string(grid)

    #convert 1-length ob err var list to a string with no decimals
    #this makes it so that multiple ob err vars that acted on one data type
    #can all be saved to one netCDF- makes managing files easier where I was testing
    #what observation error variance to use.
    #more specifically, this adds ob err var to the variable name in the netCDF.
    #ob_err_var_str is only called if self_update == True, so the ob err var
    #is never in the variable name within the netCDF if self_update == False.
    ob_err_var_str = ''
    if use_oberrvar == True:
        ob_err_var_str = str(ob_err_var[0]).replace('.', '-')
    #if we don't want the observation error variance used in the name of the file
    #and/or the names of the variables in the file
    elif use_oberrvar == False:
        ob_err_var = ''

    #if we are only updating the variable type that matches the observation type
    if self_update == True:
        for var in state.vars():
            checkfile = outdir_date_ens + '*'
            # If we're self-updating variables and we are on the 2nd or later variable in
            # the loop, this will return with something
            existing_file = glob.glob(checkfile)

            #if other variables already exists, append to the netCDF
            if existing_file != []:
                print('Appending to existing file!')
                #should only be one file in there, convert from list to string
                existing_path = existing_file[0]

                # append to the existing file
                with Dataset(existing_path, 'a') as dset:
                    print('Writing variable {}'.format(var))
                    dset.createVariable(var + ob_err_var_str, np.float32, (
                        'time',
                        'lat',
                        'lon',
                        'ens',
                    ))
                    dset.variables[var +
                                   ob_err_var_str].units = ef.get_units(var)
                    dset.variables[var + ob_err_var_str][:] = state[var].values
                # Build the new name from the existing one without '.nc',
                # adding the new variable to the filename
                newfile = (existing_path.replace('.nc', '') + '_' +
                           ef.var_num_string(ob_type, ob_err_var) + '.nc')
                # rename the file that actually exists on disk; unlike the
                # former shell 'mv', a failure raises instead of passing silently
                os.replace(existing_path, newfile)
            else:
                # If the checkfile does not exist, make a new file
                outfile = outdir_date_ens + '_' + ef.var_num_string(
                    ob_type, ob_err_var) + '.nc'
                ef.make_netcdf(state, outfile, ob_err_var_str)

    #if we are updating all variable types with the observation (regardless of its type)
    #and use_oberrvar == True, observation error variance is not saved in the
    #variable name in the netCDF, but it is saved in the filename.
    elif self_update == False:
        outfile = outdir_date_ens + '_' + ef.var_num_string(
            ob_type, ob_err_var) + '.nc'
        ef.make_netcdf(state, outfile)
Пример #8
0
def create_new_netcdf(date, ens_type, in_vrbls, vrbls):
    """
    This function creates a netCDF from a raw TIGGE netCDF. The main purpose
    of this is to change surface pressure to altimeter setting, calculate
    6-hourly precipitation, and to rename/shorten variable names. Unfortunately
    using the float32 format for the variables uses twice the memory of the
    raw TIGGE int16 format.

    date: initialization time; must provide strftime.
    ens_type: ensemble identifier used in the input/output paths.
    in_vrbls: list of variable names as they appear in the raw TIGGE
        file name.
    vrbls: list of output variable names. Names starting with 'QF' are
        computed as moisture-flux magnitude and names starting with 'D-' as
        moisture-flux direction (both from q/u/v in the input file); every
        other name must be a key of the local name-mapping dictionary.

    Relies on the module-level name lev (part of the input file name).
    Nothing is returned; the result is written below
    /home/disk/hot/stangen/Documents/prior_ensembles/.
    """

    y = date.strftime('%Y')
    m = date.strftime('%m')
    d = date.strftime('%d')
    h = date.strftime('%H')

    print('Working on ' + d + '_' + h + ' ' + ens_type)

    #rename TIGGE variable names (output name -> name in the raw file)
    vardict = {
        'T2M': 't2m',
        'ALT': 'sp',
        'P6HR': 'tp',
        'TCW': 'tcw',
        'elev': 'orog',
        'lat': 'latitude',
        'lon': 'longitude',
        'time': 'time',
        'mem': 'number'
    }

    #build a var string corresponding with naming convention of tigge files
    invar_string = ef.var_string(in_vrbls)

    #build a var string corresponding with naming convention of output files
    outvar_string = ef.var_string(vrbls)

    # This is the input file: the raw TIGGE netcdf
    indir = '/home/disk/hot/stangen/Documents/tigge_ensembles/' + ens_type + '/' + y + m + '/' + y + '-' + m + '-' + d + '_' + h + '_' + ens_type + '_' + invar_string + '_' + lev + '.nc'
    # This is the orography file
    orography = '/home/disk/hot/stangen/Documents/tigge_ensembles/orography/2013-04-01_00_' + ens_type + '.nc'
    # This is the output directory for the netcdf with altimeter setting
    outdir = '/home/disk/hot/stangen/Documents/prior_ensembles/' + ens_type + '/' + y + m + '/'

    #Create output directories if they don't yet exist (race-free)
    os.makedirs(outdir, exist_ok=True)

    #Read the ensemble netcdf file; the context manager closes it even if
    #one of the conversions below fails
    with Dataset(indir, 'r') as ncdata:

        # Shape of ncdata is nvars, ntimes, nmems, nlats, nlons
        tunit = ncdata.variables[vardict['time']].units
        ftimes = num2date(ncdata.variables[vardict['time']][:], tunit)
        nmems = len(ncdata.dimensions[vardict['mem']])
        ntimes = len(ftimes)
        nvars = len(vrbls)
        nlats = len(ncdata.dimensions[vardict['lat']])
        nlons = len(ncdata.dimensions[vardict['lon']])

        #time range of ensemble, for naming of file
        ftime_diff = ftimes[-1] - ftimes[0]
        tr = int((ftime_diff.days) * 24 + (ftime_diff.seconds) / 3600)
        tr_str = str(tr) + 'hrs'

        # Allocate the state array
        print('Allocating the state vector array...')
        state = np.zeros((nvars, ntimes, nlats, nlons, nmems))

        # For the metadata, need a list of locations
        lats = ncdata.variables[vardict['lat']][:][:, None]
        lons = ncdata.variables[vardict['lon']][:][None, :]

        #And an array of ensemble members
        memarr = np.arange(1, nmems + 1)

        # Now to populate the state array
        for va, var in enumerate(vrbls):
            #reason index 0:2 is checked is that QF and D-QF can take place at different
            #levels (QF850, D-QF850), so to avoid specifying each possible variable name,
            #just check 1st 2 chars.
            if var[0:2] not in ['QF', 'D-']:
                field = ncdata.variables[vardict[var]][:, :, :, :]
                print('Adding variable {}'.format(var))
                #convert surface pressure to altimeter setting
                if var == 'ALT':
                    #Read the orography netcdf file- for calculating altimeter setting
                    #has a time index, even when only one time is gotten from TIGGE- requires
                    #indexing like [0,:,:] to get this first (and only) time.
                    with Dataset(orography, 'r') as orogdata:
                        elev = orogdata.variables[vardict['elev']][0, :, :]
                    #Convert surface pressure to altimeter setting in mb
                    #pressure in netcdf file is in pascals
                    presinmb = field / 100
                    field = presinmb / ((288 - 0.0065 * elev) / 288)**5.2561
                #find 6-hourly precipitation
                if var == 'P6HR':
                    #create dummy field to facilitate subtracting of total precipitation
                    #t-1 from t without saving over t, so the next subtraction still works
                    field2 = np.zeros((ntimes, nmems, nlats, nlons))
                    for t in range(0, ntimes):
                        if t == 0:
                            field2[t, :, :, :] = field[t, :, :, :]
                        #subtract previous time's precipitation to get 6 hour precipitation
                        elif t > 0:
                            field2[t, :, :, :] = field[t, :, :, :] - field[
                                t - 1, :, :, :]
                    #reassign 6 hour precip to field
                    field = field2
            #magnitude of moisture flux
            if var[0:2] == 'QF':
                q = ncdata.variables['q'][:, :, :, :]
                u = ncdata.variables['u'][:, :, :, :]
                v = ncdata.variables['v'][:, :, :, :]
                #moisture flux is qV, to find magnitude find distance from origin
                #to point, multiply by q, multiply by 1000 to get in units of g/kg.
                field = q * np.sqrt(u**2 + v**2) * 1000
            #direction of moisture flux (-180 to 180, unit circle degrees)
            if var[0:2] == 'D-':
                u = ncdata.variables['u'][:, :, :, :]
                v = ncdata.variables['v'][:, :, :, :]
                field = np.arctan2(v, u) * 180 / np.pi
            # make the ensemble dimension at the end of state:
            # (time, mem, lat, lon) -> (time, lat, lon, mem)
            field = np.swapaxes(field, 1, 3)
            field = np.swapaxes(field, 1, 2)
            # Populate its component of the state array
            state[va, :, :, :, :] = field

    print('Writing to netcdf...')
    # Convert times back to integers
    valid_times = date2num(ftimes, tunit)

    # Write ensemble forecast to netcdf; the context manager completes
    # writing (closes) the file, also when an error occurs part-way
    outfile = (outdir + y + '-' + m + '-' + d + '_' + h + '_' + tr_str + '_' +
               outvar_string + '44.nc')
    with Dataset(outfile, 'w') as dset:
        dset.createDimension('time', None)
        dset.createDimension('lat', nlats)
        dset.createDimension('lon', nlons)
        dset.createDimension('ens', nmems)
        dset.createVariable('time', 'i4', ('time', ))
        dset.createVariable('lat', np.float32, ('lat', ))
        # ('lon', ) is now a real one-element tuple like its siblings
        dset.createVariable('lon', np.float32, ('lon', ))
        dset.createVariable('ens', 'i4', ('ens', ))
        dset.variables['time'].units = tunit
        dset.variables['lat'].units = 'degrees_north'
        dset.variables['lon'].units = 'degrees_east'
        dset.variables['ens'].units = 'member_number'
        dset.variables['time'][:] = np.array(valid_times)
        dset.variables['lat'][:] = lats
        dset.variables['lon'][:] = lons
        dset.variables['ens'][:] = memarr
        for vi, var in enumerate(vrbls):
            print('Writing variable {}'.format(var))
            dset.createVariable(var, np.float32, (
                'time',
                'lat',
                'lon',
                'ens',
            ))
            dset.variables[var].units = ef.get_units(var)
            dset.variables[var][:] = state[vi, :, :, :, :]
Пример #9
0
    elif use_oberrvar == 'false':
        use_oberrvar = False
    #are the observations only updating their corresponding variable, or
    #are they updating all variables? -ie t2m only updates t2m, alt only updates alt
    self_update = sys.argv[20]
    if self_update == 'true':
        self_update = True  #true if you want the above updates, otherwise false
    elif self_update == 'false':
        self_update = False

    datestr = startstr + '-' + endstr

#directory path stored for later use; judging by its name, the MSE/variance
#output .txt files go here (the code that uses it is not visible here)
save_dir = '/home/disk/hot/stangen/Documents/EFA/duplicate_madaus/mse_var_output/'

#variable string (for saving the .txt file), built from the allobs list
varstr = ef.var_string(allobs)

#list of netCDF variable names/strings made from obs + obs error variance;
#post_vrbls contains the variable names in the filename in the posterior.
#Starts empty and is presumably filled by the loop over ob_types that follows.
netcdf_varnames = []

#list of variable names we want to save to the .txt file (this may include
#ob err variance, even when not doing self-update). Starts empty as well.
dict_varnames = []

for i, ob in enumerate(ob_types):
    #If we are loading prior to do stats on, we need to set ob error variance to
    #[''] to make variable in netCDF load correctly.
    #also, if we did not previously use the obs err var in creating the posterior
    #ens name/variable names, or did not only self-update each variable,
    #set ob_err_var to [''], since ob err var was not included in names.
Пример #10
0
savedir = '/home/disk/hot/stangen/Documents/EFA/duplicate_madaus/plots/'

# Split the start and end datetimes into year / month / day / hour strings;
# they are combined below into the date part of the stats .txt file name.
_date_fields = ('%Y', '%m', '%d', '%H')
sy, sm, sd, sh = [start_date.strftime(_fld) for _fld in _date_fields]
ey, em, ed, eh = [end_date.strftime(_fld) for _fld in _date_fields]

# "<start>-<end>", each side being YYYYmmddHH.
datestr = '-'.join([sy + sm + sd + sh, ey + em + ed + eh])

# String form of the list of variables, as it appears in the file name.
varstr = ef.var_string(variables)


# Path of the stats text file: <filedir><datestr>_<varstr> followed by a suffix
# that depends on the observation category. Any other ob_category leaves the
# path without an extension.
filepath = filedir+datestr+'_'+varstr
if ob_category == 'gridded':
    filepath += '_gridobs.txt'
elif ob_category == 'madis':
    filepath += '.txt'

# Read every line into memory; the context manager guarantees the file handle
# is closed even if reading fails (the bare open() previously leaked it).
with open(filepath, 'r') as f1:
    stats = f1.readlines()

# Empty dict, created here alongside the raw stats lines read above.
stats_dict = {}

for line in stats:
    line_split = line.split(',')
Пример #11
0
# Day and hour strings of the forecast initialization time.
fd, fh = forecast_time.strftime('%d'), forecast_time.strftime('%H')

# Turn the west/east/north/south bounds into grid indices. The right-hand
# endpoints (r and b) get +1 so that Python slicing includes the last index.
# NOTE(review): the factor of 2 presumably reflects a half-degree grid - confirm.
l, r = int((w + 180) * 2), int((e + 180) * 2) + 1
t, b = int((90 - n) * 2), int((90 - s) * 2) + 1

# A left index larger than the right one means the box crosses 180W;
# shift the left index back by 720 grid points.
if l > r:
    l -= 720

# String forms of the variable lists, used when building file names.
prior_var_str = ef.var_string(prior_var)
grid_str = ef.var_string(grid)

# Forecast index: how many output timesteps separate initialization from the
# analysis time. Timesteps of forecast output are 6 hours apart.
timediff = analysis_time - forecast_time
tdd, tds = timediff.days, timediff.seconds
tdh = 24 * tdd + tds / 3600
timeind = int(tdh / 6)

# Maps ensemble type to the label used in plotting.
ens_dict = {
    'ncep': 'GEFS',
    'eccc': 'CMC',
    'ecmwf': 'ECMWF',
}

# Full path of the analysis netCDF at the desired time.
analysis_path = ''.join([
    '/home/disk/hot/stangen/Documents/prior_ensembles/', ens, '/',
    ay, am, '/',
    ay, '-', am, '-', ad, '_', ah, '_', efh, '_', prior_var_str, '.nc',
])
Пример #12
0
    end_index=int(sys.argv[11])
    boolstr=sys.argv[12]
    #change string to boolean for loading prior or posterior ensembles
    if boolstr == 'true':
        post=True
    elif boolstr =='false':
        post=False     
    loc_rad = sys.argv[13]  
    inflation = sys.argv[14]
    ob_category = sys.argv[15]
    
    datestr = startstr+'-'+endstr

save_dir = '/home/disk/hot/stangen/Documents/EFA/duplicate_madaus/mse_var_output/'

# String form of all observation types.
varstr = ef.var_string(allobs)

# One name per observation type, combining the ob type with its ob error
# variance; these match netCDF variable names / part of the file name.
obtype_errvar = []
for i, ob in enumerate(ob_types):
    obtype_errvar += [ef.var_num_string([ob], [ob_err_var[i]])]

# Label for the ensemble being evaluated: the localization radius for a
# posterior, or plain 'prior'.
if post == True:
    prior_or_post = 'loc' + loc_rad
elif post == False:
    prior_or_post = 'prior'

# Last forecast hour to get observations for (end_index steps of 6 hours).
end_hour = end_index * 6

# The dict where all the data is.
ob_dict = dict()
#dict for the means from the data
Пример #13
0
# First and last ensemble initialization to process.
start_date = datetime(2015, 11, 10, 0)  # YYYY, m, d, h
end_date = datetime(2015, 11, 17, 12)
# Spacing between forecast initializations, usually 12 hr.
hourstep = 12
# Variable names as they come from the raw TIGGE data - see get_tigge_data if
# unsure of names. Order matters: it must match the file names exactly.
surf_variables = ['D2M', 'SP', 'U10', 'V10']  # surface variables
upper_variables = ['Q', 'U', 'V']  # upper variables
# Levels in the upper level netCDF.
levels = ['1000', '925', '850', '700', '500', '300']
# Variables this script is meant to produce.
end_variables = ['IWV', 'IVT', 'D-IVT']
#------------------------------------------------------------------------------

# Every initialization datetime to loop over when loading forecasts.
dates = mt.make_datetimelist(start_date, end_date, hourstep)
# File-name endings of the surface and aloft TIGGE netCDFs.
surf_str = ''.join((ef.var_string(surf_variables), '_sfc.nc'))
upper_str = ''.join((
    ef.var_string(upper_variables), '_', ef.var_string(levels), '_pl.nc',
))

# Number of variables in the netCDF produced from this script.
nvars = len(end_variables)

# NOTE(review): presumably standard gravity in m s^-2 - confirm.
g = 9.80665

# String form of the output variable list.
outvar_string = ef.var_string(end_variables)

for ens in ensemble_type:
    for date in dates:

        y,m,d,h = ef.dt_str_timedelta(date)
        
        print('Working on '+d+'_'+h+' '+ens)
Пример #14
0
 def load_netcdfs(self,post=False,ob_cat='madis',ob_upd='ob_update_self',inf='none',lr='1000'):
     """
     Loads the ensemble netCDF and the elevation (orography) netCDF.
     Packages the ensemble data into an EnsembleState (xarray) object for
     use in efa_xray code, and also returns the latitudes, longitudes and
     elevations of the ensemble type.

     Both netCDF files are closed before returning; everything that is
     returned has already been read into memory.

     post = boolean, if true, we are loading the posterior, if false, loading the prior.
     posterior options (only used to locate the file when post is true):
         ob_cat = observation category, either 'madis' or 'gridded' observations
         ob_upd = observation update, is either 'ob_update_self' or 'ob_update_all'-
         did we use the observations to update just their corresponding variables,
         or did we allow them to update all variable types in the ensemble?
         inf = inflation; any '.' is replaced by '-' in the directory name
         lr = localization radius we used

     Returns the tuple (statecls, lats, lons, elevs):
         statecls = EnsembleState built from the variables in self.update_var
         lats, lons = the 'lat' and 'lon' variables of the ensemble file
         elevs = the 'orog' variable of the orography file at index 0 of
         its leading axis

     Raises ValueError if post or self.new_format is neither True nor False
     (previously this surfaced as an UnboundLocalError).
     """

     # Path pieces shared by every candidate file name.
     ens_month_dir = self.ens_type+'/'+self.y+self.m+'/'
     init_stamp = self.y+'-'+self.m+'-'+self.d+'_'+self.h

     # directory where the ensemble of all times is
     if post==False:
         prior_root = '/home/disk/hot/stangen/Documents/prior_ensembles/'
         if self.new_format == False:
             infile = prior_root+ens_month_dir+init_stamp+'_'+self.ens_type+'_'+self.var_string+'.nc'
         elif self.new_format == True:
             infile = prior_root+ens_month_dir+init_stamp+'_'+self.efh+'_'+self.var_string+'.nc'
         else:
             raise ValueError('new_format must be True or False, got %r' % (self.new_format,))
         prior_or_post='prior'
     elif post==True:
         post_varstring = ef.var_string(self.post_vrbls)
         post_root = ('/home/disk/hot/stangen/Documents/posterior_ensembles/'+ob_cat+'/'+ob_upd
                      +('/inf_'+inf).replace('.','-')+'/loc_'+str(lr)+'/')
         if self.new_format == False:
             infile = post_root+ens_month_dir+init_stamp+'_'+self.ens_type+'_'+post_varstring+'.nc'
         elif self.new_format == True:
             # New-format posterior names also carry the l/r/t/b/s attributes.
             domain = '_'.join([str(self.l),str(self.r),str(self.t),str(self.b),str(self.s)])
             infile = post_root+ens_month_dir+init_stamp+'_'+self.efh+'_'+domain+'_'+post_varstring+'.nc'
         else:
             raise ValueError('new_format must be True or False, got %r' % (self.new_format,))
         prior_or_post='posterior'
     else:
         raise ValueError('post must be True or False, got %r' % (post,))
     print('loading netcdf file: '+prior_or_post+': '+self.ens_type+' '+self.y+self.m+self.d+'_'+self.h+'00')

     # loading/accessing the netcdf data; the [:] slices read the values
     # into memory so the file can be closed as soon as we are done.
     with Dataset(infile,'r') as ncdata:
         times = ncdata.variables['time']
         ftimes = num2date(times[:],
                           times.units)
         lats = ncdata.variables['lat'][:]
         lons = ncdata.variables['lon'][:]
         mems = ncdata.variables['ens'][:]

         # storing the variable data in a dict (state?)
         allvars = {}
         for var in self.update_var:
             allvars[var] = (['validtime','y','x','mem'],
                             ncdata.variables[var][:])

     # directory where the orography file is
     orography = '/home/disk/hot/stangen/Documents/tigge_ensembles/orography/2013-04-01_00_'+self.ens_type+'.nc'
     with Dataset(orography,"r") as orog_data:
         elevs = orog_data.variables['orog'][0,:] # Elevation of ecmwf, eccc, ncep

     # 2-D lat/lon arrays on the (y, x) grid for the state coordinates.
     lonarr, latarr = np.meshgrid(lons, lats)

     pack_str = ef.var_string(self.update_var)
     print('packaging '+pack_str+' into EnsembleState object')
     # Package into an EnsembleState object knowing the state and metadata
     statecls = EnsembleState.from_vardict(allvars,
                                   {'validtime' : ftimes,
                                    'lat' : (['y','x'], latarr),
                                    'lon' : (['y','x'], lonarr),
                                    'mem' : mems,
                                    })

     return statecls, lats, lons, elevs