Example #1
0
def apply_smoother(varalias,vardict,output_dates=None,method=None,date_incr=None,**kwargs):
    """
    Smooth the time series contained in vardict.

    **kwargs carries method-specific settings for the chosen smoother.
    Available methods:
            blockMean
            GP
            GAM
            Lanczos
            ...
    Caution:    some smoothers require a considerably longer
                portion of the time series to be included.
    """
    print('Apply smoother')
    print('Smooth data using method:',method)
    stdvarname = variable_info[varalias]['standard_name']
    newdict = deepcopy(vardict)
    # determine the output grid
    if (isinstance(date_incr,int) and output_dates is None):
        # date_incr is given in hours; build the list of output time
        # stamps spanning the input datetime range
        start = vardict['datetime'][0]
        end = vardict['datetime'][-1]
        n_hours = int((end - start).total_seconds() / (60 * 60)) + 1
        output_dates = []
        for h in range(0, n_hours, date_incr):
            candidate = start + timedelta(hours=h)
            if candidate <= end:
                output_dates.append(candidate)
    elif output_dates is None:
        # fall back to the original time stamps
        output_dates = vardict['datetime']
    output_grid = netCDF4.date2num(output_dates,
                                   units=vardict['time_unit'])
    # align datetime and output_grid/output_dates
    idx = collocate_times(vardict['datetime'], output_dates)
    idx_output_dates = collocate_times(
        output_dates,
        list(np.array(vardict['datetime'])[idx]))
    output_dates = list(np.array(output_dates)[idx_output_dates])
    output_grid = list(np.array(output_grid)[idx_output_dates])
    # run the chosen smoother on the aligned grid
    smoothed_ts = smoothing(varalias, newdict, output_grid,
                            output_dates, method=method,
                            date_incr=date_incr,
                            **kwargs)
    newdict[stdvarname] = list(smoothed_ts)
    # subset every remaining data entry to the collocated indices
    for key in newdict:
        if key not in (stdvarname, 'time_unit', 'meta'):
            newdict[key] = list(np.array(newdict[key])[idx])
    return newdict
Example #2
0
def validate_collocated_values(dtime, obs, mods, **kwargs):
    """
    Validate collocated model values against observations.

    Optionally filters the series in time (target_t/sdate/edate/twin,
    passed through to collocate_times) before computing the validation
    statistics. Prints and returns the validation dictionary.

    Parameters:
        dtime -- list of datetimes of the collocated pairs
        obs   -- observed values
        mods  -- model values
        kwargs -- col_obj (collocation object overriding the three
                  positional series), target_t, sdate, edate, twin

    Returns:
        validation_dict -- dict of validation metrics
    """
    # a supplied collocation object overrides the positional series
    col_obj = kwargs.get('col_obj')
    if col_obj is not None:
        # bug fix: the original read the undefined name col_obj without
        # assigning it from kwargs first, raising NameError
        mods = col_obj.vars['model_values']
        obs = col_obj.vars['obs_values']
        dtime = col_obj.vars['datetime']
    # optional time filtering parameters
    target_t = kwargs.get('target_t')
    sdate = kwargs.get('sdate')
    edate = kwargs.get('edate')
    twin = kwargs.get('twin')
    idx = collocate_times(dtime,
                          target_t=target_t,
                          sdate=sdate,
                          edate=edate,
                          twin=twin)
    mods = np.array(mods)[idx]
    obs = np.array(obs)[idx]
    results_dict = {'model_values': mods, 'obs_values': obs}
    # validate
    from wavy.validationmod import validate, disp_validation
    validation_dict = validate(results_dict)
    disp_validation(validation_dict)
    return validation_dict
Example #3
0
def collocate_field(mc_obj=None,
                    obs_obj=None,
                    col_obj=None,
                    distlim=None,
                    datein=None,
                    model_lats=None,
                    model_lons=None,
                    model_vals=None):
    """
    Collocate observations with a single model field.

    Either a model_class object (mc_obj) or explicit model arrays
    (datein, model_lats, model_lons, model_vals) must be provided.
    Observations are first filtered in time around datein using the
    observation object's time window (obs_obj.twin), then matched in
    space to the nearest model grid point within distlim km.

    Returns a dict with collocated values, positions, times and the
    model grid indices of the matches.
    """
    if mc_obj is not None:
        # take the model field description from the model_class object
        datein = netCDF4.num2date(mc_obj.vars['time'],
                                  mc_obj.vars['time_unit'])
        model_lats = mc_obj.vars['latitude']
        model_lons = mc_obj.vars['longitude']
        model_vals = mc_obj.vars[mc_obj.stdvarname]
    dtime = netCDF4.num2date(obs_obj.vars['time'], obs_obj.vars['time_unit'])
    if isinstance(dtime, np.ndarray):
        dtime = list(dtime)
    if isinstance(datein, np.ndarray):
        datein = list(datein)
    if isinstance(datein, datetime):
        datein = [datein]
    # 1. time constraint: restrict obs to model valid time +- twin
    cidx = collocate_times(dtime, target_t=datein, twin=obs_obj.twin)
    obs_time_dt = np.array(dtime)[cidx]
    # normalize possible cftime objects to plain datetime
    obs_time_dt = [
        datetime(t.year, t.month, t.day, t.hour, t.minute, t.second)
        for t in obs_time_dt
    ]
    datein = [
        datetime(t.year, t.month, t.day, t.hour, t.minute, t.second)
        for t in datein
    ]
    obs_time = np.array(obs_obj.vars['time'])[cidx]
    obs_time_unit = obs_obj.vars['time_unit']
    obs_lats = np.array(obs_obj.vars['latitude'])[cidx]
    obs_lons = np.array(obs_obj.vars['longitude'])[cidx]
    obs_vals = np.array(obs_obj.vars[obs_obj.stdvarname])[cidx]
    # default distance limit in km (idiom fix: was "distlim == None")
    if distlim is None:
        distlim = 6
    if col_obj is None:
        print("No collocation idx available")
        print(len(obs_time_dt), "footprints to be collocated")
        print ("Perform collocation with distance limit\n",\
                "distlim:",distlim)
        # 2. space constraint: nearest model grid point per footprint
        index_array_2d, distance_array, _ = \
                                collocation_fct(
                                obs_lons, obs_lats,
                                model_lons, model_lats)
        # caution: index_array_2d is a tuple
        # impose distlim (km -> m) and drop NaN model values
        dist_idx = np.where((distance_array < distlim * 1000) &
                            (~np.isnan(
                                model_vals[index_array_2d[0],
                                           index_array_2d[1]])))[0]
        idx_x = index_array_2d[0][dist_idx]
        idx_y = index_array_2d[1][dist_idx]
        results_dict = {
            'valid_date': datein,
            'time': list(obs_time[dist_idx]),
            'time_unit': obs_time_unit,
            'datetime': list(np.array(obs_time_dt)[dist_idx]),
            'distance': list(distance_array[dist_idx]),
            'model_values': list(model_vals[idx_x, idx_y]),
            'model_lons': list(model_lons[idx_x, idx_y]),
            'model_lats': list(model_lats[idx_x, idx_y]),
            'obs_values': list(obs_vals[dist_idx]),
            'obs_lons': list(obs_lons[dist_idx]),
            'obs_lats': list(obs_lats[dist_idx]),
            'collocation_idx_x': list(idx_x),
            'collocation_idx_y': list(idx_y),
            }
    elif len(col_obj.vars['collocation_idx_x']) > 0:
        # bug fix: the original tested col_obj.vars['collocation_idx'][0],
        # a key never written anywhere in this module (only
        # 'collocation_idx_x'/'collocation_idx_y' exist, see the dict
        # above and the reuse below) and would raise KeyError
        print("Collocation idx given through collocation_class object")
        results_dict = col_obj.vars
        results_dict['model_values'] = list(
            model_vals[col_obj.vars['collocation_idx_x'],
                       col_obj.vars['collocation_idx_y']])
    else:
        # bug fix: the original fell through with results_dict unbound
        # (UnboundLocalError); fail with a clear message instead
        raise ValueError('col_obj contains no collocation indices')
    return results_dict
Example #4
0
def collocate_satellite_ts(obs_obj=None,model=None,distlim=None,\
    leadtime=None,date_incr=None):
    """
    Collocate a satellite observation time series with model output.

    For each forecast date derived from the observation period the
    observations inside the object's time window are selected, the
    matching model field is retrieved, and the collocated pairs are
    appended to an aggregate dict that is flattened before return.

    Parameters:
        obs_obj   -- satellite observation object (.vars, .twin,
                     .sdate, .edate, .stdvarname, .varalias)
        model     -- model name understood by make_fc_dates/get_model
        distlim   -- distance limit passed on to collocate_field
        leadtime  -- forecast lead time passed on to model readers
        date_incr -- increment (presumably hours — TODO confirm)
                     between consecutive forecast dates

    Returns:
        results_dict -- dict of flattened, collocated time series
    """
    # candidate forecast dates over the obs period, restricted to
    # dates the model/leadtime combination can actually provide
    fc_date = make_fc_dates(obs_obj.sdate, obs_obj.edate, date_incr)
    fc_date = find_valid_fc_dates_for_model_and_leadtime(\
                            fc_date,model,leadtime)
    # aggregate container; per-date results are appended and
    # flattened at the end
    results_dict = {
        'valid_date': [],
        'time': [],
        'time_unit': obs_obj.vars['time_unit'],
        'datetime': [],
        'distance': [],
        'model_values': [],
        'model_lons': [],
        'model_lats': [],
        'obs_values': [],
        'obs_lons': [],
        'obs_lats': [],
        'collocation_idx_x': [],
        'collocation_idx_y': [],
    }
    for i in tqdm(range(len(fc_date))):
        #    for i in range(len(fc_date)):
        #        for f in range(1):
        # suppress verbose output from the readers during the loop
        with NoStdStreams():
            #            for t in range(1):
            try:
                # filter needed obs within time period
                idx = collocate_times(obs_obj.vars['datetime'],
                                      target_t=[fc_date[i]],
                                      twin=obs_obj.twin)
                # make tmp obs_obj with filtered data
                obs_obj_tmp = deepcopy(obs_obj)
                obs_obj_tmp.vars['time'] = list(\
                        np.array(obs_obj.vars['time'])[idx] )
                obs_obj_tmp.vars['latitude'] = list(\
                        np.array(obs_obj.vars['latitude'])[idx] )
                obs_obj_tmp.vars['longitude'] = list(\
                        np.array(obs_obj.vars['longitude'])[idx] )
                obs_obj_tmp.vars[obs_obj.stdvarname] = \
                        list(np.array(\
                        obs_obj.vars[obs_obj.stdvarname])[idx] )
                # retrieve the model field valid at this forecast date
                vardict, _, _, _, _ = get_model(model=model,
                                                fc_date=fc_date[i],
                                                varalias=obs_obj.varalias,
                                                leadtime=leadtime,
                                                transform_lons=180)
                # spatial collocation of the filtered obs with the field
                results_dict_tmp = collocate_field(\
                                datein=fc_date[i],\
                                model_lats=vardict['latitude'],\
                                model_lons=vardict['longitude'],\
                                model_vals=vardict[obs_obj.stdvarname],\
                                obs_obj=obs_obj_tmp,\
                                distlim=distlim )
                # append to dict
                results_dict['valid_date'].append(fc_date[i])
                results_dict['time'].append(results_dict_tmp['time'])
                results_dict['datetime'].append(results_dict_tmp['datetime'])
                results_dict['distance'].append(results_dict_tmp['distance'])
                results_dict['model_values'].append(
                    results_dict_tmp['model_values'])
                results_dict['model_lons'].append(
                    results_dict_tmp['model_lons'])
                results_dict['model_lats'].append(
                    results_dict_tmp['model_lats'])
                results_dict['obs_values'].append(
                    results_dict_tmp['obs_values'])
                results_dict['obs_lats'].append(results_dict_tmp['obs_lats'])
                results_dict['obs_lons'].append(results_dict_tmp['obs_lons'])
                results_dict['collocation_idx_x'].append(\
                                results_dict_tmp['collocation_idx_x'])
                results_dict['collocation_idx_y'].append(\
                                results_dict_tmp['collocation_idx_y'])
                if 'results_dict_tmp' in locals():
                    del results_dict_tmp
            except (ValueError, FileNotFoundError, OSError) as e:
                # ValueError, pass if no collocation
                # FileNotFoundError, pass if file not accessible
                # OSError, pass if file not accessible from thredds
                print(e)
    # flatten all aggregated entries
    results_dict['time'] = flatten(results_dict['time'])
    results_dict['datetime'] = flatten(results_dict['datetime'])
    results_dict['distance'] = flatten(results_dict['distance'])
    results_dict['model_values'] = flatten(results_dict['model_values'])
    results_dict['model_lons'] = flatten(results_dict['model_lons'])
    results_dict['model_lats'] = flatten(results_dict['model_lats'])
    results_dict['obs_values'] = flatten(results_dict['obs_values'])
    results_dict['obs_lats'] = flatten(results_dict['obs_lats'])
    results_dict['obs_lons'] = flatten(results_dict['obs_lons'])
    results_dict['collocation_idx_x'] = flatten(\
                                results_dict['collocation_idx_x'])
    results_dict['collocation_idx_y'] = flatten(\
                                results_dict['collocation_idx_y'])
    return results_dict
Example #5
0
def collocate_station_ts(obs_obj=None,model=None,distlim=None,\
    leadtime=None,date_incr=None):
    """
    Collocate an in-situ/station observation time series with a model.

    The spatial collocation indices are computed once from the first
    valid model file and then reused for all later forecast dates,
    since a station does not move.

    Parameters:
        obs_obj   -- in-situ observation object (.vars, .twin, .sdate,
                     .edate, .stdvarname, .varalias)
        model     -- model name
        distlim   -- distance limit passed to collocation_class
        leadtime  -- forecast lead time
        date_incr -- increment between consecutive forecast dates

    Returns:
        results_dict -- col_obj.vars with collocated series

    Raises:
        FileNotFoundError -- if no valid model file is available
    """
    fc_date = make_fc_dates(obs_obj.sdate, obs_obj.edate, date_incr)
    # get coinciding dates between fc_date and dates in obs_obj
    idx1 = collocate_times(unfiltered_t=obs_obj.vars['datetime'],
                           target_t=fc_date,
                           twin=obs_obj.twin)
    # find valid/coinciding fc_dates
    if len(idx1) > len(fc_date):
        # typo fix in message: was "Muliple"
        print('Multiple assignments within given time window')
        print('--> only closest to time stamp is chosen')
        idx_closest = get_closest_date(
            list(np.array(obs_obj.vars['datetime'])[idx1]),
            fc_date)
        idx1 = list(np.array(idx1)[idx_closest])
    # adjust obs_obj according to valid dates
    for key in obs_obj.vars.keys():
        if key not in ('time_unit', 'meta'):
            obs_obj.vars[key] = list(np.array(obs_obj.vars[key])[idx1])
    # adjust again assumed fc_dates by filtered obs dates
    fc_date = obs_obj.vars['datetime']
    # find valid dates for given leadtime and model
    fc_date = find_valid_fc_dates_for_model_and_leadtime(
        fc_date, model, leadtime)
    # find the first fc_date for which a valid model file exists
    check = False
    d = 0
    for d in range(len(fc_date)):
        check = check_if_file_is_valid(fc_date[d], model, leadtime)
        if check:
            break
    if not check:
        # bug fix: the original only printed a message here and then
        # crashed with NameError on the undefined col_obj below;
        # fail explicitly instead
        print('No valid model file available!')
        raise FileNotFoundError('No valid model file available!')
    # initialize collocation with the first valid model file;
    # this fixes the spatial indices reused for all later dates
    mc_obj = model_class(model=model,
                         fc_date=fc_date[d],
                         leadtime=leadtime,
                         varalias=obs_obj.varalias,
                         transform_lons=180)
    col_obj = collocation_class(mc_obj_in=mc_obj,
                                obs_obj_in=obs_obj,
                                distlim=distlim)
    model_vals = [col_obj.vars['model_values'][0]]
    tmpdate = hour_rounder(col_obj.vars['datetime'][0])
    model_datetime = [
        datetime(tmpdate.year, tmpdate.month, tmpdate.day, tmpdate.hour)
    ]
    model_time = [
        netCDF4.date2num(model_datetime[0],
                         units=col_obj.vars['time_unit'])
    ]
    print('Collocating and appending values ...')
    for i in tqdm(range(d + 1, len(fc_date))):
        with NoStdStreams():
            try:
                check = check_if_file_is_valid(fc_date[i], model, leadtime)
                if not check:
                    raise FileNotFoundError
                mc_obj = model_class(model=model,
                                     fc_date=fc_date[i],
                                     leadtime=leadtime,
                                     varalias=obs_obj.varalias,
                                     transform_lons=180)
                # reuse the spatial collocation indices found above
                model_vals.append(
                    mc_obj.vars[mc_obj.stdvarname][
                        col_obj.vars['collocation_idx_x'],
                        col_obj.vars['collocation_idx_y']][0])
                model_time.append(mc_obj.vars['time'][0])
                model_datetime.append(datetime(
                    mc_obj.vars['datetime'][0].year,
                    mc_obj.vars['datetime'][0].month,
                    mc_obj.vars['datetime'][0].day,
                    mc_obj.vars['datetime'][0].hour))
            except FileNotFoundError as e:
                print(e)
    # potentially there are different numbers of values for obs and
    # model; double check and use only coherent datetimes
    idx2 = collocate_times(model_datetime,
                           target_t=obs_obj.vars['datetime'],
                           twin=obs_obj.twin)
    col_obj.vars['model_values'] = list(np.array(model_vals)[idx2])
    col_obj.vars['time'] = list(np.array(model_time)[idx2])
    col_obj.vars['datetime'] = list(np.array(model_datetime)[idx2])
    idx3 = collocate_times(unfiltered_t=obs_obj.vars['datetime'],
                           target_t=col_obj.vars['datetime'],
                           twin=obs_obj.twin)
    col_obj.vars['obs_values'] = list(
        np.array(obs_obj.vars[obs_obj.stdvarname])[idx3])
    # valid_date is meaningless for ts application and set to None
    col_obj.vars['valid_date'] = None
    # inflate length of constant sized variables (station position and
    # grid indices are the same for every time step)
    n = len(col_obj.vars['datetime'])
    for key in ('distance', 'obs_lats', 'obs_lons',
                'collocation_idx_x', 'collocation_idx_y',
                'model_lats', 'model_lons'):
        col_obj.vars[key] = col_obj.vars[key] * n
    results_dict = col_obj.vars
    return results_dict
Example #6
0
def collocate_poi_ts(indict,model=None,distlim=None,\
    leadtime=None,date_incr=None,varalias=None,twin=None):
    """
    Collocate points-of-interest (poi) observations with model output.

    indict: mandatory - lons, lats, time, values
            optional - leadtime, distlim, date_incr

    For each valid forecast date the nearest model grid point to the
    poi position is found; pairs within distlim km are collected.

    Returns:
        results_dict -- dict of collocated time series

    Raises:
        ValueError -- if indict['time'] is neither str nor datetime
    """
    # validates varalias against the variable registry
    stdvarname = variable_info[varalias]['standard_name']
    # accept either str or datetime time stamps
    if isinstance(indict['time'][0], str):
        poi_dtimes = [parse_date(t) for t in indict['time']]
    elif isinstance(indict['time'][0], datetime):
        poi_dtimes = indict['time']
    else:
        # bug fix: the original only printed here and then crashed
        # with NameError on the unassigned poi_dtimes below
        raise ValueError('no valid time/datetime format for poi; '
                         'use either str or datetime')
    # robustness fix: default distance limit (km), consistent with
    # collocate_field; None would TypeError at "distlim * 1000" below
    if distlim is None:
        distlim = 6
    fc_date = make_fc_dates(poi_dtimes[0], poi_dtimes[-1], date_incr)
    # get coinciding dates between fc_date and poi dates
    idx1 = collocate_times(unfiltered_t=poi_dtimes,
                           target_t=fc_date,
                           twin=twin)
    # find valid/coinciding fc_dates
    if len(idx1) > len(fc_date):
        # typo fix in message: was "Muliple"
        print('Multiple assignments within given time window')
        print('--> only closest to time stamp is chosen')
        idx_closest = get_closest_date(
            list(np.array(poi_dtimes)[idx1]),
            fc_date)
        idx1 = list(np.array(idx1)[idx_closest])
    # adjust poi data according to valid dates
    for key in indict.keys():
        if key not in ('time_unit', 'meta', 'nID'):
            indict[key] = list(np.array(indict[key])[idx1])
    poi_dtimes = list(np.array(poi_dtimes)[idx1])
    del idx1
    # find valid dates for given leadtime and model
    fc_date = find_valid_fc_dates_for_model_and_leadtime(
        fc_date, model, leadtime)
    # adjust fc_date according to obs date
    idx2 = collocate_times(unfiltered_t=fc_date,
                           target_t=poi_dtimes,
                           twin=twin)
    fc_date = list(np.array(fc_date)[idx2])
    del idx2
    # compute time based on time unit from variable definition
    time_unit = variable_info['time']['units']
    time = netCDF4.date2num(poi_dtimes, time_unit)
    # accumulators for collocated entries
    model_vals = []
    model_lons = []
    model_lats = []
    obs_vals = []
    obs_lons = []
    obs_lats = []
    collocation_idx_x = []
    collocation_idx_y = []
    distance = []
    time_lst = []
    dtimes = []
    switch = 0
    for d in tqdm(range(len(fc_date))):
        with NoStdStreams():
            check = check_if_file_is_valid(fc_date[d], model, leadtime)
            if check:
                # retrieve model file for this forecast date
                fname = make_model_filename_wrapper(model, fc_date[d],
                                                    leadtime)
                # performance fix: open the dataset once per file
                # instead of up to four separate open_dataset calls
                ds = xr.open_dataset(fname)
                # get hold of variable names and grid (done only once)
                if switch == 0:
                    meta = ncdumpMeta(fname)
                    lonsname = get_filevarname('lons', variable_info,
                                               model_dict[model], meta)
                    latsname = get_filevarname('lats', variable_info,
                                               model_dict[model], meta)
                    timename = get_filevarname('time', variable_info,
                                               model_dict[model], meta)
                    filevarname = get_filevarname(varalias, variable_info,
                                                  model_dict[model], meta)
                    mlons = ds[lonsname].values
                    # secure lons from -180 to 180
                    mlons = ((mlons - 180) % 360) - 180
                    mlats = ds[latsname].values
                    # ensure matching dimension
                    if len(mlons.shape) == 1:
                        Mlons, Mlats = np.meshgrid(mlons, mlats)
                    else:
                        Mlons, Mlats = mlons, mlats
                    switch = 1
                plon = [indict['longitude'][d]]
                plat = [indict['latitude'][d]]
                index_array_2d, distance_array, _ = \
                        collocation_fct(plon, plat, Mlons, Mlats)
                dst = ds[timename].values
                tidx = list(dst).index(np.datetime64(fc_date[d]))
                # impose distlim (km -> m)
                if distance_array[0] < distlim * 1000:
                    idx_x = index_array_2d[0][0]
                    idx_y = index_array_2d[1][0]
                    model_lons.append(Mlons[idx_x, idx_y])
                    model_lats.append(Mlats[idx_x, idx_y])
                    vals = ds[filevarname][tidx, idx_x, idx_y].values
                    model_vals.append(vals.item())
                    obs_vals.append(indict['obs'][d])
                    obs_lons.append(indict['longitude'][d])
                    obs_lats.append(indict['latitude'][d])
                    collocation_idx_x.append(idx_x)
                    collocation_idx_y.append(idx_y)
                    distance.append(distance_array[0])
                    time_lst.append(time[d])
                    dtimes.append(poi_dtimes[d])
    results_dict = {
        'valid_date': dtimes,
        'time': time_lst,
        'time_unit': time_unit,
        'datetime': dtimes,
        'distance': distance,
        'model_values': model_vals,
        'model_lons': model_lons,
        'model_lats': model_lats,
        'obs_values': obs_vals,
        'obs_lons': obs_lons,
        'obs_lats': obs_lats,
        'collocation_idx_x': collocation_idx_x,
        'collocation_idx_y': collocation_idx_y
    }
    return results_dict
Example #7
0
def get_d22_dict(**kwargs):
    """
    Read a d22 in-situ time series and return it as a vardict.

    Expected kwargs: sdate, edate, basedate, nID, sensor, varalias,
    pathlst, strsublst, dict_for_sub; optional 'unique' (bool) thins
    the 10-minute records down to a 20-minute grid (00,20,40).

    Returns:
        vardict -- dict with the variable under its standard name plus
                   time, datetime, time_unit, longitude, latitude
    """
    sdate = kwargs.get('sdate')
    edate = kwargs.get('edate')
    basedate = kwargs.get('basedate')
    nID = kwargs.get('nID')
    sensor = kwargs.get('sensor')
    varalias = kwargs.get('varalias')
    pathlst = kwargs.get('pathlst')
    strsublst = kwargs.get('strsublst')
    dict_for_sub = kwargs.get('dict_for_sub')
    stdvarname = variable_info[varalias]['standard_name']
    var, time, timedt = \
        get_d22_ts(sdate, edate, basedate, nID, sensor, varalias,
                   pathlst, strsublst, dict_for_sub)
    # DRY fix: the station time window (default: allow 1 min variation)
    # was duplicated across two if/else collocate_times call sites
    twin = insitu_dict[nID].get('twin', 1)
    idxtmp = collocate_times(unfiltered_t=timedt,
                             sdate=sdate, edate=edate,
                             twin=twin)
    # convert to list for consistency with other classes
    # and make sure that only steps with existing obs are included
    time = [time[i] for i in idxtmp if i < len(var)]
    timedt = [timedt[i] for i in idxtmp if i < len(var)]
    var = [np.real(var[i]) for i in idxtmp if i < len(var)]
    # rm double entries due to 10min spacing
    if kwargs.get('unique') is True:
        # delete 10,30,50 min times, keep 00,20,40
        # 1. create artificial time vector for collocation
        tmpdate = deepcopy(sdate)
        tmpdatelst = []
        while tmpdate < edate:
            tmpdatelst.append(tmpdate)
            tmpdate += timedelta(minutes=20)
        # 2. collocate times against the artificial 20-min grid
        idxtmp = collocate_times(unfiltered_t=timedt,
                                 target_t=tmpdatelst,
                                 twin=twin)
        time = list(np.array(time)[idxtmp])
        timedt = list(np.array(timedt)[idxtmp])
        var = list(np.array(var)[idxtmp])
    # station position repeated to match the series length
    lons = [insitu_dict[nID]['coords'][sensor]['lon']] * len(var)
    lats = [insitu_dict[nID]['coords'][sensor]['lat']] * len(var)
    vardict = {
        stdvarname: var,
        'time': time,
        'datetime': timedt,
        'time_unit': variable_info['time']['units'],
        'longitude': lons,
        'latitude': lats
        }
    return vardict