Exemplo n.º 1
0
    def __init__(self,
                 nc_path,
                 var_name,
                 vcc_size=None,
                 vcc_nelems=None,
                 vcc_preemption=None):
        '''
        Initialize through the StationSerialDataDb initializer and reset
        the chunk-related attributes to None.

        Parameters
        ----------
        nc_path : str
            File path to the netCDF4 dataset
        var_name : str
            The name of the main variable to be loaded.
        vcc_size : int, optional
            The netCDF4 variable chunk cache size in bytes
        vcc_nelems : int, optional
            The netCDF4 number of chunk slots in the
            raw data chunk cache hash table.
        vcc_preemption : int, optional
            The netCDF4 var chunk cache preemption value.
        '''

        # All arguments are forwarded positionally, unchanged, to the parent.
        StationSerialDataDb.__init__(self, nc_path, var_name, vcc_size,
                                     vcc_nelems, vcc_preemption)

        # Chunk attributes start out unset.
        self.chk_stnids = self.chk_obs = None
        self.chk_deg_buf = self.chk_bnds = None
Exemplo n.º 2
0
def proc_write(fpath_stndb, elem, fpath_out, nwrkers):
    '''
    Writer process: create the output station database and fill it with
    the results received over MPI.

    Parameters
    ----------
    fpath_stndb : str
        File path to the input station database.
    elem : str
        Name of the main variable; used both to open the databases and
        as the output variable that receives the daily values.
    fpath_out : str
        File path of the output station database to create.
    nwrkers : int
        Number of TAG_STOPWORK messages to wait for before finishing.

    Returns
    -------
    int
        0 once all ``nwrkers`` stop messages have been received.
    '''

    status = MPI.Status()

    # Read everything needed from the input database, then release it.
    stn_da = StationSerialDataDb(fpath_stndb, elem)
    stn_ids = stn_da.stn_ids
    stns = stn_da.stns
    in_mask = np.isfinite(stn_da.stns[MASK])
    not_bad = np.isnan(stn_da.stns[BAD])
    stn_mask = np.logical_and(in_mask, not_bad)
    days = stn_da.days
    stn_da.ds.close()
    del stn_da

    print("WRITER: Creating output station netCDF database...")

    create_quick_db(fpath_out, stns, days, DB_VARIABLES[elem])
    stnda_out = StationSerialDataDb(fpath_out, elem, mode='r+')

    # One station-level normal variable per calendar month (1..12).
    mth_names = []
    for mth in np.arange(1, 13):
        vname = get_norm_varname(mth)
        stnda_out.add_stn_variable(vname,
                                   '',
                                   units='C',
                                   dtype='f8',
                                   fill_value=netCDF4.default_fillvals['f8'])
        mth_names.append(vname)

    stnda_out.ds.sync()

    print("WRITER: Output station netCDF database created.")

    # Progress is tracked against the stations that are in-mask and not bad.
    stat_chk = StatusCheck(np.sum(stn_mask), 50)

    nwrkrs_done = 0
    while True:

        stn_id, tair_daily, tair_norms = MPI.COMM_WORLD.recv(
            source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)

        if status.tag != TAG_STOPWORK:
            # Column of this station in the output database.
            x = np.nonzero(stn_ids == stn_id)[0][0]
            stnda_out.ds.variables[elem][:, x] = tair_daily

            for i, vname in enumerate(mth_names):
                stnda_out.ds.variables[vname][x] = tair_norms[i]

            stnda_out.ds.sync()
            stat_chk.increment()
            continue

        # Stop message: finish only after every worker has reported.
        nwrkrs_done += 1
        if nwrkrs_done == nwrkers:
            print("WRITER: Finished")
            return 0
def proc_write(fpath_stndb, elem, nwrkers):
    '''
    Writer process: store per-station, per-month variogram parameters
    (nugget, partial sill, range) received over MPI in the station database.

    Parameters
    ----------
    fpath_stndb : str
        File path to the station database; opened in 'r+' mode.
    elem : str
        Name of the main variable used to open the database.
    nwrkers : int
        Number of TAG_STOPWORK messages to wait for before finishing.

    Returns
    -------
    int
        0 once all ``nwrkers`` stop messages have been received.
    '''

    status = MPI.Status()
    nwrkrs_done = 0

    stn_da = StationSerialDataDb(fpath_stndb, elem, mode="r+")
    mask_stns = np.logical_and(np.isfinite(stn_da.stns[MASK]),
                               np.isnan(stn_da.stns[BAD]))
    nstns = np.sum(mask_stns)

    # Create the 12 x 3 station variables once and keep their handles in
    # month order so the receive loop does not have to rebuild the names.
    mth_vars = []
    for mth in np.arange(1, 13):

        vname_nug = get_krigparam_varname(mth, VARIO_NUG)
        vname_psill = get_krigparam_varname(mth, VARIO_PSILL)
        vname_rng = get_krigparam_varname(mth, VARIO_RNG)

        # Each variable is labelled with its own name. Previously the
        # partial sill and range variables were labelled with the nugget's
        # name. NOTE(review): the second positional argument is presumably
        # the long name -- confirm against add_stn_variable.
        var_nug = stn_da.add_stn_variable(vname_nug, vname_nug, "C**2", 'f8')
        var_psill = stn_da.add_stn_variable(vname_psill, vname_psill,
                                            "C**2", 'f8')
        var_rng = stn_da.add_stn_variable(vname_rng, vname_rng, "km", 'f8')

        mth_vars.append((var_nug, var_psill, var_rng))

    stat_chk = StatusCheck(nstns, 250)

    while 1:

        stn_id, nug, psill, rng = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE,
                                                      tag=MPI.ANY_TAG,
                                                      status=status)

        if status.tag == TAG_STOPWORK:

            # Finish only after every worker has sent its stop message.
            nwrkrs_done += 1
            if nwrkrs_done == nwrkers:
                print("WRITER: Finished")
                return 0
        else:

            x = stn_da.stn_idxs[stn_id]

            # nug/psill/rng are indexed 0..11 for months 1..12.
            for i, (var_nug, var_psill, var_rng) in enumerate(mth_vars):
                var_nug[x] = nug[i]
                var_psill[x] = psill[i]
                var_rng[x] = rng[i]

            stn_da.ds.sync()

            stat_chk.increment()
def proc_coord(fpath_stndb, elem, nwrkers):
    '''
    Coordinator process: send one station id at a time to the workers,
    then tell every worker to stop.

    Parameters
    ----------
    fpath_stndb : str
        File path to the station database.
    elem : str
        Name of the main variable used to open the database.
    nwrkers : int
        Number of worker processes; workers occupy ranks
        N_NON_WRKRS .. N_NON_WRKRS + nwrkers - 1.
    '''

    stn_da = StationSerialDataDb(fpath_stndb, elem)

    # Only set kriging params for stations within mask and that are not
    # marked as bad
    in_mask = np.isfinite(stn_da.stns[MASK])
    not_bad = np.isnan(stn_da.stns[BAD])
    mask_stns = np.logical_and(in_mask, not_bad)
    stns = stn_da.stns[mask_stns]

    print("COORD: Done initialization. Starting to send work.")

    # Number of messages received back from workers (not otherwise used).
    nrec = 0

    for cnt, stn_id in enumerate(stns[STN_ID]):

        if cnt >= nwrkers:
            # Every worker already has a job: block until a message arrives
            # and use its payload as the next destination rank.
            dest = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG)
            nrec += 1
        else:
            # First round: seed each worker rank directly.
            dest = cnt + N_NON_WRKRS

        MPI.COMM_WORLD.send(stn_id, dest=dest, tag=TAG_DOWORK)

    for w in np.arange(nwrkers):
        MPI.COMM_WORLD.send(None, dest=w + N_NON_WRKRS, tag=TAG_STOPWORK)

    print("COORD: done")
Exemplo n.º 5
0
    def __init__(self, path_db, tair_var):
        '''
        Open the station database and build the temperature interpolator.

        Parameters
        ----------
        path_db : str
            File path to a serially complete netCDF
            station database containing the stations and
            temperature variable for interpolation.
        tair_var : str
            The temperature variable for interpolation ('tmin' or 'tmax')
        '''

        # Open with an explicit variable chunk cache size.
        db = StationSerialDataDb(path_db, tair_var, vcc_size=470560000 * 2)

        # Select among stations whose BAD value is NaN.
        selector = StationSelect(db,
                                 stn_mask=np.isnan(db.stns[BAD]),
                                 rm_zero_dist_stns=True)

        # Kriging and GWR components share the same station selector.
        interp = InterpTair(KrigTair(selector), GwrTairAnom(selector))

        self.stn_da = db
        self.interp_tair = interp
        self.mth_masks = db.mth_idx
Exemplo n.º 6
0
def proc_coord(fpath_stndb, elem, climdivs, nwrkers):
    '''
    Coordinator process: broadcast the usable station ids, then send the
    stations of each climate division to the workers one id at a time.

    Parameters
    ----------
    fpath_stndb : str
        File path to the station database.
    elem : str
        Name of the main variable used to open the database.
    climdivs : iterable
        Climate division codes to process, in order.
    nwrkers : int
        Number of worker processes; workers occupy ranks
        N_NON_WRKRS .. N_NON_WRKRS + nwrkers - 1.
    '''

    stn_da = StationSerialDataDb(fpath_stndb, elem)

    # Only run xval optimization for stations within mask and that are not
    # marked as bad
    in_mask = np.isfinite(stn_da.stns[MASK])
    not_bad = np.isnan(stn_da.stns[BAD])
    mask_stns = np.logical_and(in_mask, not_bad)
    stns = stn_da.stns[mask_stns]

    # Send stn ids to all processes
    MPI.COMM_WORLD.bcast(stns[STN_ID], root=RANK_COORD)

    print("COORD: Done initialization. Starting to send work.")

    # cnt runs across all divisions: only the first nwrkers jobs overall
    # are sent to a worker rank directly.
    cnt = 0
    # Number of messages received back from workers (not otherwise used).
    nrec = 0

    for climdiv in climdivs:

        in_climdiv = stn_da.stns[CLIMDIV] == climdiv
        stn_ids_xval = stn_da.stn_ids[np.logical_and(in_climdiv, mask_stns)]

        for stn_id in stn_ids_xval:

            if cnt >= nwrkers:
                # Block until a message arrives and use its payload as the
                # next destination rank.
                dest = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE,
                                           tag=MPI.ANY_TAG)
                nrec += 1
            else:
                dest = cnt + N_NON_WRKRS

            MPI.COMM_WORLD.send(stn_id, dest=dest, tag=TAG_DOWORK)
            cnt += 1

        print("COORD: Finished xval of climate division: " + str(climdiv))

    for w in np.arange(nwrkers):
        MPI.COMM_WORLD.send(None, dest=w + N_NON_WRKRS, tag=TAG_STOPWORK)

    print("COORD: done")
Exemplo n.º 7
0
    def __init__(self, path_db, tair_var):
        '''
        Open the station database and build the kriging object.

        Parameters
        ----------
        path_db : str
            File path to a serially complete netCDF
            station database containing the stations and
            temperature variable for interpolation.
        tair_var : str
            The temperature variable for interpolation ('tmin' or 'tmax')
        '''

        db = StationSerialDataDb(path_db, tair_var)

        # Select among stations whose BAD value is NaN.
        selector = StationSelect(db,
                                 stn_mask=np.isnan(db.stns[BAD]),
                                 rm_zero_dist_stns=True)

        self.krig = KrigTairAll(selector)
        self.stn_da = db
Exemplo n.º 8
0
    def load_obs(self, stn_ids, mth=None):
        '''
        Load station observations.

        When a month is given, observations are taken from the in-memory
        chunk (``self.chk_obs[mth]``). If some requested stations are not in
        the chunk, it is rebuilt via ``set_obs`` with the buffer increased
        by one until all of them are found.

        Parameters
        ----------
        stn_ids : ndarray of str or str
            A numpy array of N station ids or a single station id
        mth : int, optional
            Only load observations for a specific month

        Returns
        -------
        obs : ndarray
            If ``mth`` is None, whatever ``StationSerialDataDb.load_obs``
            returns. Otherwise an array of shape P*N taken along axis 1 of
            the month's chunk, where N is the number of requested stations;
            the station axis is kept even when N is 1.
        '''

        if mth is None:
            return StationSerialDataDb.load_obs(self, stn_ids, mth)

        # np.size works for an ndarray, a list and a single str id alike;
        # a plain ``stn_ids.size`` fails for the documented single-str case.
        n_requested = np.size(stn_ids)

        mask = np.nonzero(
            np.in1d(self.chk_stnids, stn_ids, assume_unique=True))[0]

        # Grow the chunk until every requested station is present in it.
        # NOTE(review): relies on set_obs refreshing chk_stnids/chk_deg_buf;
        # an id that never appears would keep this loop running -- confirm.
        while mask.size != n_requested:
            print("WARNING: Increasing obs chunk...")
            self.set_obs(self.chk_bnds, self.chk_deg_buf + 1)
            mask = np.nonzero(
                np.in1d(self.chk_stnids, stn_ids, assume_unique=True))[0]

        return np.take(self.chk_obs[mth], mask, axis=1)
Exemplo n.º 9
0
def proc_write(fpath_stndb, elem, climdivs, ngh_rng, path_out_optim, nwrkers):
    '''
    Writer process: store cross-validation MAE results per climate division
    and, once all workers are done, set the optimal number of neighbors.

    Parameters
    ----------
    fpath_stndb : str
        File path to the station database; opened in 'r+' mode.
    elem : str
        Name of the main variable used to open the database.
    climdivs : iterable
        Climate division codes; one output dataset is created per division.
    ngh_rng : ndarray
        Neighbor values passed on to create_climdiv_optim_nstns_db.
    path_out_optim : str
        Output path passed to create_climdiv_optim_nstns_db and
        set_optim_nstns_tair_anom.
    nwrkers : int
        Number of TAG_STOPWORK messages to wait for before finishing.

    Returns
    -------
    int
        0 once all ``nwrkers`` stop messages have been received.
    '''

    status = MPI.Status()

    # Station ids to process are broadcast from RANK_COORD.
    stn_ids = MPI.COMM_WORLD.bcast(None, root=RANK_COORD)
    print("WRITER: Received broadcast msg")

    stn_da = StationSerialDataDb(fpath_stndb, elem, mode="r+")
    stn_mask = np.in1d(stn_da.stn_ids, stn_ids, assume_unique=True)
    stns = stn_da.stns[stn_mask]

    # climate division -> (output dataset, station ids of that division)
    climdiv_ds = {}
    ttl_xval_stns = 0
    for climdiv in climdivs:

        stnids_climdiv = stns[STN_ID][stns[CLIMDIV] == climdiv]
        out_ds = create_climdiv_optim_nstns_db(path_out_optim, elem,
                                               stnids_climdiv, ngh_rng,
                                               climdiv)
        climdiv_ds[climdiv] = (out_ds, stnids_climdiv)
        ttl_xval_stns += stnids_climdiv.size

    print("WRITER: Output NCDF files created")

    # station id -> row in stns
    stn_idxs = {stns[STN_ID][x]: x for x in np.arange(stns.size)}

    # neighbor value -> position in ngh_rng (built but not read below)
    ngh_idxs = {ngh_rng[x]: x for x in np.arange(ngh_rng.size)}

    stat_chk = StatusCheck(ttl_xval_stns, 250)

    nwrkrs_done = 0
    while True:

        # bias and r2 are received but only mae is written below.
        stn_id, mae, bias, r2 = MPI.COMM_WORLD.recv(source=MPI.ANY_SOURCE,
                                                    tag=MPI.ANY_TAG,
                                                    status=status)

        if status.tag != TAG_STOPWORK:

            stn = stns[stn_idxs[stn_id]]
            ds, stnids_climdiv = climdiv_ds[stn[CLIMDIV]]

            # Position of the station along the last axis of 'mae'.
            dim2 = np.nonzero(stnids_climdiv == stn_id)[0][0]
            ds.variables['mae'][:, :, dim2] = mae
            ds.sync()

            stat_chk.increment()
            continue

        # Stop message: finish only after every worker has reported.
        nwrkrs_done += 1
        if nwrkrs_done == nwrkers:

            print("WRITER: Setting the optim # of nghs...")

            set_optim_nstns_tair_anom(stn_da, path_out_optim)

            print("WRITER: Finished")
            return 0
Exemplo n.º 10
0
interpolation.
'''

from twx.db import MASK, TDI, StationSerialDataDb, BAD, CLIMDIV
from twx.infill import set_bad_stations, find_dup_stns
from twx.interp import XvalOutlier
from twx.utils import TwxConfig
import numpy as np
import os

if __name__ == '__main__':

    twx_cfg = TwxConfig(os.getenv('TOPOWX_INI'))

    stnda_tmin = StationSerialDataDb(twx_cfg.fpath_stndata_nc_serial_tmin,
                                     'tmin',
                                     mode='r+')
    stnda_tmax = StationSerialDataDb(twx_cfg.fpath_stndata_nc_serial_tmax,
                                     'tmax',
                                     mode='r+')
    stnda_infill_tmin = StationSerialDataDb(
        twx_cfg.fpath_stndata_nc_infill_tmin, 'tmin')
    stnda_infill_tmax = StationSerialDataDb(
        twx_cfg.fpath_stndata_nc_infill_tmax, 'tmax')

    # Load station ids that were marked as "bad" due to infilling issues
    bad_stnids = np.unique(
        np.loadtxt(twx_cfg.fpath_flagged_bad_stns,
                   delimiter=",",
                   dtype=np.str,
                   skiprows=1,