Exemplo n.º 1
0
    wdsrf
except NameError:
    ws = {th: list() for th in target_hgts}
    wd = {th: list() for th in target_hgts}
    wdsrf = list()

    select_rain = 'all'

    for year in years:

        wpr = parse_data.windprof(year=year)
        wspd = wpr.dframe.wspd
        wdir = wpr.dframe.wdir
        hgt = wpr.hgt

        czd = parse_data.surface('czd', year=year)
        bby = parse_data.surface('bby', year=year)

        if select_rain == 'all':
            select = None
        elif select_rain == 'czd':
            rain_czd = czd.dframe.precip > 0
            select = rain_czd[rain_czd].index
        elif select_rain == 'bby':
            rain_bby = bby.dframe.precip > 0
            select = rain_bby[rain_bby].index
        elif select_rain == 'norain':
            norain_czd = czd.dframe.precip == 0
            norain_bby = bby.dframe.precip == 0
            norain = norain_czd & norain_bby
            select = norain[norain].index
except NameError:

    results = collections.OrderedDict()
    for tr in thres:
        results[tr] = {'U': U, 'V': V}

    for year in years:
        for p in params:
            tta = tta_analysis(year=year)

            tta.start_df_layer(**p)
            tta_dates = tta.tta_dates

            " parse surface and profile obs "
            bby = parse_data.surface('bby', year=year)
            wpr = parse_data.windprof(year=year)

            wpr_tta = wpr.dframe.loc[tta_dates]
            wdr_tta = wpr_tta['wdir']
            wsp_tta = wpr_tta['wspd']

            bby_tta = bby.dframe.loc[tta_dates]

            " append surface values to windprof "
            surf_wsp = iter(bby_tta.wspd.values.tolist())
            surf_wdr = iter(bby_tta.wdir.values.tolist())

            wsp_tta = wsp_tta.map(lambda x: [surf_wsp.next()] + x)
            wdr_tta = wdr_tta.map(lambda x: [surf_wdr.next()] + x)
#years = [1998]
years = [1998]+range(2001,2013)

try:
    WS
except NameError:
#    ws = {th:list() for th in target_hgts}
#    wd = {th:list() for th in target_hgts}
#    wdsrf = list()
    
    WS = pd.DataFrame()
    WD = pd.DataFrame()
    
    for year in years:
          
        czd = parse_data.surface('czd', year=year)        
        bby = parse_data.surface('bby', year=year)
        wpr = parse_data.windprof(year=year)
        hgt = wpr.hgt
        
        ''' reduce to common time period '''
        first_bby = bby.dframe.index[0]
        first_czd = czd.dframe.index[0]
        first_wpr = wpr.dframe.index[0]
    
        last_bby = bby.dframe.index[-1]
        last_czd = czd.dframe.index[-1]
        last_wpr = wpr.dframe.index[-1]
        
        first = max(first_bby,first_czd,first_wpr)   
        last  = min(last_bby,last_czd,last_wpr)
Exemplo n.º 4
0
import parse_data
from ctext import ctext

# Report header: a title line followed by two centered column groups,
# one for the beginning and one for the end of each station record.
txtHeader1 = '\nSurface\n{:^35} || {:^35}'
print txtHeader1.format('Beg', 'End')

# Second header row: station labels for the four date columns.
txtHeader2 = '{:^16} | {:^16} || {:^16} {:^16}'
print txtHeader2.format('BBY', 'CZD', 'CZD', 'BBY')

# Template object used below to build (optionally colored) '{}' slots.
# NOTE(review): `t.text` / `t.red()` come from the project-local `ctext`
# helper; presumably `red()` returns the template wrapped in red color
# codes -- confirm against ctext.
t = ctext('{}')

# Python 2 idiom: `range` returns a list here, so list + range works.
for y in [1998] + range(2001, 2013):
        bby = parse_data.surface('bby', y)
        czd = parse_data.surface('czd', y)

        # first and last timestamps reported by each surface station
        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()

        # template for the two "begin" slots; the second slot is
        # highlighted when CZD starts earlier than BBY
        if beg_czd < beg_bby:
                tb = t.text + ' | ' + t.red()
        else:
                tb = t.text + ' | ' + t.text

        # template for the two "end" slots; the first slot is
        # highlighted when CZD ends later than BBY
        if end_czd > end_bby:
                te = t.red() + ' | ' + t.text
        else:
                te = t.text + ' | ' + t.text

        # full row template: begin slots || end slots
        txtDate = tb + ' || ' + te

        # strftime pattern for the dates (its use is outside this excerpt)
        fmt = '%Y-%m-%d %H:%M'
Exemplo n.º 5
0
def preprocess(years=None, layer=None, verbose=True):
    '''
    Assemble multi-year wind profiles and a layer-mean wind direction.

    For every year the wind profiler and the BBY/CZD surface records are
    trimmed to their common time span, the BBY surface wind is prepended
    to each profiler profile (a height of 0 is prepended to the profiler
    heights accordingly), and time steps with missing precipitation or an
    all-NaN speed profile are dropped.

    Parameters
    ----------
    years : iterable
        Years handed one by one to ``parse_data.windprof`` and
        ``parse_data.surface``. Required and non-empty.
    layer : sequence of two numbers
        ``(bottom, top)`` bounds; heights with ``bottom <= hgt < top``
        form the layer used for the mean direction. Required.
    verbose : bool
        When true, print the per-year count of time steps that have a
        missing precipitation value at either station.

    Returns
    -------
    dict
        ``WD`` / ``WS``: concatenated direction / speed profiles kept
        after the NaN screening; ``WD_rain`` / ``WS_rain``: the subset
        where CZD precipitation is > 0; ``wd_layer``: layer-mean wind
        direction; ``precip``: the (unscreened) precipitation frame of
        the LAST year processed only; ``precip_good``: concatenated
        screened precipitation of all years.

    Raises
    ------
    TypeError
        If ``years`` or ``layer`` is None.
    ValueError
        If ``years`` is empty.
    '''
    # Fail fast, before any data is loaded. The original only failed
    # after parsing (TypeError), or with a confusing NameError on `hgt`
    # when `years` was empty.
    if years is None:
        raise TypeError("'years' must be an iterable of years, not None")
    if layer is None:
        raise TypeError("'layer' must be a (bottom, top) pair, not None")
    years = list(years)
    if not years:
        raise ValueError("'years' must contain at least one year")

    import numpy as np
    import pandas as pd
    import parse_data

    # Per-year pieces are collected and concatenated once at the end;
    # Series.append / DataFrame.append copy on every call and no longer
    # exist in pandas >= 2.
    wd_parts = []
    ws_parts = []
    wd_rain_parts = []
    ws_rain_parts = []
    precip_parts = []

    for year in years:

        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)
        hgt = wpr.hgt

        # common time period: latest start and earliest end
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        # reduce time interval so all start and end at same time
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        # prepend surface values to windprof to make the entire profile;
        # next(it) works on Python 2 and 3 (it.next() is Python-2-only).
        # This relies on `map` visiting rows in order and on both records
        # having the same number of rows after trimming.
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        # time steps where either station has missing precip
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        tx = 'year:{}, any_precip_nan:{:4d}'
        if verbose:
            print(tx.format(year, precip_nans.sum()))

        # time steps whose whole speed profile is NaN
        # (same for ws and wd)
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        # include only time steps with precip at both stations and a
        # profile that has at least one non-missing value
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        any_nan = nan_df.any(axis=1)
        include = ~any_nan
        precip_parts.append(precip[include])

        # rainy time steps at CZD
        rain_czd = czd.precip > 0

        # reduce and store this year's pieces
        wdr = wdr[include]
        wsp = wsp[include]
        wd_parts.append(wdr)
        ws_parts.append(wsp)
        wd_rain_parts.append(wdr[rain_czd])
        ws_rain_parts.append(wsp[rain_czd])

    WD = pd.concat(wd_parts)
    WS = pd.concat(ws_parts)
    WD_rain = pd.concat(wd_rain_parts)
    WS_rain = pd.concat(ws_rain_parts)
    precip_good = pd.concat(precip_parts)

    # compute components
    # NOTE(review): `sin` and `cos` are not defined in this function; the
    # literal arguments 180 and 90 suggest degree-based helpers defined
    # at module level -- confirm.
    WD_sin = WD.apply(lambda x: sin(x))
    WD_cos = WD.apply(lambda x: cos(x))
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    # layer-mean; `hgt` is the height array of the last year processed
    layer_idx = np.where((hgt >= layer[0]) &
                         (hgt < layer[1]))[0]
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270-(np.arctan2(mean_V, mean_U)*180/np.pi)
    # 270 - atan2 lies in [90, 450]; fold values above 360 back
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                        hgt[layer_idx[-1]])

    # NOTE: `precip` is the frame of the last year only (as in the
    # original); `precip_good` holds all years.
    return dict(WD=WD,
                WS=WS,
                WD_rain=WD_rain,
                WS_rain=WS_rain,
                wd_layer=wd_layer,
                precip=precip,
                precip_good=precip_good)
Exemplo n.º 6
0
    def start_df(self, wdir_surf=None, wdir_wprof=None,
                 rain_bby=None, rain_czd=None, nhours=None):
        '''
            this version uses pandas dataframe,
            it should be more accurate and simpler

            Every hour of the period common to the BBY and CZD surface
            records and the wind profiler of ``self.year`` is tested
            against the given thresholds; hours that belong to a run of
            at least ``nhours`` consecutive matching hours are flagged
            in the ``consecutive`` column.

            wdir_surf  : surface wind direction must be <= this value
            wdir_wprof : first-gate profiler direction must be <= this
            rain_bby   : if truthy, BBY precip must be >= this value
            rain_czd   : if truthy, CZD precip must be >= this value;
                         when >= 0.25, hours with zero CZD precip are
                         also excluded from the statistics
            nhours     : minimum run length in hours (required)

            Returns nothing; results are stored on ``self`` (hour and
            rainfall totals, date indexes and the dataframe ``df``).

            Raises TypeError when ``nhours`` is None.
        '''

        # the original failed with the same exception type, but only
        # after all the data had been parsed
        if nhours is None:
            raise TypeError("'nhours' (minimum run length in hours) "
                            "is required")

        import pandas as pd

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # trim the head and tail of the dataset: latest of the
        # beginnings and earliest of the endings
        time_beg = max(beg_bby, beg_czd, beg_wpr)
        time_end = min(end_bby, end_czd, end_wpr)

        # initializations
        onehr = timedelta(hours=1)
        rng = pd.date_range(start=time_beg,
                            end=time_end,
                            freq='1H')
        cols = ('wssrf', 'wswpr', 'wdsrf', 'wdwpr',
                'rbby', 'rczd', 'tta', 'consecutive')
        df = pd.DataFrame(index=rng, columns=cols)

        flags = []  # one "part of a long-enough run" flag per hour
        run = 0     # length of the current streak of matching hours

        # loop evaluates each time
        time = time_beg
        while time <= time_end:

            bby_row = bby.dframe.loc[time]
            wpr_row = wprof.dframe.loc[time]

            surf_wd = bby_row.wdir
            wpr_wd0 = wpr_row.wdir[0]  # first gate
            pbby = bby_row.precip
            pczd = czd.dframe.loc[time].precip

            if surf_wd is None:
                surf_wd = np.nan

            # single .loc[row, col] assignments: the chained form
            # df.loc[time].col = value may write to a temporary copy
            df.loc[time, 'wdsrf'] = surf_wd
            df.loc[time, 'wdwpr'] = wpr_wd0
            df.loc[time, 'rbby'] = pbby
            df.loc[time, 'rczd'] = pczd
            df.loc[time, 'wssrf'] = bby_row.wspd
            df.loc[time, 'wswpr'] = wpr_row.wspd[0]

            # check conditions (comparisons with NaN are False)
            tta_condition = (surf_wd <= wdir_surf) and \
                            (wpr_wd0 <= wdir_wprof)
            if rain_bby:
                tta_condition = tta_condition and (pbby >= rain_bby)
            if rain_czd:
                tta_condition = tta_condition and (pczd >= rain_czd)

            df.loc[time, 'tta'] = tta_condition

            # construct boolean list indicating hourly TTA conditions
            # with minimum of nhours: hours of a growing streak are
            # held back until the streak reaches nhours (all True) or
            # is broken (all False)
            if tta_condition:
                run += 1
                if run == nhours:
                    flags.extend([True] * nhours)
                elif run > nhours:
                    flags.append(True)
            else:
                if 0 < run < nhours:
                    flags.extend([False] * run)
                flags.append(False)
                run = 0

            time += onehr

        # a too-short streak still pending when the period ends was
        # never written by the original, leaving the flags shorter than
        # the dataframe; flush it as non-TTA
        if 0 < run < nhours:
            flags.extend([False] * run)

        df['consecutive'] = np.array(flags, dtype=bool)

        ar_wdsrf = df.wdsrf.values.astype(float)
        ar_wdwpr = df.wdwpr.values.astype(float)
        ar_rbby = df.rbby.values.astype(float)
        ar_rczd = df.rczd.values.astype(float)

        wdsrfIsNan = np.isnan(ar_wdsrf)
        wdwprIsNan = np.isnan(ar_wdwpr)
        rbbyIsNan = np.isnan(ar_rbby)
        rczdIsNan = np.isnan(ar_rczd)

        # hours with any missing observation are always excluded; the
        # original left `exclude` undefined for 0 <= rain_czd < 0.25
        exclude = wdsrfIsNan | wdwprIsNan | rbbyIsNan | rczdIsNan
        if rain_czd is not None and rain_czd >= 0.25:
            # also exclude dates when there is no precip at CZD
            exclude = exclude | (ar_rczd == 0)

        tot_rbby = np.round(df.rbby.sum(),0).astype(int)
        tot_rczd = np.round(df.rczd.sum(),0).astype(int)

        exc_rbby = np.round(df[exclude].rbby.sum(),0).astype(int)
        exc_rczd = np.round(df[exclude].rczd.sum(),0).astype(int)

        inc_rbby = tot_rbby - exc_rbby
        inc_rczd = tot_rczd - exc_rczd

        tot_hrs   = np.round(df.index.size,0).astype(int)
        exc_hours = np.round(exclude.sum(),0).astype(int)
        inc_hours = tot_hrs - exc_hours

        tta_rbby   = np.round(df[df.consecutive].rbby.sum(),0).astype(int)
        tta_rczd   = np.round(df[df.consecutive].rczd.sum(),0).astype(int)
        notta_rbby = inc_rbby - tta_rbby
        notta_rczd = inc_rczd - tta_rczd

        exclude_dates = df[exclude].index
        include_dates = df[~exclude].index
        tta_dates     = df[~exclude & df.consecutive].index
        notta_dates   = df[~exclude & ~df.consecutive].index

        tta_hours   = tta_dates.size
        notta_hours = notta_dates.size

        self.time_beg           = time_beg
        self.time_end           = time_end
        self.count_hrs_include  = inc_hours
        self.count_hrs_exclude  = exc_hours
        self.tot_rainfall_bby   = tot_rbby
        self.tot_rainfall_czd   = tot_rczd
        self.inc_rainfall_bby   = inc_rbby
        self.inc_rainfall_czd   = inc_rczd
        self.exc_rainfall_bby   = exc_rbby
        self.exc_rainfall_czd   = exc_rczd
        self.tta_rainfall_bby   = tta_rbby
        self.tta_rainfall_czd   = tta_rczd
        self.notta_rainfall_bby = notta_rbby
        self.notta_rainfall_czd = notta_rczd
        self.tta_hours          = tta_hours
        self.notta_hours        = notta_hours
        self.wprof_hgt          = wprof.hgt
        self.exclude_dates      = exclude_dates
        self.include_dates      = include_dates
        self.tta_dates          = tta_dates
        self.notta_dates        = notta_dates
        self.df                 = df
Exemplo n.º 7
0
    def start(self, wdir_surf=None, wdir_wprof=None,
              rain_bby=None, rain_czd=None, nhours=None):

        ''' this is an old version
            prefer start_df that uses pandas dataframe

            Walks hour by hour through the period common to the BBY
            and CZD surface records and the wind profiler of
            ``self.year``. Hours with a missing surface or first-gate
            wind direction are skipped (counted in ``count_exclude``);
            the remaining hours are flagged as TTA when they belong to
            a run of at least ``nhours`` consecutive matching hours.
            Skipped hours do not break a run.

            wdir_surf  : surface wind direction must be <= this value
            wdir_wprof : first-gate profiler direction must be <= this
            rain_bby   : if truthy, BBY precip must be >= this value
            rain_czd   : if truthy, CZD precip must be >= this value
            nhours     : minimum run length in hours (required)

            Returns nothing; results are stored on ``self``.
            Raises TypeError when ``nhours`` is None.
        '''

        # the original failed with the same exception type, but only
        # after all the data had been parsed
        if nhours is None:
            raise TypeError("'nhours' (minimum run length in hours) "
                            "is required")

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # the latest of the beg
        time_beg = max(beg_bby, beg_czd, beg_wpr)

        # the earliest of the end
        time_end = min(end_bby, end_czd, end_wpr)

        # rainfall before all obs start
        rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
        rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

        # rainfall after all obs end
        rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
        rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

        # number of windprofiles before (after) all obs start (end)
        nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
        nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

        onehr = timedelta(hours=1)
        time = time_beg
        flags = []        # one TTA flag per included hour
        precip_bby = []   # BBY precip of the included hours
        precip_czd = []   # CZD precip of the included hours
        run = 0           # length of the current streak of matches
        count_while = 0
        count_exclude = 0

        while time <= time_end:

            bby_row = bby.dframe.loc[time]
            surf_wd = bby_row.wdir
            wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
            pbby = bby_row.precip
            pczd = czd.dframe.loc[time].precip

            # exclude data when there is nan in
            # surf obs or windprof first gate
            if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
                count_exclude += 1
                time += onehr
                continue

            # these are obs included in the analysis, then we
            # determine if they are tta or no-tta
            precip_bby.append(pbby)
            precip_czd.append(pczd)

            # check conditions
            tta_condition = (surf_wd <= wdir_surf) and \
                            (wpr_wd0 <= wdir_wprof)
            if rain_bby:
                tta_condition = tta_condition and (pbby >= rain_bby)
            if rain_czd:
                tta_condition = tta_condition and (pczd >= rain_czd)

            # construct boolean list indicating hourly TTA conditions
            # with minimum of nhours: hours of a growing streak are
            # held back until the streak reaches nhours (all True) or
            # is broken (all False)
            if tta_condition:
                run += 1
                if run == nhours:
                    flags.extend([True] * nhours)
                elif run > nhours:
                    flags.append(True)
            else:
                if 0 < run < nhours:
                    flags.extend([False] * run)
                flags.append(False)
                run = 0

            count_while += 1
            time += onehr

        # a too-short streak still pending when the period ends was
        # never written by the original, which made the flags shorter
        # than the rainfall arrays they index below; flush it as non-TTA
        if 0 < run < nhours:
            flags.extend([False] * run)

        tta_bool = np.array(flags, dtype=bool)
        rainfall_bby = np.array(precip_bby, dtype=float)
        rainfall_czd = np.array(precip_czd, dtype=float)

        tta_hours = tta_bool.sum()
        notta_hours = count_while-tta_hours
        self.tta_hours = tta_hours
        self.notta_hours = notta_hours
        self.time_beg = time_beg
        self.time_end = time_end
        self.count_while = count_while
        self.count_exclude = count_exclude
        self.total_rainfall_bby = np.nansum(rainfall_bby)
        self.total_rainfall_czd = np.nansum(rainfall_czd)
        self.bool = tta_bool
        self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
        self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
        self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
        self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
        self.rainfall_bby_before_analysis = rbby_before
        self.rainfall_bby_after_analysis = rbby_after
        self.rainfall_czd_before_analysis = rczd_before
        self.rainfall_czd_after_analysis = rczd_after
        self.nwprof_before = nwprof_before
        self.nwprof_after = nwprof_after
        self.wprof_hgt = wprof.hgt

        print('TTA analysis finished')
Exemplo n.º 8
0
    def start(self, wdir_surf=None, wdir_wprof=None,
              rain_bby=None, rain_czd=None, nhours=None):

        ''' this is an old version
            prefer start_df that uses pandas dataframe
            for analysis

            Walks hour by hour through the period common to the BBY
            and CZD surface records and the wind profiler of
            ``self.year``. Hours with a missing surface or first-gate
            wind direction are skipped (counted in ``count_exclude``);
            the remaining hours are flagged as TTA when they belong to
            a run of at least ``nhours`` consecutive matching hours.
            Skipped hours do not break a run.

            wdir_surf  : surface wind direction must be <= this value
            wdir_wprof : first-gate profiler direction must be <= this
            rain_bby   : if truthy, BBY precip must be >= this value
            rain_czd   : if truthy, CZD precip must be >= this value
            nhours     : minimum run length in hours (required)

            Returns nothing; results are stored on ``self``.
            Raises TypeError when ``nhours`` is None.
        '''

        # the original failed with the same exception type, but only
        # after all the data had been parsed
        if nhours is None:
            raise TypeError("'nhours' (minimum run length in hours) "
                            "is required")

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # the latest of the beg
        time_beg = max(beg_bby, beg_czd, beg_wpr)

        # the earliest of the end
        time_end = min(end_bby, end_czd, end_wpr)

        # rainfall before all obs start
        rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
        rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

        # rainfall after all obs end
        rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
        rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

        # number of windprofiles before (after) all obs start (end)
        nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
        nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

        onehr = timedelta(hours=1)
        time = time_beg
        flags = []        # one TTA flag per included hour
        precip_bby = []   # BBY precip of the included hours
        precip_czd = []   # CZD precip of the included hours
        run = 0           # length of the current streak of matches
        count_while = 0
        count_exclude = 0

        while time <= time_end:

            bby_row = bby.dframe.loc[time]
            surf_wd = bby_row.wdir
            wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
            pbby = bby_row.precip
            pczd = czd.dframe.loc[time].precip

            # exclude data when there is nan in
            # surf obs or windprof first gate
            if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
                count_exclude += 1
                time += onehr
                continue

            # these are obs included in the analysis, then we
            # determine if they are tta or no-tta
            precip_bby.append(pbby)
            precip_czd.append(pczd)

            # check conditions
            tta_condition = (surf_wd <= wdir_surf) and \
                            (wpr_wd0 <= wdir_wprof)
            if rain_bby:
                tta_condition = tta_condition and (pbby >= rain_bby)
            if rain_czd:
                tta_condition = tta_condition and (pczd >= rain_czd)

            # construct boolean list indicating hourly TTA conditions
            # with minimum of nhours: hours of a growing streak are
            # held back until the streak reaches nhours (all True) or
            # is broken (all False)
            if tta_condition:
                run += 1
                if run == nhours:
                    flags.extend([True] * nhours)
                elif run > nhours:
                    flags.append(True)
            else:
                if 0 < run < nhours:
                    flags.extend([False] * run)
                flags.append(False)
                run = 0

            count_while += 1
            time += onehr

        # a too-short streak still pending when the period ends was
        # never written by the original, which made the flags shorter
        # than the rainfall arrays they index below; flush it as non-TTA
        if 0 < run < nhours:
            flags.extend([False] * run)

        tta_bool = np.array(flags, dtype=bool)
        rainfall_bby = np.array(precip_bby, dtype=float)
        rainfall_czd = np.array(precip_czd, dtype=float)

        tta_hours = tta_bool.sum()
        notta_hours = count_while-tta_hours
        self.tta_hours = tta_hours
        self.notta_hours = notta_hours
        self.time_beg = time_beg
        self.time_end = time_end
        self.count_while = count_while
        self.count_exclude = count_exclude
        self.total_rainfall_bby = np.nansum(rainfall_bby)
        self.total_rainfall_czd = np.nansum(rainfall_czd)
        self.bool = tta_bool
        self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
        self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
        self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
        self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
        self.rainfall_bby_before_analysis = rbby_before
        self.rainfall_bby_after_analysis = rbby_after
        self.rainfall_czd_before_analysis = rczd_before
        self.rainfall_czd_after_analysis = rczd_after
        self.nwprof_before = nwprof_before
        self.nwprof_after = nwprof_after
        self.wprof_hgt = wprof.hgt

        print('TTA analysis finished')
Exemplo n.º 9
0
    def start_df_layer(self,
                       wdir_thres  = None,
                       wdir_layer  = (None, None),  # [meters]
                       rain_bby    = None,
                       rain_czd    = None,
                       nhours      = None):

        '''
            this version uses pandas dataframe similar
            to start_df but uses a layer instead of a
            level

            For every hour of the period common to the BBY and CZD
            surface records and the wind profiler of ``self.year``,
            the vector-mean wind direction of the layer is computed
            and tested; hours that belong to a run of at least
            ``nhours`` consecutive matching hours are flagged in the
            ``consecutive`` column.

            wdir_thres : number -> layer-mean direction must be below
                         it; str -> passed with the mean direction to
                         ``parse_operator``; None -> direction is not
                         tested
            wdir_layer : (bottom, top); profiler gates with
                         bottom <= hgt < top are used, and the BBY
                         surface wind is added when bottom == 0
            rain_bby   : if truthy, BBY precip must be >= this value
            rain_czd   : if truthy, CZD precip must be >= this value;
                         when >= 0.25, hours with zero CZD precip are
                         also excluded from the statistics
            nhours     : minimum run length in hours (required)

            Returns nothing; results are stored on ``self`` (hour and
            rainfall totals, date indexes and the dataframe ``df``).

            Raises TypeError when ``nhours`` is None and ValueError
            when no criterion at all is given.
        '''

        # fail before any data is parsed
        if nhours is None:
            raise TypeError("'nhours' (minimum run length in hours) "
                            "is required")
        if wdir_thres is None and not rain_bby and not rain_czd:
            raise ValueError("give at least one of 'wdir_thres', "
                             "'rain_bby' or 'rain_czd'")

        import pandas as pd

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # trim the head and tail of the dataset: latest of the
        # beginnings and earliest of the endings
        time_beg = max(beg_bby, beg_czd, beg_wpr)
        time_end = min(end_bby, end_czd, end_wpr)

        # initializations
        onehr = timedelta(hours=1)
        rng = pd.date_range(start = time_beg,
                            end   = time_end,
                            freq  = '1H')

        # profiler gates inside the requested layer
        idx = np.where((wprof.hgt>=wdir_layer[0]) &
                       (wprof.hgt<wdir_layer[1]))[0]
        wphgt = wprof.hgt[idx]

        # columns included in the dataframe
        wdircol = 'wd_{}-{:2.0f}m'.format(wdir_layer[0],wphgt[-1])
        cols = [wdircol, 'rbby', 'rczd', 'tta', 'consecutive']

        # create dataframe
        df = pd.DataFrame(index=rng,columns=cols)

        use_surface = (wdir_layer[0] == 0)
        flags = []  # one "part of a long-enough run" flag per hour
        run = 0     # length of the current streak of matching hours

        # loop evaluates each time
        time = time_beg
        while time <= time_end:

            wpr_row = wprof.dframe.loc[time]
            bby_row = bby.dframe.loc[time]

            if use_surface:
                surf_wd = np.array(bby_row.wdir)
                surf_ws = np.array(bby_row.wspd)
            else:
                surf_wd = np.array([])
                surf_ws = np.array([])

            wpro_wd = np.array(wpr_row.wdir)[idx]
            wpro_ws = np.array(wpr_row.wspd)[idx]

            wd = np.append(surf_wd,wpro_wd)
            ws = np.append(surf_ws,wpro_ws)

            # vector mean of the layer (a NaN in any level gives NaN)
            u = -ws*np.sin(np.radians(wd))
            v = -ws*np.cos(np.radians(wd))
            u_mean = u.mean()
            v_mean = v.mean()
            wd_mean = 270 - np.arctan2(v_mean,u_mean)*180./np.pi
            # 270 - atan2 lies in [90, 450]; fold values above 360 back
            if wd_mean > 360:
                wd_mean -= 360

            pbby = bby_row.precip
            pczd = czd.dframe.loc[time].precip

            # single .loc[row, col] assignments: the chained form
            # df.loc[time][col] = value may write to a temporary copy
            df.loc[time, wdircol] = wd_mean
            df.loc[time, 'rbby'] = pbby
            df.loc[time, 'rczd'] = pczd

            # joint condition over the criteria that were given; the
            # original left the direction test unset (or stale from
            # the previous hour) for float or missing thresholds
            tta_condition = True
            if wdir_thres is not None:
                if isinstance(wdir_thres,str):
                    tta_condition = parse_operator(wd_mean,wdir_thres)
                else:
                    tta_condition = (wd_mean < wdir_thres)
            if rain_czd:
                tta_condition = tta_condition and (pczd >= rain_czd)
            if rain_bby:
                tta_condition = tta_condition and (pbby >= rain_bby)

            df.loc[time, 'tta'] = tta_condition

            # construct boolean list indicating hourly TTA conditions
            # with minimum of nhours: hours of a growing streak are
            # held back until the streak reaches nhours (all True) or
            # is broken (all False)
            if tta_condition:
                run += 1
                if run == nhours:
                    flags.extend([True] * nhours)
                elif run > nhours:
                    flags.append(True)
            else:
                if 0 < run < nhours:
                    flags.extend([False] * run)
                flags.append(False)
                run = 0

            time += onehr

        # a too-short streak still pending when the period ends was
        # never written by the original, leaving the flags shorter than
        # the dataframe; flush it as non-TTA
        if 0 < run < nhours:
            flags.extend([False] * run)

        df['consecutive'] = np.array(flags, dtype=bool)

        ar_wdir = df[wdircol].values.astype(float)
        ar_rbby = df.rbby.values.astype(float)
        ar_rczd = df.rczd.values.astype(float)

        wdirIsNan = np.isnan(ar_wdir)
        rbbyIsNan = np.isnan(ar_rbby)
        rczdIsNan = np.isnan(ar_rczd)

        # hours with any missing value are always excluded; the
        # original left `exclude` undefined for 0 <= rain_czd < 0.25
        exclude = wdirIsNan | rbbyIsNan | rczdIsNan
        if rain_czd is not None and rain_czd >= 0.25:
            # also exclude dates when there is no precip at CZD
            exclude = exclude | (ar_rczd == 0)

        tot_rbby = np.round(df.rbby.sum(),3)
        tot_rczd = np.round(df.rczd.sum(),3)

        exc_rbby = np.round(df[exclude].rbby.sum(),3)
        exc_rczd = np.round(df[exclude].rczd.sum(),3)

        inc_rbby = tot_rbby - exc_rbby
        inc_rczd = tot_rczd - exc_rczd

        tot_hrs   = np.round(df.index.size,0).astype(int)
        exc_hours = np.round(exclude.sum(),0).astype(int)
        inc_hours = tot_hrs - exc_hours

        tta_rbby   = np.round(df[df.consecutive].rbby.sum(),3)
        tta_rczd   = np.round(df[df.consecutive].rczd.sum(),3)
        notta_rbby = inc_rbby - tta_rbby
        notta_rczd = inc_rczd - tta_rczd

        exclude_dates = df[exclude].index
        include_dates = df[~exclude].index
        tta_dates     = df[~exclude & df.consecutive].index
        notta_dates   = df[~exclude & ~df.consecutive].index

        tta_hours   = tta_dates.size
        notta_hours = notta_dates.size

        self.time_beg           = time_beg
        self.time_end           = time_end
        self.count_hrs_include  = inc_hours
        self.count_hrs_exclude  = exc_hours
        self.tot_rainfall_bby   = tot_rbby
        self.tot_rainfall_czd   = tot_rczd
        self.inc_rainfall_bby   = inc_rbby
        self.inc_rainfall_czd   = inc_rczd
        self.exc_rainfall_bby   = exc_rbby
        self.exc_rainfall_czd   = exc_rczd
        self.tta_rainfall_bby   = tta_rbby
        self.tta_rainfall_czd   = tta_rczd
        self.notta_rainfall_bby = notta_rbby
        self.notta_rainfall_czd = notta_rczd
        self.tta_hours          = tta_hours
        self.notta_hours        = notta_hours
        self.wprof_hgt          = wprof.hgt
        self.exclude_dates      = exclude_dates
        self.include_dates      = include_dates
        self.tta_dates          = tta_dates
        self.notta_dates        = notta_dates
        self.df                 = df
Exemplo n.º 10
0
    def start_df(self, wdir_surf   = None,
                       wdir_wprof  = None,
                       wprof_gate  = 0,
                       rain_bby    = None,
                       rain_czd    = None,
                       nhours      = None):

        '''
            Build an hourly dataframe for self.year and flag the
            hours that meet the requested TTA criteria.

            Parameters
            ----------
            wdir_surf : number or str, optional
                Criterion on the BBY surface wind direction. A
                number means "direction <= value"; a string is
                handed to parse_operator together with the
                observation.
            wdir_wprof : number or str, optional
                Same as wdir_surf, applied to the wind profiler
                direction at gate wprof_gate.
            wprof_gate : int
                Index of the profiler gate to read (default 0).
            rain_bby : number, optional
                Minimum hourly precip at BBY (precip >= value).
            rain_czd : number, optional
                Minimum hourly precip at CZD (precip >= value).
                When it is >= 0.25, hours with zero precip at CZD
                are also excluded from the statistics.
            nhours : int, optional
                Minimum number of consecutive hours meeting the
                criteria for a run to be flagged. None is treated
                as 1 (no minimum duration).

            A criterion left as None (or any other falsy value,
            e.g. 0) is not applied; at least one is required.

            Results (dates, hour counts, rainfall totals and the
            dataframe itself) are stored as attributes on self;
            nothing is returned.

            Raises
            ------
            ValueError
                If no criterion is given.
        '''

        import pandas as pd

        if not (wdir_surf or wdir_wprof or rain_bby or rain_czd):
            raise ValueError('start_df needs at least one criterion')

        # the default used to crash on "[False] * None"; a run
        # length of 1 flags every hour that meets the criteria
        if nhours is None:
            nhours = 1

        def meets(obs, criterion):
            # strings carry their own operator, numbers mean "<="
            if isinstance(criterion, str):
                return parse_operator(obs, criterion)
            return obs <= criterion

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # trim head and tail of the dataset to the period covered
        # by all three instruments
        time_beg = max(beg_bby, beg_czd, beg_wpr)
        time_end = min(end_bby, end_czd, end_wpr)

        rng = pd.date_range(start=time_beg,
                            end=time_end,
                            freq='1H')

        # columns included in the dataframe
        wprofcol = 'wdwpr{}'.format(wprof_gate)
        cols = ['wdsrf', wprofcol, 'rbby', 'rczd', 'tta', 'consecutive']
        df = pd.DataFrame(index=rng, columns=cols)

        # bool_buffer holds a run of hits that has not yet reached
        # nhours; consecutive receives one flag per evaluated hour
        bool_buffer = np.array([False] * nhours)
        count = 0
        consecutive = []

        for time in rng:

            surf_wd = bby.dframe.loc[time].wdir
            wpr_wd0 = wprof.dframe.loc[time].wdir[wprof_gate]
            pbby = bby.dframe.loc[time].precip
            pczd = czd.dframe.loc[time].precip

            # single .loc[row, col] assignment; the chained form
            # df.loc[row].col = value may write to a temporary copy
            df.loc[time, 'wdsrf'] = surf_wd
            df.loc[time, wprofcol] = wpr_wd0
            df.loc[time, 'rbby'] = pbby
            df.loc[time, 'rczd'] = pczd

            # every active criterion has to hold; collecting them
            # in a list covers all combinations (the former elif
            # chain ignored rain_bby in some of them and hit an
            # unbound name in others)
            checks = []
            if wdir_surf:
                checks.append(meets(surf_wd, wdir_surf))
            if wdir_wprof:
                checks.append(meets(wpr_wd0, wdir_wprof))
            if rain_czd:
                checks.append(pczd >= rain_czd)
            if rain_bby:
                checks.append(pbby >= rain_bby)
            tta_condition = all(checks)

            df.loc[time, 'tta'] = tta_condition

            # construct boolean list indicating hourly TTA
            # conditions with a minimum of nhours in a row
            if tta_condition and bool_buffer.all():
                # run already long enough: flag this hour directly
                consecutive.append(True)
            elif tta_condition:
                bool_buffer[count] = True
                count += 1
                if bool_buffer.all():
                    # run just reached nhours: flag it all at once
                    consecutive.extend([True] * nhours)
            else:
                bufsum = int(bool_buffer.sum())
                if bufsum == 0 or bufsum == nhours:
                    consecutive.append(False)
                else:
                    # run too short: its hours and this one are False
                    consecutive.extend([False] * (bufsum + 1))
                # reset buffer
                bool_buffer = np.array([False] * nhours)
                count = 0

        # a run still pending when the record ends never reached
        # nhours; flush it so there is exactly one flag per hour
        pending = int(bool_buffer.sum())
        if 0 < pending < nhours:
            consecutive.extend([False] * pending)

        df['consecutive'] = np.array(consecutive, dtype=bool)

        ar_wdsrf = df.wdsrf.values.astype(float)
        ar_wdwpr = df[wprofcol].values.astype(float)
        ar_rbby = df.rbby.values.astype(float)
        ar_rczd = df.rczd.values.astype(float)

        # hours with any missing observation are always excluded
        exclude = np.isnan(ar_wdsrf) | np.isnan(ar_wdwpr) \
                | np.isnan(ar_rbby) | np.isnan(ar_rczd)

        if rain_czd is not None and rain_czd >= 0.25:
            # also exclude dates when there is no precip at CZD;
            # smaller thresholds keep only the missing-data mask
            # (previously exclude was left undefined for them)
            exclude = exclude | (ar_rczd == 0)

        tot_rbby = np.round(df.rbby.sum(),3)
        tot_rczd = np.round(df.rczd.sum(),3)

        exc_rbby = np.round(df[exclude].rbby.sum(),3)
        exc_rczd = np.round(df[exclude].rczd.sum(),3)

        inc_rbby = tot_rbby - exc_rbby
        inc_rczd = tot_rczd - exc_rczd

        tot_hrs   = np.round(df.index.size,0).astype(int)
        exc_hours = np.round(exclude.sum(),0).astype(int)
        inc_hours = tot_hrs - exc_hours

        tta_rbby   = np.round(df[df.consecutive].rbby.sum(),3)
        tta_rczd   = np.round(df[df.consecutive].rczd.sum(),3)
        notta_rbby = inc_rbby - tta_rbby
        notta_rczd = inc_rczd - tta_rczd

        exclude_dates = df[exclude].index
        include_dates = df[~exclude].index
        tta_dates     = df[~exclude & df.consecutive].index
        notta_dates   = df[~exclude & ~df.consecutive].index

        tta_hours   = tta_dates.size
        notta_hours = notta_dates.size

        self.time_beg           = time_beg
        self.time_end           = time_end
        self.count_hrs_include  = inc_hours
        self.count_hrs_exclude  = exc_hours
        self.tot_rainfall_bby   = tot_rbby
        self.tot_rainfall_czd   = tot_rczd
        self.inc_rainfall_bby   = inc_rbby
        self.inc_rainfall_czd   = inc_rczd
        self.exc_rainfall_bby   = exc_rbby
        self.exc_rainfall_czd   = exc_rczd
        self.tta_rainfall_bby   = tta_rbby
        self.tta_rainfall_czd   = tta_rczd
        self.notta_rainfall_bby = notta_rbby
        self.notta_rainfall_czd = notta_rczd
        self.tta_hours          = tta_hours
        self.notta_hours        = notta_hours
        self.wprof_hgt          = wprof.hgt
        self.exclude_dates      = exclude_dates
        self.include_dates      = include_dates
        self.tta_dates          = tta_dates
        self.notta_dates        = notta_dates
        self.df                 = df
Exemplo n.º 11
0
def preprocess(years=None, layer=None, verbose=True):
    """Collect wind profiles and precip for several years.

    For every year the BBY surface wind is prepended to the wind
    profiler gates, hours with missing precip (at BBY or CZD) or
    with an entirely missing profile are dropped, and the remaining
    hours are accumulated. A layer-mean wind direction is then
    computed over the gates whose height lies in
    [layer[0], layer[1]).

    Parameters
    ----------
    years : iterable
        Years to process; at least one is required.
    layer : sequence of two numbers
        Lower (inclusive) and upper (exclusive) height bounds, in
        the units of the profiler heights.
    verbose : bool
        When True, print the count of hours with missing precip
        for each year.

    Returns
    -------
    dict
        WD, WS           : per-hour direction / speed profiles
        WD_rain, WS_rain : same, restricted to hours with CZD precip > 0
        wd_layer         : layer-mean wind direction
        precip           : BBY/CZD precip of the LAST year only
        precip_good      : BBY/CZD precip of the retained hours

    Raises
    ------
    ValueError
        If years is empty/None or layer is None.
    """

    import pandas as pd
    import parse_data

    years = list(years) if years is not None else []
    if not years:
        raise ValueError('preprocess needs at least one year')
    if layer is None:
        raise ValueError('preprocess needs a (bottom, top) layer')

    # per-year pieces; joined once after the loop because
    # Series.append / DataFrame.append no longer exist in pandas
    wd_parts = []
    ws_parts = []
    wd_rain_parts = []
    ws_rain_parts = []
    precip_good_parts = []

    for year in years:

        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)
        hgt = wpr.hgt

        # find common time period
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        # reduce time interval so all start and end at same time
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        # prepend surface values to windprof to make entire profile;
        # the iterators are consumed row by row, which assumes bby
        # and wpr have the same hours in the same order
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        # next(it) instead of it.next(): works on Python 2 and 3
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        # hours where precip is missing at either station
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        tx = 'year:{}, any_precip_nan:{:4d}'
        if verbose:
            print(tx.format(year, precip_nans.sum()))

        # hours where the entire profile is missing
        # (same for ws and wd)
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        # keep hours with precip at both stations and at least one
        # non-missing value in the profile
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        any_nan = nan_df.any(axis=1)
        include = ~any_nan
        precip_good_parts.append(precip[include])

        # rainy hours at CZD
        rain_czd = czd.precip > 0

        # reduce and save for the big Series
        wdr = wdr[include]
        wsp = wsp[include]
        wd_parts.append(wdr)
        ws_parts.append(wsp)
        wd_rain_parts.append(wdr[rain_czd])
        ws_rain_parts.append(wsp[rain_czd])

    WD = pd.concat(wd_parts)
    WS = pd.concat(ws_parts)
    WD_rain = pd.concat(wd_rain_parts)
    WS_rain = pd.concat(ws_rain_parts)
    precip_good = pd.concat(precip_good_parts)

    # compute components
    # NOTE(review): sin/cos come from elsewhere in the file and are
    # called with 90/180, so presumably they take degrees -- confirm
    WD_sin = WD.apply(lambda x: sin(x))
    WD_cos = WD.apply(lambda x: cos(x))
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    # layer-mean; hgt is the height array of the last year processed
    layer_idx = np.where((hgt >= layer[0]) & (hgt < layer[1]))[0]
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270 - (np.arctan2(mean_V, mean_U) * 180 / np.pi)
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                              hgt[layer_idx[-1]])

    # NOTE(review): precip is whatever the last loop iteration left
    # behind (last year only); kept as-is for callers -- confirm
    # whether an all-years frame was intended
    return dict(WD=WD,
                WS=WS,
                WD_rain=WD_rain,
                WS_rain=WS_rain,
                wd_layer=wd_layer,
                precip=precip,
                precip_good=precip_good)
Exemplo n.º 12
0
import parse_data
from ctext import ctext

txtHeader1 = '\nSurface\n{:^35} || {:^35}'
print txtHeader1.format('Beg', 'End')

txtHeader2 = '{:^16} | {:^16} || {:^16} {:^16}'
print txtHeader2.format('BBY', 'CZD', 'CZD', 'BBY')

t = ctext('{}')

for y in [1998] + range(2001, 2013):
    bby = parse_data.surface('bby', y)
    czd = parse_data.surface('czd', y)

    beg_bby, end_bby = bby.check_beg_end()
    beg_czd, end_czd = czd.check_beg_end()

    if beg_czd < beg_bby:
        tb = t.text + ' | ' + t.red()
    else:
        tb = t.text + ' | ' + t.text

    if end_czd > end_bby:
        te = t.red() + ' | ' + t.text
    else:
        te = t.text + ' | ' + t.text

    txtDate = tb + ' || ' + te

    fmt = '%Y-%m-%d %H:%M'