def plot_with_lines(year=None, target=None):
    """Plot a time-height mesh of one profiler variable beside its profiles.

    The left panel is a ``pcolormesh`` of ``target`` drawn against
    ``wprof.time`` and ``wprof.hgt`` with the color range fixed to 0-360.
    The right panel overlays every column of the stacked array as a faint
    red line against the gate index.

    Parameters
    ----------
    year : passed unchanged to ``parse_data.windprof``.
    target : column of ``wprof.dframe`` to plot; also used as the x-label
        of the right panel.

    NOTE(review): the color limits and the title are hard-coded for wind
    direction, whatever ``target`` is -- confirm this is intended.
    """
    plt.figure(figsize=(10, 5))
    grid = gsp.GridSpec(1, 2, width_ratios=[2, 1])
    mesh_ax, line_ax = [plt.subplot(grid[cell]) for cell in (0, 1)]

    profiler = parse_data.windprof(year)
    stack = np.squeeze(pandas2stack(profiler.dframe[target]))
    masked = ma.masked_where(np.isnan(stack), stack)

    # left panel: masked values (NaN) are left blank by pcolormesh
    mesh_ax.pcolormesh(profiler.time, profiler.hgt, masked,
                       vmin=0, vmax=360)
    time_axis = mesh_ax.xaxis
    time_axis.set_major_locator(mdates.MonthLocator())
    time_axis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    mesh_ax.set_xlabel(r'$ Time \rightarrow$')
    mesh_ax.set_ylabel('height gate')
    mesh_ax.set_title('BBY Windprof wdir')

    # right panel: one translucent line per column of the stack
    n_profiles = stack.shape[1]
    column = 0
    while column < n_profiles:
        line_ax.plot(stack[:, column], range(stack.shape[0]),
                     color='r', alpha=0.05)
        column += 1
    line_ax.set_yticklabels('')
    line_ax.set_xlabel(target)

    plt.tight_layout()
    plt.show(block=False)
def plot_with_lines(year=None,target=None):
    """Show a profiler variable as a time-height mesh plus stacked profiles.

    Left axis: ``pcolormesh`` of ``wprof.dframe[target]`` (NaNs masked)
    over ``wprof.time`` / ``wprof.hgt``, color range 0-360.
    Right axis: every column of the stacked array drawn as a translucent
    red line against the gate index.

    Parameters
    ----------
    year : forwarded to ``parse_data.windprof``.
    target : name of the ``wprof.dframe`` column to draw; also the x-label
        of the right axis.

    NOTE(review): title and color range are fixed for wind direction even
    if another target is requested -- confirm with callers.
    """

    plt.figure(figsize=(10,5))

    gs = gsp.GridSpec(1, 2,
                      width_ratios=[2,1]
                      )

    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    wprof = parse_data.windprof(year)
    wp = np.squeeze(pandas2stack(wprof.dframe[target]))
    wp_ma = ma.masked_where(np.isnan(wp),wp)
    X,Y=wprof.time,wprof.hgt
    ax1.pcolormesh(X,Y,wp_ma,vmin=0,vmax=360)
    ax1.xaxis.set_major_locator(mdates.MonthLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    ax1.set_xlabel(r'$ Time \rightarrow$')
    ax1.set_ylabel('height gate')

    # the gate index is the same for every profile: build it once
    n_gates, n_profiles = wp.shape[0], wp.shape[1]
    gates = range(n_gates)
    for prof in range(n_profiles):
        ax2.plot(wp[:,prof],gates,color='r',alpha=0.05)
    ax2.set_yticklabels('')
    ax2.set_xlabel(target)

    ax1.set_title('BBY Windprof wdir')
    plt.tight_layout()
    plt.show(block=False)
import parse_data

# Run the profiler height check for 1998 and 2001-2012.
# ``list(...)`` keeps the concatenation valid on Python 3, where ``range``
# is no longer a list (it is a no-op copy on Python 2).
for y in [1998] + list(range(2001, 2013)):
    wprof = parse_data.windprof(y)
    wprof.check_hgt(y)
def process(year=[],wdsurf=None,
               wdwpro=None,rainbb=None,
               raincz=None, nhours=None):
    """Build wind-speed and wind-direction CFADs for all/TTA/no-TTA hours.

    For each target ('wdir', 'wspd') and each year, the TTA analysis is run
    through ``tta_analysis(y).start_df(...)``, the profiler column is
    reduced to the included, TTA and no-TTA dates, and the yearly stacks
    are joined side by side with ``np.hstack``.  Every height row is then
    histogrammed (CFAD, following Yuter et al. 1995).

    Parameters
    ----------
    year : sequence of years, each passed to ``tta_analysis`` and
        ``parse_data.windprof``.  The default list is never mutated.
    wdsurf, wdwpro, rainbb, raincz, nhours : forwarded to ``start_df`` as
        ``wdir_surf``, ``wdir_wprof``, ``rain_bby``, ``rain_czd`` and
        ``nhours``.

    Returns
    -------
    list
        ``[hist_spd, hist_dir, cfad_spd, cfad_dir, bins_spd, bins_dir,
        hgt, wp_hours, tta_hours, notta_hours, average_spd, average_dir,
        median_spd, median_dir]``.  ``hist``/``cfad`` arrays have shape
        (40, nbins, 3) and ``average``/``median`` (40, 3); the last axis
        is 0 = all included hours, 1 = TTA, 2 = no-TTA.  ``hgt`` is the
        ``hgt`` attribute of the last year's profiler object.  A row with
        no valid data gets NaN for cfad, average and median.

    Raises
    ------
    ValueError
        If ``year`` is empty (previously this ended in a ``NameError``).

    NOTE(review): unlike ``processv2`` this version does not handle a year
    with zero or one TTA hour -- confirm before using it on such data.
    """
    if len(year) == 0:
        raise ValueError('year must contain at least one year')

    binss = {'wdir': np.arange(0, 370, 10),
             'wspd': np.arange(0, 36, 1)}
    ngates = 40  # size of the result buffers along the height axis

    arrays = {}
    for t in ('wdir', 'wspd'):

        stacks = None
        for y in year:
            print('Processing year {}'.format(y))

            # tta analysis
            tta = tta_analysis(y)
            tta.start_df(wdir_surf=wdsurf,
                         wdir_wprof=wdwpro,
                         rain_bby=rainbb,
                         rain_czd=raincz,
                         nhours=nhours)

            # read wprof and partition it: all included, only tta, only notta
            wprof_df = parse_data.windprof(y)
            wprof = wprof_df.dframe[t].loc[tta.include_dates]
            parts = [np.squeeze(pandas2stack(part))
                     for part in (wprof,
                                  wprof.loc[tta.tta_dates],
                                  wprof.loc[tta.notta_dates])]

            if stacks is None:
                stacks = parts
            else:
                stacks = [np.hstack((acc, new))
                          for acc, new in zip(stacks, parts)]

        wp, wp_tta, wp_notta = stacks
        _, wp_hours = wp.shape
        _, tta_hours = wp_tta.shape
        _, notta_hours = wp_notta.shape

        arrays[t] = [wp, wp_tta, wp_notta]

    # makes CFAD; ``items()`` works on Python 2 and 3 (``iteritems`` does not)
    results = {}
    for k, (all_hrs, tta_hrs, notta_hrs) in arrays.items():

        nbins = len(binss[k]) - 1
        hist_array = np.empty((ngates, nbins, 3))
        cfad_array = np.empty((ngates, nbins, 3))
        average = np.empty((ngates, 3))
        median = np.empty((ngates, 3))

        for hgt in range(all_hrs.shape[0]):
            rows = (all_hrs[hgt, :], tta_hrs[hgt, :], notta_hrs[hgt, :])
            for n, r in enumerate(rows):

                # following CFAD Yuter et al (1995)
                valid = r[~np.isnan(r)]
                freq, bins = np.histogram(valid, bins=binss[k])
                hist_array[hgt, :, n] = freq

                total = freq.sum()
                if total > 0:
                    bin_middle = (bins[1:] + bins[:-1]) / 2.
                    cfad_array[hgt, :, n] = 100. * (freq / float(total))
                    average[hgt, n] = np.sum(freq * bin_middle) / total
                else:
                    # nothing fell inside the bins: same NaN result as 0/0,
                    # without the runtime warning
                    cfad_array[hgt, :, n] = np.nan
                    average[hgt, n] = np.nan

                if valid.size > 0:
                    median[hgt, n] = np.percentile(valid, 50)
                else:
                    median[hgt, n] = np.nan

        results[k] = (hist_array, cfad_array, average, median)

    hist_array_spd, cfad_array_spd, average_spd, median_spd = results['wspd']
    hist_array_dir, cfad_array_dir, average_dir, median_dir = results['wdir']

    return [hist_array_spd,
            hist_array_dir,
            cfad_array_spd,
            cfad_array_dir,
            binss['wspd'],
            binss['wdir'],
            wprof_df.hgt,
            wp_hours,
            tta_hours,
            notta_hours,
            average_spd,
            average_dir,
            median_spd,
            median_dir]
        
def processv2(year=[],wdsurf=None,
               wdwpro=None,rainbb=None,
               raincz=None, nhours=None):
    ''' v2: target loop moved into year loop

    Builds CFADs (following Yuter et al. 1995) of wind direction, wind
    speed and the derived u and v components for all included hours, TTA
    hours and no-TTA hours.

    Parameters
    ----------
    year : sequence of years, each passed to ``tta_analysis`` and
        ``parse_data.windprof``.  The default list is never mutated.
    wdsurf, wdwpro, rainbb, raincz, nhours : forwarded to
        ``tta_analysis(y).start_df`` as ``wdir_surf``, ``wdir_wprof``,
        ``rain_bby``, ``rain_czd`` and ``nhours``.

    Returns
    -------
    list of 24 items, in this order: hist arrays (spd, dir, u, v), cfad
    arrays (spd, dir, u, v), bin edges (spd, dir, u, v), ``hgt`` of the
    last year's profiler object, hour counts (all, tta, notta), averages
    (spd, dir, u, v) and medians (spd, dir, u, v).  hist/cfad arrays have
    shape (40, nbins, 3), averages/medians (40, 3); the last axis is
    0 = all included, 1 = TTA, 2 = no-TTA.  Rows without valid data get
    NaN for cfad, average and median.

    Raises
    ------
    ValueError
        If ``year`` is empty (previously this ended in a ``NameError``).
    '''
    if len(year) == 0:
        raise ValueError('year must contain at least one year')

    binss = {'wdir': np.arange(0, 370, 10),
             'wspd': np.arange(0, 36, 1),
             'u': np.arange(-15, 21, 1),
             'v': np.arange(-14, 21, 1),
             }

    target = ['wdir', 'wspd']
    ngates = 40

    # stacks[t] = [all included, tta, notta].  Each stack starts with one
    # placeholder column so np.hstack always has something to extend.  The
    # placeholder is NaN (not np.empty) so that it is dropped by the NaN
    # filter below instead of leaking uninitialised values into the
    # histograms, averages and medians.
    stacks = {t: [np.full((ngates, 1), np.nan) for _ in range(3)]
              for t in target}

    for y in year:
        print('Processing year {}'.format(y))

        # tta analysis
        tta = tta_analysis(y)
        tta.start_df(wdir_surf=wdsurf,
                     wdir_wprof=wdwpro,
                     rain_bby=rainbb,
                     rain_czd=raincz,
                     nhours=nhours)

        # retrieve dates
        include_dates = tta.include_dates
        tta_dates = tta.tta_dates
        notta_dates = tta.notta_dates

        # read wprof
        wprof_df = parse_data.windprof(y)

        for t in target:

            # wprof partition: all included, only tta, only notta
            wprof = wprof_df.dframe[t].loc[include_dates]
            wprof_tta = wprof.loc[tta_dates]
            wprof_notta = wprof.loc[notta_dates]

            s1 = np.squeeze(pandas2stack(wprof))
            s2 = None
            if wprof_tta.size > 0:
                s2 = np.squeeze(pandas2stack(wprof_tta))
                if s2.ndim == 1:
                    # a single tta hour squeezes to 1-D; restore the column
                    s2 = np.expand_dims(s2, axis=1)
            s3 = np.squeeze(pandas2stack(wprof_notta))

            acc = stacks[t]
            acc[0] = np.hstack((acc[0], s1))
            if s2 is not None:
                acc[1] = np.hstack((acc[1], s2))
            acc[2] = np.hstack((acc[2], s3))

    wdr, wdr_tta, wdr_notta = stacks['wdir']
    wsp, wsp_tta, wsp_notta = stacks['wspd']

    arrays = {'wdir': [wdr, wdr_tta, wdr_notta],
              'wspd': [wsp, wsp_tta, wsp_notta]}

    # wind components from speed and direction (direction in degrees)
    arrays['u'] = [-wsp * np.sin(np.radians(wdr)),
                   -wsp_tta * np.sin(np.radians(wdr_tta)),
                   -wsp_notta * np.sin(np.radians(wdr_notta))]
    arrays['v'] = [-wsp * np.cos(np.radians(wdr)),
                   -wsp_tta * np.cos(np.radians(wdr_tta)),
                   -wsp_notta * np.cos(np.radians(wdr_notta))]

    # total hours; the first column of every stack is the placeholder
    wp_hours = wsp.shape[1] - 1
    tta_hours = wsp_tta.shape[1] - 1
    notta_hours = wsp_notta.shape[1] - 1

    # loop for variable (wdir, wspd, u, v); ``items()`` works on
    # Python 2 and 3 (``iteritems`` does not)
    results = {}
    for k, (all_hrs, tta_hrs, notta_hrs) in arrays.items():

        nbins = len(binss[k]) - 1
        hist_array = np.empty((ngates, nbins, 3))
        cfad_array = np.empty((ngates, nbins, 3))
        average = np.empty((ngates, 3))
        median = np.empty((ngates, 3))

        # makes CFAD
        for hgt in range(all_hrs.shape[0]):
            rows = (all_hrs[hgt, :], tta_hrs[hgt, :], notta_hrs[hgt, :])
            for n, r in enumerate(rows):

                # following CFAD Yuter et al (1995)
                valid = r[~np.isnan(r)]
                freq, bins = np.histogram(valid, bins=binss[k])
                hist_array[hgt, :, n] = freq

                total = freq.sum()
                if total > 0:
                    bin_middle = (bins[1:] + bins[:-1]) / 2.
                    cfad_array[hgt, :, n] = 100. * (freq / float(total))
                    average[hgt, n] = np.sum(freq * bin_middle) / total
                else:
                    # nothing fell inside the bins (e.g. no tta hours)
                    cfad_array[hgt, :, n] = np.nan
                    average[hgt, n] = np.nan

                if valid.size > 0:
                    median[hgt, n] = np.percentile(valid, 50)
                else:
                    median[hgt, n] = np.nan

        results[k] = (hist_array, cfad_array, average, median)

    hist_array_spd, cfad_array_spd, average_spd, median_spd = results['wspd']
    hist_array_dir, cfad_array_dir, average_dir, median_dir = results['wdir']
    hist_array_u, cfad_array_u, average_u, median_u = results['u']
    hist_array_v, cfad_array_v, average_v, median_v = results['v']

    return [hist_array_spd,
            hist_array_dir,
            hist_array_u,
            hist_array_v,
            cfad_array_spd,
            cfad_array_dir,
            cfad_array_u,
            cfad_array_v,
            binss['wspd'],
            binss['wdir'],
            binss['u'],
            binss['v'],
            wprof_df.hgt,
            wp_hours,
            tta_hours,
            notta_hours,
            average_spd,
            average_dir,
            average_u,
            average_v,
            median_spd,
            median_dir,
            median_u,
            median_v,
            ]
def preprocess(years=None, layer=None, verbose=True):
    """Collect full wind profiles and a layer-mean wind direction.

    For every year the wind profiler and the 'bby' and 'czd' surface
    records are cut to their common period, the 'bby' surface wind is
    prepended to each profile, and hours with missing precipitation or an
    entirely missing profile are dropped.

    Parameters
    ----------
    years : iterable of years passed to ``parse_data``.
    layer : pair ``[bottom, top]``; gates with ``bottom <= hgt < top`` are
        averaged (``hgt`` includes the prepended 0 level).
    verbose : when true, print the count of hours with missing
        precipitation for each year.

    Returns
    -------
    dict with keys ``WD``, ``WS`` (profiles for all kept hours),
    ``WD_rain``, ``WS_rain`` (hours with CZD precip > 0), ``wd_layer``
    (layer-mean direction), ``precip`` and ``precip_good``.

    NOTE(review): ``precip`` is the frame of the last year only, while
    ``precip_good`` accumulates every year -- confirm this is intended.
    NOTE(review): ``sin`` / ``cos`` are not defined in this block; calls
    such as ``sin(180)`` presumably expect degree-based helpers -- verify.
    """

    import pandas as pd
    import parse_data

    # object dtype: the elements appended below are whole profiles
    WD = pd.Series(dtype=object)
    WS = pd.Series(dtype=object)
    WD_rain = pd.Series(dtype=object)
    WS_rain = pd.Series(dtype=object)
    precip_good = pd.DataFrame()

    for year in years:

        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)
        hgt = wpr.hgt

        # find common time period
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        # reduce time interval so all start and end at same time
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        # append surface values to windprof to make entire profile;
        # next(it) works on Python 2 and 3, it.next() only on Python 2
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        # check nans on precip (row-wise any, without the removed
        # ``reduce`` argument of DataFrame.apply)
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        tx = 'year:{}, any_precip_nan:{:4d}'
        if verbose:
            print(tx.format(year, precip_nans.sum()))

        # check entire profile nans (same for ws and wd)
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        # include only hours when surf precip is non-missing and the
        # profile has at least one non-missing value
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        any_nan = nan_df.any(axis=1)
        include = ~any_nan
        precip_good = pd.concat([precip_good, precip[include]])

        # rainy hours at CZD
        rain_czd = czd.precip > 0

        # reduce and save to big Series (pd.concat replaces the removed
        # Series.append / DataFrame.append)
        wdr = wdr[include]
        wsp = wsp[include]
        wdr_rain = wdr[rain_czd]
        wsp_rain = wsp[rain_czd]
        WD = pd.concat([WD, wdr])
        WS = pd.concat([WS, wsp])
        WD_rain = pd.concat([WD_rain, wdr_rain])
        WS_rain = pd.concat([WS_rain, wsp_rain])

    # compute components
    WD_sin = WD.apply(lambda x: sin(x))
    WD_cos = WD.apply(lambda x: cos(x))
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    # layer-mean
    layer_idx = np.where((hgt >= layer[0]) &
                         (hgt < layer[1]))[0]
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270-(np.arctan2(mean_V, mean_U)*180/np.pi)
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                        hgt[layer_idx[-1]])

    return dict(WD=WD,
                WS=WS,
                WD_rain=WD_rain,
                WS_rain=WS_rain,
                wd_layer=wd_layer,
                precip=precip,
                precip_good=precip_good)
def plot_with_hist(year=None,target=None,normalized=True,
                    pngsuffix=None):
    """Plot a profiler variable next to its per-height histogram.

    Left panel: time-height ``pcolormesh`` of ``wprof.dframe[target]``
    with a colorbar.  Right panel: for every height row, the histogram of
    the non-NaN values, drawn as a ``pcolormesh`` over bin edges and
    ``wprof.hgt``.

    Parameters
    ----------
    year : forwarded to ``parse_data.windprof``.
    target : 'wdir' or 'wspd'; selects color limits, bins and tick layout.
    normalized : passed to ``np.histogram`` as ``density``.
    pngsuffix : when truthy the figure is saved as
        ``wprof_<target>_<pngsuffix>.png``; otherwise it is shown without
        blocking.

    Raises
    ------
    ValueError
        If ``target`` is not one of the supported names.  Previously this
        surfaced later as an ``UnboundLocalError``/``KeyError`` after a
        figure had already been created.

    NOTE(review): the right panel is titled 'Normalized frequency' even
    when ``normalized`` is false.
    """

    name={'wdir':'Wind Direction',
          'wspd':'Wind Speed'}

    # per-target plot settings
    settings = {
        'wdir': dict(vlim=(0, 360),
                     bins=np.arange(0, 370, 10),
                     xticks=np.arange(0, 400, 40),
                     xlim=[0, 360]),
        'wspd': dict(vlim=(0, 30),
                     bins=np.arange(0, 36, 1),
                     xticks=np.arange(0, 40, 5),
                     xlim=[0, 35]),
    }
    if target not in settings:
        raise ValueError("target must be 'wdir' or 'wspd', "
                         "got {!r}".format(target))
    cfg = settings[target]
    vmin, vmax = cfg['vlim']
    bins = cfg['bins']

    plt.figure(figsize=(20,5))

    gs = gsp.GridSpec(1, 2,
                      width_ratios=[3,1]
                      )

    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    wprof = parse_data.windprof(year)
    wp = np.squeeze(pandas2stack(wprof.dframe[target]))
    wp_ma = ma.masked_where(np.isnan(wp),wp)
    X,Y = wprof.time,wprof.hgt
    p = ax1.pcolormesh(X,Y,wp_ma,vmin=vmin,vmax=vmax)
    add_colorbar(ax1,p)
    ax1.xaxis.set_major_locator(mdates.MonthLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    ax1.set_xlabel(r'$ Time \rightarrow$')
    ax1.set_ylabel('Altitude [m] MSL')
    ax1.set_title('BBY Windprof '+name[target])

    # one histogram per height row; sized from the data so that no
    # uninitialised rows are plotted
    array = np.empty((wp.shape[0],len(bins)-1))
    for hgt in range(wp.shape[0]):
        row = wp[hgt,:]
        freq,bins=np.histogram(row[~np.isnan(row)],
                                bins=bins,
                                density=normalized)
        array[hgt,:]=freq

    x = bins
    y = wprof.hgt
    p = ax2.pcolormesh(x,y,array,cmap='viridis')
    amin = np.amin(array)
    amax = np.amax(array)
    cbar = add_colorbar(ax2,p,size='4%',ticks=[amin,amax])
    cbar.ax.set_yticklabels(['low','high'])
    ax2.set_xticks(cfg['xticks'])
    ax2.set_yticklabels('')
    ax2.set_xlabel(name[target])
    ax2.set_xlim(cfg['xlim'])
    ax2.set_title('Normalized frequency')

    plt.tight_layout()
    if pngsuffix:
        out_name = 'wprof_{}_{}.png'
        plt.savefig(out_name.format(target,pngsuffix))
    else:
        plt.show(block=False)
# Example #8
# 0
    else:
        tb = t.text + ' | ' + t.text

    if end_czd > end_bby:
        te = t.red() + ' | ' + t.text
    else:
        te = t.text + ' | ' + t.text

    txtDate = tb + ' || ' + te

    fmt = '%Y-%m-%d %H:%M'
    b_bby = beg_bby.strftime(fmt)
    b_czd = beg_czd.strftime(fmt)
    e_bby = end_bby.strftime(fmt)
    e_czd = end_czd.strftime(fmt)

    print txtDate.format(b_bby, b_czd, e_czd, e_bby)

# Print the first and last timestamp of the BBY wind-profiler record for
# 1998 and 2001-2012.  ``print(...)`` with a single argument and
# ``list(range(...))`` behave the same on Python 2 and also run on Python 3.
print('\nBBY windprof dates')
txtHeader = '{:^16} | {:^16}'
print(txtHeader.format('Beg', 'End'))
for y in [1998] + list(range(2001, 2013)):
    wprof = parse_data.windprof(y)
    beg, end = wprof.check_beg_end()

    txtDate = '{} | {}'
    fmt = '%Y-%m-%d %H:%M'
    b = beg.strftime(fmt)
    e = end.strftime(fmt)
    print(txtDate.format(b, e))
    def start_df(self, wdir_surf=None, wdir_wprof=None, 
              rain_bby=None,rain_czd=None,nhours=None):

        '''
            this version uses pandas dataframe, 
            it should be more accurate and simpler

            Evaluates the TTA condition hour by hour over the period
            common to the 'bby' and 'czd' surface records and the wind
            profiler, flags runs of at least ``nhours`` consecutive TTA
            hours, and stores rainfall totals, hour counts and date
            partitions on ``self``.

            Parameters
            ----------
            wdir_surf : an hour qualifies when surface wdir <= wdir_surf
            wdir_wprof : ... and first-gate profiler wdir <= wdir_wprof
            rain_bby, rain_czd : optional precip thresholds (>=); a
                falsy value (None or 0) disables that threshold in the
                hourly condition
            nhours : minimum number of consecutive qualifying hours

            When ``rain_czd`` >= 0.25, hours with zero precip at CZD are
            also excluded.  NOTE(review): for other non-None values only
            the NaN exclusion is applied (this case used to raise
            UnboundLocalError) -- confirm this is the intended rule.

            Sets on self: time_beg, time_end, count_hrs_include,
            count_hrs_exclude, tot/inc/exc/tta/notta_rainfall_bby/czd,
            tta_hours, notta_hours, wprof_hgt, exclude_dates,
            include_dates, tta_dates, notta_dates and df.
        '''

        import pandas as pd

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        ''' trim the head and tail of dataset depending
            on the latest time of the beginning and 
            earliest of the ending '''
        time_beg = max(beg_bby, beg_czd, beg_wpr)
        time_end = min(end_bby, end_czd, end_wpr)

        ''' initializations '''
        onehr = timedelta(hours=1)
        bool_buffer = np.array([False] * nhours)
        tta_bool = np.array([])
        count = 0
        rng = pd.date_range(start=time_beg,
                            end=time_end,
                            freq='1H')
        cols = ('wssrf','wswpr','wdsrf','wdwpr','rbby','rczd','tta','consecutive')
        df = pd.DataFrame(index=rng,columns=cols)       
        time = time_beg
        
        ''' loop evaluates each time '''
        while (time <= time_end):

            bby_row = bby.dframe.loc[time]
            czd_row = czd.dframe.loc[time]
            wpr_row = wprof.dframe.loc[time]

            surf_wd = bby_row.wdir
            wpr_wd0 = wpr_row.wdir[0]  # first gate
            pbby = bby_row.precip
            pczd = czd_row.precip

            if surf_wd is None:
                surf_wd = np.nan

            # single-step .loc[row, col] assignment; the chained form
            # df.loc[time].col = v may write to a temporary copy
            df.loc[time, 'wdsrf'] = surf_wd
            df.loc[time, 'wdwpr'] = wpr_wd0
            df.loc[time, 'rbby'] = pbby
            df.loc[time, 'rczd'] = pczd
            df.loc[time, 'wssrf'] = bby_row.wspd
            df.loc[time, 'wswpr'] = wpr_row.wspd[0]

            ''' check conditions '''
            cond1 = (surf_wd <= wdir_surf)
            cond2 = (wpr_wd0 <= wdir_wprof)
            if rain_bby and rain_czd:
                cond3 = (pbby >= rain_bby)
                cond4 = (pczd >= rain_czd)
                tta_condition = cond1 and cond2 and cond3 and cond4
            elif rain_czd:
                cond3 = (pczd >= rain_czd)
                tta_condition = cond1 and cond2 and cond3
            elif rain_bby:
                cond3 = (pbby >= rain_bby)
                tta_condition = cond1 and cond2 and cond3
            else:
                tta_condition = cond1 and cond2

            df.loc[time, 'tta'] = tta_condition

            ''' construct boolean array indicating
                hourly TTA conditions with minimum
                of nhours '''
            if tta_condition and bool_buffer.all():
                # already inside a run of at least nhours
                tta_bool = np.append(tta_bool, [True])
            elif tta_condition:
                # run still shorter than nhours: hold it in the buffer
                bool_buffer[count] = True
                count += 1
                if bool_buffer.all():
                    tta_bool = np.append(tta_bool, bool_buffer)
            else:
                bufsum = bool_buffer.sum()
                if bufsum == 0 or bufsum == nhours:
                    tta_bool = np.append(tta_bool, [False])
                else:
                    # the short run held in the buffer does not count
                    tta_bool = np.append(tta_bool, [False] * (bufsum + 1))
                ' reset buffer '
                bool_buffer = np.array([False] * nhours)
                count = 0

            time += onehr

        # a run still held in the buffer when the record ends never
        # reached nhours; flush it as False so tta_bool has one entry per
        # hour (otherwise the assignment below fails on length mismatch)
        pending = bool_buffer.sum()
        if 0 < pending < nhours:
            tta_bool = np.append(tta_bool, [False] * pending)

        df.consecutive = tta_bool.astype(bool)

        ar_wdsrf = df.wdsrf.values.astype(float)
        ar_wdwpr = df.wdwpr.values.astype(float)
        ar_rbby = df.rbby.values.astype(float)
        ar_rczd = df.rczd.values.astype(float)
        
        wdsrfIsNan = np.isnan(ar_wdsrf)
        wdwprIsNan = np.isnan(ar_wdwpr)
        rbbyIsNan = np.isnan(ar_rbby)
        rczdIsNan = np.isnan(ar_rczd)

        # hours with any missing observation are always excluded
        exclude = wdsrfIsNan | wdwprIsNan | rbbyIsNan | rczdIsNan
        if rain_czd is not None and rain_czd >= 0.25:
            ''' this boolean excludes dates when there is no
                precip at CZD '''
            rczdIsZero = (ar_rczd == 0)
            exclude = exclude | rczdIsZero

        tot_rbby = np.round(df.rbby.sum(),0).astype(int)
        tot_rczd = np.round(df.rczd.sum(),0).astype(int)

        exc_rbby = np.round(df[exclude].rbby.sum(),0).astype(int)
        exc_rczd = np.round(df[exclude].rczd.sum(),0).astype(int)

        inc_rbby = tot_rbby - exc_rbby
        inc_rczd = tot_rczd - exc_rczd

        tot_hrs   = np.round(df.index.size,0).astype(int)
        exc_hours = np.round(exclude.sum(),0).astype(int)
        inc_hours = tot_hrs - exc_hours

        tta_rbby   = np.round(df[df.consecutive].rbby.sum(),0).astype(int)
        tta_rczd   = np.round(df[df.consecutive].rczd.sum(),0).astype(int)
        notta_rbby = inc_rbby - tta_rbby
        notta_rczd = inc_rczd - tta_rczd

        exclude_dates = df[exclude].index
        include_dates = df[~exclude].index
        tta_dates     = df[~exclude & df.consecutive].index
        notta_dates   = df[~exclude & ~df.consecutive].index

        tta_hours   = tta_dates.size
        notta_hours = notta_dates.size

        self.time_beg           = time_beg
        self.time_end           = time_end
        self.count_hrs_include  = inc_hours
        self.count_hrs_exclude  = exc_hours
        self.tot_rainfall_bby   = tot_rbby
        self.tot_rainfall_czd   = tot_rczd
        self.inc_rainfall_bby   = inc_rbby
        self.inc_rainfall_czd   = inc_rczd
        self.exc_rainfall_bby   = exc_rbby
        self.exc_rainfall_czd   = exc_rczd        
        self.tta_rainfall_bby   = tta_rbby
        self.tta_rainfall_czd   = tta_rczd
        self.notta_rainfall_bby = notta_rbby
        self.notta_rainfall_czd = notta_rczd
        self.tta_hours          = tta_hours
        self.notta_hours        = notta_hours
        self.wprof_hgt          = wprof.hgt
        self.exclude_dates      = exclude_dates
        self.include_dates      = include_dates
        self.tta_dates          = tta_dates
        self.notta_dates        = notta_dates
        self.df                 = df
    def start(self, wdir_surf=None, wdir_wprof=None, 
              rain_bby=None,rain_czd=None,nhours=None):

        ''' this is an old version
            prefer start_df that uses pandas dataframe
            for analysis

            Walks hour by hour over the period common to the 'bby' and
            'czd' surface records and the wind profiler, skips hours
            with a missing surface or first-gate wind direction, and
            flags runs of at least ``nhours`` consecutive hours that
            satisfy the TTA condition.

            Parameters
            ----------
            wdir_surf : an hour qualifies when surface wdir <= wdir_surf
            wdir_wprof : ... and first-gate profiler wdir <= wdir_wprof
            rain_bby, rain_czd : optional precip thresholds (>=); a
                falsy value (None or 0) disables that threshold
            nhours : minimum number of consecutive qualifying hours

            Results (hour counts, rainfall totals, the boolean mask
            ``self.bool`` and the before/after-period statistics) are
            stored as attributes on ``self``.
        '''

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        ''' the latest of the beg '''
        time_beg = max(beg_bby, beg_czd, beg_wpr)

        ''' the earliest of the end '''
        time_end = min(end_bby, end_czd, end_wpr)

        ''' rainfall before all obs start '''
        rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
        rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

        ''' rainfall after all obs end '''
        rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
        rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

        ''' number of windprofiles before (after)
            all obs start (end) '''
        nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
        nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

        onehr = timedelta(hours=1)
        time = time_beg
        bool_buffer = np.array([False] * nhours)
        tta_bool = np.array([])
        rainfall_czd = np.array([])
        rainfall_bby = np.array([])
        count = 0
        count_while = 0
        count_exclude = 0

        while (time <= time_end):
                
            surf_wd = bby.dframe.loc[time].wdir
            wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
            pbby = bby.dframe.loc[time].precip
            pczd = czd.dframe.loc[time].precip

            ''' exclude data when there is nan in 
                surf obs or windprof first gate '''
            if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
                count_exclude += 1
                time += onehr
                continue

            ''' these are obs included in the analysis, then we
                determine if they are tta or no-tta '''
            rainfall_bby=np.append(rainfall_bby,pbby)
            rainfall_czd=np.append(rainfall_czd,pczd)

            ''' check conditions '''
            cond1 = (surf_wd <= wdir_surf)
            cond2 = (wpr_wd0 <= wdir_wprof)
            if rain_bby and rain_czd:
                cond3 = (pbby >= rain_bby)
                cond4 = (pczd >= rain_czd)
                tta_condition = cond1 and cond2 and \
                                cond3 and cond4
            elif rain_czd:
                cond3 = (pczd >= rain_czd)
                tta_condition = cond1 and cond2 and cond3
            elif rain_bby:
                cond3 = (pbby >= rain_bby)
                tta_condition = cond1 and cond2 and cond3
            else:
                tta_condition = cond1 and cond2

            ''' construct boolean array indicating
                hourly TTA conditions with minimum
                of nhours '''
            if tta_condition and bool_buffer.all():
                # already inside a run of at least nhours
                tta_bool = np.append(tta_bool, [True])
            elif tta_condition:
                # run still shorter than nhours: hold it in the buffer
                bool_buffer[count] = True
                count += 1
                if bool_buffer.all():
                    tta_bool = np.append(tta_bool, bool_buffer)
            else:
                bufsum = bool_buffer.sum()
                if bufsum == 0 or bufsum == nhours:
                    tta_bool = np.append(tta_bool, [False])
                else:
                    # the short run held in the buffer does not count
                    tta_bool = np.append(tta_bool, [False] * (bufsum + 1))
                # reset buffer
                bool_buffer = np.array([False] * nhours)
                count = 0

            count_while += 1
            time += onehr

        # a run still held in the buffer when the record ends never
        # reached nhours; flush it as False so tta_bool has one entry per
        # included hour and can mask the rainfall arrays below
        pending = bool_buffer.sum()
        if 0 < pending < nhours:
            tta_bool = np.append(tta_bool, [False] * pending)

        tta_bool = np.array(tta_bool).astype(bool)
        tta_hours = tta_bool.sum()
        notta_hours = count_while-tta_hours
        self.tta_hours = tta_hours
        self.notta_hours = notta_hours
        self.time_beg = time_beg
        self.time_end = time_end
        self.count_while = count_while
        self.count_exclude = count_exclude
        self.total_rainfall_bby = np.nansum(rainfall_bby)
        self.total_rainfall_czd = np.nansum(rainfall_czd)
        self.bool = tta_bool
        self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
        self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
        self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
        self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
        self.rainfall_bby_before_analysis = rbby_before
        self.rainfall_bby_after_analysis = rbby_after
        self.rainfall_czd_before_analysis = rczd_before
        self.rainfall_czd_after_analysis = rczd_after
        self.nwprof_before = nwprof_before
        self.nwprof_after = nwprof_after
        self.wprof_hgt = wprof.hgt

        print('TTA analysis finished')
    def start_df_layer(self,
                       wdir_thres  = None,
                       wdir_layer  = [None,None],  # [meters]
                       rain_bby    = None,
                       rain_czd    = None,
                       nhours      = None):

        '''
            Hourly TTA analysis based on a pandas dataframe; like
            start_df but the wind direction criterion is applied to
            the vector-mean wind direction of a layer instead of a
            single level.

            Parameters
            ----------
            wdir_thres : number or str (required)
                A number flags hours whose layer-mean direction is
                strictly below it; any other value is handed to
                parse_operator together with the layer-mean direction.
            wdir_layer : [bottom, top]
                Profiler gates with bottom <= hgt < top are averaged.
                When bottom == 0 the BBY surface wind is added to the
                layer. The default list is only read, never modified.
            rain_bby, rain_czd : number, optional
                Minimum hourly precip required at BBY / CZD. Falsy
                values (None, 0) disable the criterion.
            nhours : int
                Minimum number of consecutive hours meeting the
                criteria for the run to be flagged in 'consecutive'.

            Results are stored as attributes of self (time_beg,
            time_end, hour counts, rainfall sums, date indexes and
            the dataframe itself in self.df); nothing is returned.

            Raises ValueError when wdir_thres is missing.
        '''

        import numbers
        import pandas as pd

        # every joint condition needs the wind direction criterion,
        # so fail early with a clear message
        if not wdir_thres:
            raise ValueError('start_df_layer requires wdir_thres')

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # trim head and tail: latest beginning, earliest ending
        time_beg = max(beg_bby, beg_czd, beg_wpr)
        time_end = min(end_bby, end_czd, end_wpr)

        # initializations
        onehr = timedelta(hours=1)
        bool_buffer = np.array([False] * nhours)
        tta_bool = np.array([])
        count = 0
        rng = pd.date_range(start=time_beg, end=time_end, freq='1H')

        # profiler gates that fall inside the requested layer
        idx = np.where((wprof.hgt >= wdir_layer[0]) &
                       (wprof.hgt < wdir_layer[1]))[0]
        wphgt = wprof.hgt[idx]

        # columns included in the dataframe
        wdircol = 'wd_{}-{:2.0f}m'.format(wdir_layer[0], wphgt[-1])
        cols = [wdircol, 'rbby', 'rczd', 'tta', 'consecutive']
        df = pd.DataFrame(index=rng, columns=cols)

        use_surface = (wdir_layer[0] == 0)
        thres_is_number = isinstance(wdir_thres, numbers.Number)

        # loop evaluates each hour
        time = time_beg
        while time <= time_end:

            if use_surface:
                surf_wd = np.array(bby.dframe.loc[time].wdir)
                surf_ws = np.array(bby.dframe.loc[time].wspd)
            else:
                surf_wd = np.array([])
                surf_ws = np.array([])

            wpro_wd = np.array(wprof.dframe.loc[time].wdir)[idx]
            wpro_ws = np.array(wprof.dframe.loc[time].wspd)[idx]

            wd = np.append(surf_wd, wpro_wd)
            ws = np.append(surf_ws, wpro_ws)

            # vector mean: average the components, then convert the
            # mean vector back to a direction in degrees
            u = -ws * np.sin(np.radians(wd))
            v = -ws * np.cos(np.radians(wd))
            u_mean = u.mean()
            v_mean = v.mean()
            wd_mean = 270 - np.arctan2(v_mean, u_mean) * 180. / np.pi
            if wd_mean > 360:
                wd_mean -= 360

            pbby = bby.dframe.loc[time].precip
            pczd = czd.dframe.loc[time].precip

            # single-step .loc assignment; chained df.loc[time].col = x
            # may write to a temporary copy of the row
            df.loc[time, wdircol] = wd_mean
            df.loc[time, 'rbby'] = pbby
            df.loc[time, 'rczd'] = pczd

            # check conditions; all supplied criteria must hold
            if thres_is_number:
                conditions = [wd_mean < wdir_thres]
            else:
                conditions = [parse_operator(wd_mean, wdir_thres)]
            if rain_czd:
                conditions.append(pczd >= rain_czd)
            if rain_bby:
                conditions.append(pbby >= rain_bby)
            tta_condition = all(conditions)

            df.loc[time, 'tta'] = tta_condition

            # construct boolean array indicating hourly TTA
            # conditions with a minimum of nhours
            if tta_condition and bool_buffer.all():
                # already inside a confirmed run
                tta_bool = np.append(tta_bool, [True])
            elif tta_condition:
                # run not confirmed yet: hold the hour in the buffer
                bool_buffer[count] = True
                count += 1
                if bool_buffer.all():
                    tta_bool = np.append(tta_bool, bool_buffer)
            else:
                bufsum = bool_buffer.sum()
                if bufsum == 0 or bufsum == nhours:
                    tta_bool = np.append(tta_bool, [False])
                else:
                    # discard the too-short run plus the current hour
                    tta_bool = np.append(tta_bool, [False] * (bufsum + 1))
                # reset buffer
                bool_buffer = np.array([False] * nhours)
                count = 0

            time += onehr

        # a run shorter than nhours that reaches the end of the period
        # is still pending in the buffer; flush it as non-TTA so that
        # tta_bool has one value per hour
        if 0 < count < nhours:
            tta_bool = np.append(tta_bool, [False] * count)

        df['consecutive'] = tta_bool.astype(bool)

        ar_wdir = df[wdircol].values.astype(float)
        ar_rbby = df.rbby.values.astype(float)
        ar_rczd = df.rczd.values.astype(float)

        # hours with any missing value are excluded from the analysis
        exclude = np.isnan(ar_wdir) | np.isnan(ar_rbby) | np.isnan(ar_rczd)
        if rain_czd is not None and rain_czd >= 0.25:
            # also exclude hours when there is no precip at CZD
            exclude = exclude | (ar_rczd == 0)

        tot_rbby = np.round(df.rbby.sum(), 3)
        tot_rczd = np.round(df.rczd.sum(), 3)

        exc_rbby = np.round(df[exclude].rbby.sum(), 3)
        exc_rczd = np.round(df[exclude].rczd.sum(), 3)

        inc_rbby = tot_rbby - exc_rbby
        inc_rczd = tot_rczd - exc_rczd

        tot_hrs   = np.round(df.index.size, 0).astype(int)
        exc_hours = np.round(exclude.sum(), 0).astype(int)
        inc_hours = tot_hrs - exc_hours

        tta_rbby   = np.round(df[df.consecutive].rbby.sum(), 3)
        tta_rczd   = np.round(df[df.consecutive].rczd.sum(), 3)
        notta_rbby = inc_rbby - tta_rbby
        notta_rczd = inc_rczd - tta_rczd

        exclude_dates = df[exclude].index
        include_dates = df[~exclude].index
        tta_dates     = df[~exclude & df.consecutive].index
        notta_dates   = df[~exclude & ~df.consecutive].index

        tta_hours   = tta_dates.size
        notta_hours = notta_dates.size

        self.time_beg           = time_beg
        self.time_end           = time_end
        self.count_hrs_include  = inc_hours
        self.count_hrs_exclude  = exc_hours
        self.tot_rainfall_bby   = tot_rbby
        self.tot_rainfall_czd   = tot_rczd
        self.inc_rainfall_bby   = inc_rbby
        self.inc_rainfall_czd   = inc_rczd
        self.exc_rainfall_bby   = exc_rbby
        self.exc_rainfall_czd   = exc_rczd
        self.tta_rainfall_bby   = tta_rbby
        self.tta_rainfall_czd   = tta_rczd
        self.notta_rainfall_bby = notta_rbby
        self.notta_rainfall_czd = notta_rczd
        self.tta_hours          = tta_hours
        self.notta_hours        = notta_hours
        self.wprof_hgt          = wprof.hgt
        self.exclude_dates      = exclude_dates
        self.include_dates      = include_dates
        self.tta_dates          = tta_dates
        self.notta_dates        = notta_dates
        self.df                 = df
    def start_df(self, wdir_surf   = None,
                       wdir_wprof  = None,
                       wprof_gate  = 0,
                       rain_bby    = None,
                       rain_czd    = None,
                       nhours      = None):

        '''
            Hourly TTA analysis based on a pandas dataframe; it
            should be more accurate and simpler than the start
            method.

            Parameters
            ----------
            wdir_surf, wdir_wprof : number or str, optional
                Wind direction criterion for the BBY surface
                observation / the profiler gate wprof_gate. A number
                flags hours whose direction is <= the number; any
                other value is handed to parse_operator together
                with the observed direction.
            wprof_gate : int
                Index of the profiler gate used for wdir_wprof.
            rain_bby, rain_czd : number, optional
                Minimum hourly precip required at BBY / CZD.
            nhours : int
                Minimum number of consecutive hours meeting the
                criteria for the run to be flagged in 'consecutive'.

            Falsy criteria (None, 0) are ignored; every supplied
            criterion must hold for an hour to be flagged.

            Results are stored as attributes of self (time_beg,
            time_end, hour counts, rainfall sums, date indexes and
            the dataframe itself in self.df); nothing is returned.

            Raises ValueError when no criterion is supplied.
        '''

        import numbers
        import pandas as pd

        if not (wdir_surf or wdir_wprof or rain_bby or rain_czd):
            raise ValueError('start_df requires at least one criterion')

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # trim head and tail: latest beginning, earliest ending
        time_beg = max(beg_bby, beg_czd, beg_wpr)
        time_end = min(end_bby, end_czd, end_wpr)

        # initializations
        onehr = timedelta(hours=1)
        bool_buffer = np.array([False] * nhours)
        tta_bool = np.array([])
        count = 0
        rng = pd.date_range(start=time_beg, end=time_end, freq='1H')

        # columns included in the dataframe
        wprofcol = 'wdwpr{}'.format(wprof_gate)
        cols = ['wdsrf', wprofcol, 'rbby', 'rczd', 'tta', 'consecutive']
        df = pd.DataFrame(index=rng, columns=cols)

        surf_is_number = isinstance(wdir_surf, numbers.Number)
        wprof_is_number = isinstance(wdir_wprof, numbers.Number)

        # loop evaluates each hour
        time = time_beg
        while time <= time_end:

            surf_wd = bby.dframe.loc[time].wdir
            wpr_wd0 = wprof.dframe.loc[time].wdir[wprof_gate]
            pbby = bby.dframe.loc[time].precip
            pczd = czd.dframe.loc[time].precip

            # single-step .loc assignment; chained df.loc[time].col = x
            # may write to a temporary copy of the row
            df.loc[time, 'wdsrf'] = surf_wd
            df.loc[time, wprofcol] = wpr_wd0
            df.loc[time, 'rbby'] = pbby
            df.loc[time, 'rczd'] = pczd

            # check conditions; collecting them in a list covers every
            # combination of criteria instead of enumerating a few
            conditions = []
            if wdir_surf:
                if surf_is_number:
                    conditions.append(surf_wd <= wdir_surf)
                else:
                    conditions.append(parse_operator(surf_wd, wdir_surf))
            if wdir_wprof:
                if wprof_is_number:
                    conditions.append(wpr_wd0 <= wdir_wprof)
                else:
                    conditions.append(parse_operator(wpr_wd0, wdir_wprof))
            if rain_czd:
                conditions.append(pczd >= rain_czd)
            if rain_bby:
                conditions.append(pbby >= rain_bby)
            tta_condition = all(conditions)

            df.loc[time, 'tta'] = tta_condition

            # construct boolean array indicating hourly TTA
            # conditions with a minimum of nhours
            if tta_condition and bool_buffer.all():
                # already inside a confirmed run
                tta_bool = np.append(tta_bool, [True])
            elif tta_condition:
                # run not confirmed yet: hold the hour in the buffer
                bool_buffer[count] = True
                count += 1
                if bool_buffer.all():
                    tta_bool = np.append(tta_bool, bool_buffer)
            else:
                bufsum = bool_buffer.sum()
                if bufsum == 0 or bufsum == nhours:
                    tta_bool = np.append(tta_bool, [False])
                else:
                    # discard the too-short run plus the current hour
                    tta_bool = np.append(tta_bool, [False] * (bufsum + 1))
                # reset buffer
                bool_buffer = np.array([False] * nhours)
                count = 0

            time += onehr

        # a run shorter than nhours that reaches the end of the period
        # is still pending in the buffer; flush it as non-TTA so that
        # tta_bool has one value per hour
        if 0 < count < nhours:
            tta_bool = np.append(tta_bool, [False] * count)

        df['consecutive'] = tta_bool.astype(bool)

        ar_wdsrf = df.wdsrf.values.astype(float)
        ar_wdwpr = df[wprofcol].values.astype(float)
        ar_rbby = df.rbby.values.astype(float)
        ar_rczd = df.rczd.values.astype(float)

        # hours with any missing value are excluded from the analysis
        exclude = (np.isnan(ar_wdsrf) | np.isnan(ar_wdwpr) |
                   np.isnan(ar_rbby) | np.isnan(ar_rczd))
        if rain_czd is not None and rain_czd >= 0.25:
            # also exclude hours when there is no precip at CZD
            exclude = exclude | (ar_rczd == 0)

        tot_rbby = np.round(df.rbby.sum(), 3)
        tot_rczd = np.round(df.rczd.sum(), 3)

        exc_rbby = np.round(df[exclude].rbby.sum(), 3)
        exc_rczd = np.round(df[exclude].rczd.sum(), 3)

        inc_rbby = tot_rbby - exc_rbby
        inc_rczd = tot_rczd - exc_rczd

        tot_hrs   = np.round(df.index.size, 0).astype(int)
        exc_hours = np.round(exclude.sum(), 0).astype(int)
        inc_hours = tot_hrs - exc_hours

        tta_rbby   = np.round(df[df.consecutive].rbby.sum(), 3)
        tta_rczd   = np.round(df[df.consecutive].rczd.sum(), 3)
        notta_rbby = inc_rbby - tta_rbby
        notta_rczd = inc_rczd - tta_rczd

        exclude_dates = df[exclude].index
        include_dates = df[~exclude].index
        tta_dates     = df[~exclude & df.consecutive].index
        notta_dates   = df[~exclude & ~df.consecutive].index

        tta_hours   = tta_dates.size
        notta_hours = notta_dates.size

        self.time_beg           = time_beg
        self.time_end           = time_end
        self.count_hrs_include  = inc_hours
        self.count_hrs_exclude  = exc_hours
        self.tot_rainfall_bby   = tot_rbby
        self.tot_rainfall_czd   = tot_rczd
        self.inc_rainfall_bby   = inc_rbby
        self.inc_rainfall_czd   = inc_rczd
        self.exc_rainfall_bby   = exc_rbby
        self.exc_rainfall_czd   = exc_rczd
        self.tta_rainfall_bby   = tta_rbby
        self.tta_rainfall_czd   = tta_rczd
        self.notta_rainfall_bby = notta_rbby
        self.notta_rainfall_czd = notta_rczd
        self.tta_hours          = tta_hours
        self.notta_hours        = notta_hours
        self.wprof_hgt          = wprof.hgt
        self.exclude_dates      = exclude_dates
        self.include_dates      = include_dates
        self.tta_dates          = tta_dates
        self.notta_dates        = notta_dates
        self.df                 = df
def plot_with_hist(year=None, target=None, normalized=True, pngsuffix=None):
    """Plot one year of wind profiler data next to its per-gate histogram.

    The left panel is a time-height pcolormesh of the selected variable;
    the right panel is a histogram of that variable computed separately
    for every height gate.

    Parameters
    ----------
    year : passed unchanged to parse_data.windprof.
    target : 'wdir' or 'wspd'; selects the variable, the colour limits
        and the histogram bins.
    normalized : forwarded to np.histogram as ``density``.
    pngsuffix : when truthy the figure is saved as
        'wprof_<target>_<pngsuffix>.png'; otherwise it is shown
        without blocking.

    Raises
    ------
    ValueError
        If target is neither 'wdir' nor 'wspd'.
    """
    name = {'wdir': 'Wind Direction', 'wspd': 'Wind Speed'}

    if target == 'wdir':
        vmin, vmax = [0, 360]
        bins = np.arange(0, 370, 10)
        hist_xticks = np.arange(0, 400, 40)
        hist_xlim = [0, 360]
    elif target == 'wspd':
        vmin, vmax = [0, 30]
        bins = np.arange(0, 36, 1)
        hist_xticks = np.arange(0, 40, 5)
        hist_xlim = [0, 35]
    else:
        # fail before any figure is created instead of hitting an
        # undefined vmin further down
        raise ValueError(
            "target must be 'wdir' or 'wspd', got {!r}".format(target))

    plt.figure(figsize=(20, 5))
    gs = gsp.GridSpec(1, 2, width_ratios=[3, 1])
    ax1 = plt.subplot(gs[0])
    ax2 = plt.subplot(gs[1])

    # left panel: time-height section, missing values masked out
    wprof = parse_data.windprof(year)
    wp = np.squeeze(pandas2stack(wprof.dframe[target]))
    wp_ma = ma.masked_where(np.isnan(wp), wp)
    X, Y = wprof.time, wprof.hgt
    p = ax1.pcolormesh(X, Y, wp_ma, vmin=vmin, vmax=vmax)
    add_colorbar(ax1, p)
    ax1.xaxis.set_major_locator(mdates.MonthLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
    ax1.set_xlabel(r'$ Time \rightarrow$')
    ax1.set_ylabel('Altitude [m] MSL')
    ax1.set_title('BBY Windprof ' + name[target])

    # right panel: one histogram per height gate; size the array from
    # the data so no row is left uninitialised
    n_gates = wp.shape[0]
    array = np.empty((n_gates, len(bins) - 1))
    for hgt in range(n_gates):
        row = wp[hgt, :]
        freq, bins = np.histogram(row[~np.isnan(row)],
                                  bins=bins,
                                  density=normalized)
        array[hgt, :] = freq

    x = bins
    y = wprof.hgt
    p = ax2.pcolormesh(x, y, array, cmap='viridis')
    amin = np.amin(array)
    amax = np.amax(array)
    cbar = add_colorbar(ax2, p, size='4%', ticks=[amin, amax])
    cbar.ax.set_yticklabels(['low', 'high'])
    ax2.set_xticks(hist_xticks)
    ax2.set_yticklabels('')
    ax2.set_xlabel(name[target])
    ax2.set_xlim(hist_xlim)
    ax2.set_title('Normalized frequency')

    plt.tight_layout()
    if pngsuffix:
        out_name = 'wprof_{}_{}.png'
        plt.savefig(out_name.format(target, pngsuffix))
    else:
        plt.show(block=False)
def preprocess(years=None, layer=None, verbose=True):
    """Collect wind profiles (surface + profiler) and precip for several years.

    For every year the BBY/CZD surface data and the wind profiler data
    are cut to their common period, the BBY surface wind is prepended
    to each profile, and hours with missing precip or an entirely
    missing profile are dropped. A layer-mean wind direction is then
    computed from all retained profiles.

    Parameters
    ----------
    years : iterable of years, passed one by one to parse_data.
    layer : (bottom, top); levels with bottom <= hgt < top enter the
        layer mean.
    verbose : print the number of hours with missing precip per year.

    Returns
    -------
    dict with keys
        WD, WS           : profiles of direction / speed, all years
        WD_rain, WS_rain : same, restricted to hours with CZD precip > 0
        wd_layer         : layer-mean wind direction
        precip           : precip frame of the LAST year processed only
        precip_good      : precip of the retained hours, all years

    Raises
    ------
    ValueError
        If years is empty or None.
    """
    import pandas as pd
    import parse_data

    if not years:
        raise ValueError('preprocess needs at least one year')

    # per-year pieces are concatenated once after the loop
    wd_parts = []
    ws_parts = []
    wd_rain_parts = []
    ws_rain_parts = []
    precip_parts = []

    for year in years:

        wpr = parse_data.windprof(year=year)
        bby = parse_data.surface('bby', year=year)
        czd = parse_data.surface('czd', year=year)
        hgt = wpr.hgt

        # find common time period
        first = max(bby.dframe.index[0],
                    czd.dframe.index[0],
                    wpr.dframe.index[0])
        last = min(bby.dframe.index[-1],
                   czd.dframe.index[-1],
                   wpr.dframe.index[-1])

        # reduce time interval so all start and end at same time
        wpr = wpr.dframe.loc[first:last]
        bby = bby.dframe.loc[first:last]
        czd = czd.dframe.loc[first:last]

        # prepend surface values to windprof to make entire profile;
        # the iterators are consumed in row order, so this relies on
        # bby and wpr having the same hourly rows
        surf_wsp = iter(bby.wspd.values.tolist())
        surf_wdr = iter(bby.wdir.values.tolist())
        wsp = wpr.wspd.map(lambda x: [next(surf_wsp)] + x)
        wdr = wpr.wdir.map(lambda x: [next(surf_wdr)] + x)
        hgt = np.append([0], hgt)

        # hours with missing precip at either site
        precip = pd.concat([bby.precip, czd.precip], axis=1)
        precip.columns = ['bby', 'czd']
        precip_nans = precip.isnull().any(axis=1)
        precip_nans.name = 'precip_nan'
        tx = 'year:{}, any_precip_nan:{:4d}'
        if verbose:
            print(tx.format(year, precip_nans.sum()))

        # hours whose entire profile is missing (same for ws and wd)
        prof_nans = wsp.apply(lambda x: np.isnan(x).all())
        prof_nans.name = 'prof_nan'

        # include only hours with precip at both sites and a profile
        # with at least one non-missing value
        nan_df = pd.concat([precip_nans, prof_nans], axis=1)
        include = ~nan_df.any(axis=1)
        precip_parts.append(precip[include])

        # rainy hours at CZD
        rain_czd = czd.precip > 0

        # reduce and keep for the big Series
        wdr = wdr[include]
        wsp = wsp[include]
        wd_parts.append(wdr)
        ws_parts.append(wsp)
        wd_rain_parts.append(wdr[rain_czd])
        ws_rain_parts.append(wsp[rain_czd])

    WD = pd.concat(wd_parts)
    WS = pd.concat(ws_parts)
    WD_rain = pd.concat(wd_rain_parts)
    WS_rain = pd.concat(ws_rain_parts)
    precip_good = pd.concat(precip_parts)

    # compute components
    # NOTE(review): sin/cos come from elsewhere in the file; the
    # arguments 180 and 90 suggest they work in degrees - confirm
    WD_sin = WD.apply(sin)
    WD_cos = WD.apply(cos)
    U_df = -1 * WS.multiply(WD_sin)
    V_df = -1 * WS.multiply(WD_cos)
    wind_flow_180 = -(U_df * sin(180) + V_df * cos(180))
    wind_flow_90 = U_df * sin(90) + V_df * cos(90)

    # layer mean (hgt is the level array of the last year processed)
    layer_idx = np.where((hgt >= layer[0]) & (hgt < layer[1]))[0]
    mean_V = wind_flow_180.apply(lambda x: np.nanmean(x[layer_idx]))
    mean_U = wind_flow_90.apply(lambda x: np.nanmean(x[layer_idx]))
    wd_layer = 270 - (np.arctan2(mean_V, mean_U) * 180 / np.pi)
    wd_layer[wd_layer > 360] -= 360
    wd_layer.name = '{:2.0f}-{:2.0f}m'.format(hgt[layer_idx[0]],
                                              hgt[layer_idx[-1]])

    return dict(WD=WD,
                WS=WS,
                WD_rain=WD_rain,
                WS_rain=WS_rain,
                wd_layer=wd_layer,
                precip=precip,
                precip_good=precip_good)
# Build the WS / WD frames only when WS is not already defined in the
# namespace; referencing the bare name raises NameError otherwise, so
# re-running the script in the same session skips the loading loop.
try:
    WS
except NameError:
#    ws = {th:list() for th in target_hgts}
#    wd = {th:list() for th in target_hgts}
#    wdsrf = list()
    
    WS = pd.DataFrame()
    WD = pd.DataFrame()
    
    # `years` is expected to be defined earlier in the script
    for year in years:
          
        czd = parse_data.surface('czd', year=year)        
        bby = parse_data.surface('bby', year=year)
        wpr = parse_data.windprof(year=year)
        hgt = wpr.hgt
        
        ''' reduce to common time period '''
        # first / last timestamps of each dataset
        first_bby = bby.dframe.index[0]
        first_czd = czd.dframe.index[0]
        first_wpr = wpr.dframe.index[0]
    
        last_bby = bby.dframe.index[-1]
        last_czd = czd.dframe.index[-1]
        last_wpr = wpr.dframe.index[-1]
        
        # common period: latest beginning to earliest ending
        first = max(first_bby,first_czd,first_wpr)   
        last  = min(last_bby,last_czd,last_wpr)

        # profiler wind speed restricted to the common period
        wspd = wpr.dframe.loc[first:last].wspd
    def start(self, wdir_surf=None, wdir_wprof=None, 
              rain_bby=None,rain_czd=None,nhours=None):

        ''' Hourly TTA analysis using plain numpy arrays.

            This is an old version; prefer start_df, which uses a
            pandas dataframe.

            An hour meets the TTA condition when the BBY surface
            wind direction is <= wdir_surf, the first profiler gate
            wind direction is <= wdir_wprof and, when given (truthy),
            the hourly precip at BBY / CZD is >= rain_bby / rain_czd.
            Only runs of at least nhours consecutive analysed hours
            are flagged. Hours with a missing surface or first-gate
            wind direction are skipped and counted in count_exclude.

            Results are stored as attributes of self; nothing is
            returned.
        '''

        bby = parse_data.surface('bby', self.year)
        czd = parse_data.surface('czd', self.year)
        wprof = parse_data.windprof(self.year)

        beg_bby, end_bby = bby.check_beg_end()
        beg_czd, end_czd = czd.check_beg_end()
        beg_wpr, end_wpr = wprof.check_beg_end()

        # the latest of the beginnings and the earliest of the endings
        time_beg = max(beg_bby, beg_czd, beg_wpr)
        time_end = min(end_bby, end_czd, end_wpr)

        # rainfall before all obs start
        rbby_before = np.nansum(bby.dframe.loc[:time_beg].precip)
        rczd_before = np.nansum(czd.dframe.loc[:time_beg].precip)

        # rainfall after all obs end
        rbby_after = np.nansum(bby.dframe.loc[time_end:].precip)
        rczd_after = np.nansum(czd.dframe.loc[time_end:].precip)

        # number of wind profiles before (after) all obs start (end)
        nwprof_before = len(wprof.dframe.loc[:time_beg].wdir)
        nwprof_after = len(wprof.dframe.loc[time_end:].wdir)

        onehr = timedelta(hours=1)
        time = time_beg
        bool_buffer = np.array([False] * nhours)
        tta_bool = np.array([])
        rainfall_czd = np.array([])
        rainfall_bby = np.array([])
        count = 0
        count_while = 0
        count_exclude = 0

        while time <= time_end:

            surf_wd = bby.dframe.loc[time].wdir
            wpr_wd0 = wprof.dframe.loc[time].wdir[0]  # first gate
            pbby = bby.dframe.loc[time].precip
            pczd = czd.dframe.loc[time].precip

            # exclude data when there is nan in surf obs or
            # windprof first gate
            if surf_wd is None or np.isnan(surf_wd) or np.isnan(wpr_wd0):
                count_exclude += 1
                time += onehr
                continue

            # these are obs included in the analysis, then we
            # determine if they are tta or no-tta
            rainfall_bby = np.append(rainfall_bby, pbby)
            rainfall_czd = np.append(rainfall_czd, pczd)

            # check conditions
            cond1 = (surf_wd <= wdir_surf)
            cond2 = (wpr_wd0 <= wdir_wprof)
            tta_condition = cond1 and cond2
            if rain_bby:
                tta_condition = tta_condition and (pbby >= rain_bby)
            if rain_czd:
                tta_condition = tta_condition and (pczd >= rain_czd)

            # construct boolean array indicating hourly TTA
            # conditions with a minimum of nhours
            if tta_condition and bool_buffer.all():
                # already inside a confirmed run
                tta_bool = np.append(tta_bool, [True])
            elif tta_condition:
                # run not confirmed yet: hold the hour in the buffer
                bool_buffer[count] = True
                count += 1
                if bool_buffer.all():
                    tta_bool = np.append(tta_bool, bool_buffer)
            else:
                bufsum = bool_buffer.sum()
                if bufsum == 0 or bufsum == nhours:
                    tta_bool = np.append(tta_bool, [False])
                else:
                    # discard the too-short run plus the current hour
                    tta_bool = np.append(tta_bool, [False] * (bufsum + 1))
                # reset buffer
                bool_buffer = np.array([False] * nhours)
                count = 0

            count_while += 1
            time += onehr

        # a run shorter than nhours that reaches the end of the period
        # is still pending in the buffer; flush it as non-TTA so that
        # tta_bool has one value per analysed hour and can index the
        # rainfall arrays below
        if 0 < count < nhours:
            tta_bool = np.append(tta_bool, [False] * count)

        tta_bool = np.array(tta_bool).astype(bool)
        tta_hours = tta_bool.sum()
        notta_hours = count_while - tta_hours

        self.tta_hours = tta_hours
        self.notta_hours = notta_hours
        self.time_beg = time_beg
        self.time_end = time_end
        self.count_while = count_while
        self.count_exclude = count_exclude
        self.total_rainfall_bby = np.nansum(rainfall_bby)
        self.total_rainfall_czd = np.nansum(rainfall_czd)
        self.bool = tta_bool
        self.tta_rainfall_czd = np.nansum(rainfall_czd[tta_bool])
        self.tta_rainfall_bby = np.nansum(rainfall_bby[tta_bool])
        self.notta_rainfall_czd = np.nansum(rainfall_czd[~tta_bool])
        self.notta_rainfall_bby = np.nansum(rainfall_bby[~tta_bool])
        self.rainfall_bby_before_analysis = rbby_before
        self.rainfall_bby_after_analysis = rbby_after
        self.rainfall_czd_before_analysis = rczd_before
        self.rainfall_czd_after_analysis = rczd_after
        self.nwprof_before = nwprof_before
        self.nwprof_after = nwprof_after
        self.wprof_hgt = wprof.hgt

        print('TTA analysis finished')
示例#17
0
#years = [1998]
# list(...) keeps the concatenation valid when range() is not a list
years = [1998] + list(range(2001, 2013))

# Load the data only when wdsrf is not already defined in the
# namespace, so re-running the script in the same session skips the
# loading loop.
try:
    wdsrf
except NameError:
    # one list per target height; target_hgts is defined elsewhere
    ws = {th: list() for th in target_hgts}
    wd = {th: list() for th in target_hgts}
    wdsrf = list()

    # 'all': no selection, 'czd' / 'bby': hours with precip > 0 there
    select_rain = 'all'

    for year in years:

        wpr = parse_data.windprof(year=year)
        wspd = wpr.dframe.wspd
        wdir = wpr.dframe.wdir
        hgt = wpr.hgt

        czd = parse_data.surface('czd', year=year)
        bby = parse_data.surface('bby', year=year)

        if select_rain == 'all':
            select = None
        elif select_rain == 'czd':
            rain_czd = czd.dframe.precip > 0
            # timestamps where the boolean Series is True
            select = rain_czd[rain_czd].index
        elif select_rain == 'bby':
            rain_bby = bby.dframe.precip > 0
            select = rain_bby[rain_bby].index
def _cfad_histogram(stack, bins, normalized=True):
    """Return the per-gate frequency histogram of a 2-D profile stack.

    Each row of `stack` is histogrammed over `bins` after dropping NaNs
    (following CFAD, Yuter et al. 1995). The result has shape
    (stack.shape[0], len(bins) - 1). With `normalized` the counts of each
    row are converted to percent of that row's valid samples; a row with
    no valid samples stays NaN.
    """
    n_gates = stack.shape[0]
    hist = np.full((n_gates, len(bins) - 1), np.nan)
    for gate in range(n_gates):
        row = stack[gate, :]
        freq, _ = np.histogram(row[~np.isnan(row)], bins=bins)
        if not normalized:
            hist[gate, :] = freq
            continue
        total = freq.sum()
        if total > 0:
            hist[gate, :] = 100. * (freq / float(total))
    return hist


def plot(year=None, target=None, pngsuffix=False, normalized=True,
         contourf=True, pdfsuffix=False, wdsurf=None, wdwpro=None,
         rainbb=None, raincz=None, nhours=None):
    """Plot CFADs of wind profiles for all, TTA and NO-TTA hours.

    For every year a `tta_analysis` is run and the wind profiler variable
    `target` is split into included, TTA and NO-TTA dates. The profiles of
    all years are joined and drawn as three side-by-side panels of
    frequency by altitude.

    Parameters
    ----------
    year : sequence
        Years to process (must support len() and indexing). Required.
    target : str
        'wdir' (wind direction) or 'wspd' (wind speed).
    pngsuffix, pdfsuffix : str or False
        When truthy the figure is saved as
        'wprof_<target>_cfad<suffix>.png' (or '.pdf') and closed;
        `pngsuffix` takes precedence. When both are falsy the figure is
        shown instead.
    normalized : bool
        Express frequencies as percent of valid samples per height gate
        instead of raw counts.
    contourf : bool
        Draw filled contours (levels 2 to 20) instead of pcolormesh.
    wdsurf, wdwpro, rainbb, raincz, nhours
        Forwarded to `tta_analysis.start_df` as `wdir_surf`, `wdir_wprof`,
        `rain_bby`, `rain_czd` and `nhours`; also echoed in the title.

    Raises
    ------
    ValueError
        If `target` is not supported or `year` is empty.
    """
    name = {'wdir': 'Wind Direction',
            'wspd': 'Wind Speed'}

    # Fail early with a clear message instead of an UnboundLocalError later.
    if target not in name:
        raise ValueError(
            "target must be one of {}, got {!r}".format(sorted(name), target))
    if year is None or len(year) == 0:
        raise ValueError('year must contain at least one year')

    if target == 'wdir':
        bins = np.arange(0, 370, 10)
        hist_xticks = np.arange(0, 420, 60)
        hist_xlim = [0, 360]
    else:  # 'wspd'
        bins = np.arange(0, 36, 1)
        hist_xticks = np.arange(0, 40, 5)
        hist_xlim = [0, 35]

    parts_all, parts_tta, parts_notta = [], [], []
    for y in year:
        print('Processing year {}'.format(y))

        # tta analysis
        tta = tta_analysis(y)
        tta.start_df(wdir_surf=wdsurf,
                     wdir_wprof=wdwpro,
                     rain_bby=rainbb,
                     rain_czd=raincz,
                     nhours=nhours)

        # retrieve dates
        include_dates = tta.include_dates
        tta_dates = tta.tta_dates
        notta_dates = tta.notta_dates

        # read wprof
        wprof_df = parse_data.windprof(y)
        wprof = wprof_df.dframe[target]

        # wprof partition
        wprof = wprof.loc[include_dates]      # all included
        wprof_tta = wprof.loc[tta_dates]      # only tta
        wprof_notta = wprof.loc[notta_dates]  # only notta

        parts_all.append(np.squeeze(pandas2stack(wprof)))
        parts_tta.append(np.squeeze(pandas2stack(wprof_tta)))
        parts_notta.append(np.squeeze(pandas2stack(wprof_notta)))

    # Join the years once, after the loop; a single year is used as-is.
    wp, wp_tta, wp_notta = [
        parts[0] if len(parts) == 1 else np.hstack(parts)
        for parts in (parts_all, parts_tta, parts_notta)
    ]

    _, wp_hours = wp.shape
    _, tta_hours = wp_tta.shape
    _, notta_hours = wp_notta.shape

    # makes CFAD; the number of height gates comes from the data rather
    # than being assumed.
    hist_wp = _cfad_histogram(wp, bins, normalized)
    hist_wptta = _cfad_histogram(wp_tta, bins, normalized)
    hist_wpnotta = _cfad_histogram(wp_notta, bins, normalized)

    _, axs = plt.subplots(1, 3, sharey=True, figsize=(10, 8))
    ax1, ax2, ax3 = axs[0], axs[1], axs[2]

    x = bins
    y = wprof_df.hgt  # heights of the last processed year

    if contourf:
        X, Y = np.meshgrid(x, y)

        # bins holds edges (one more than histogram columns); pad each
        # histogram with a NaN column so it matches the meshgrid.
        def pad(hist):
            return np.hstack((hist, np.full((hist.shape[0], 1), np.nan)))

        hist_wp = pad(hist_wp)
        hist_wptta = pad(hist_wptta)
        hist_wpnotta = pad(hist_wpnotta)

        vmax = 20
        nlevels = 10
        delta = int(vmax / nlevels)
        v = np.arange(2, vmax + delta, delta)

        # plt.get_cmap instead of cm.get_cmap, which newer Matplotlib
        # releases no longer provide.
        cmap = plt.get_cmap('plasma')

        ax1.contourf(X, Y, hist_wp, v, cmap=cmap)
        p = ax2.contourf(X, Y, hist_wptta, v, cmap=cmap, extend='max')
        p.cmap.set_over(cmap(1.0))
        ax3.contourf(X, Y, hist_wpnotta, v, cmap=cmap)
        add_colorbar(ax3, p, size='4%')
    else:
        # NOTE(review): x has one more entry than the histograms have
        # columns while y has as many entries as rows; recent Matplotlib
        # versions may be stricter about this combination - confirm.
        p = ax1.pcolormesh(x, y, hist_wp, cmap='viridis')
        ax2.pcolormesh(x, y, hist_wptta, cmap='viridis')
        ax3.pcolormesh(x, y, hist_wpnotta, cmap='viridis')
        # NOTE(review): ticks come from the NO-TTA range while the colorbar
        # mappable is the all-profiles panel - confirm this is intended.
        amin = np.amin(hist_wpnotta)
        amax = np.amax(hist_wpnotta)
        cbar = add_colorbar(ax3, p, size='4%', ticks=[amin, amax])
        cbar.ax.set_yticklabels(['low', 'high'])

    # common setup of the three panels
    panels = [
        (ax1, 'All profiles (n={})'.format(wp_hours)),
        (ax2, 'TTA (n={})'.format(tta_hours)),
        (ax3, 'NO-TTA (n={})'.format(notta_hours)),
    ]
    for ax, txt in panels:
        ax.set_xticks(hist_xticks)
        ax.set_xlim(hist_xlim)
        ax.set_ylim([0, 4000])
        ax.text(0.5, 0.95, txt, fontsize=15,
                transform=ax.transAxes, va='bottom', ha='center')
    ax1.set_ylabel('Altitude [m] MSL')
    ax2.set_xlabel(name[target])

    title = 'Normalized frequencies of BBY wind profiles {} \n'
    title += 'TTA wdir_surf:{}, wdir_wp:{}, '
    title += 'rain_bby:{}, rain_czd:{}, nhours:{}'

    if len(year) == 1:
        yy = 'year {}'.format(year[0])
    else:
        yy = 'year {} to {}'.format(year[0], year[-1])
    plt.suptitle(title.format(yy, wdsurf, wdwpro, rainbb, raincz, nhours),
                 fontsize=15)

    plt.subplots_adjust(top=0.9, left=0.1, right=0.95, bottom=0.1,
                        wspace=0.1)

    if pngsuffix:
        out_name = 'wprof_{}_cfad{}.png'
        plt.savefig(out_name.format(target, pngsuffix))
        plt.close()
    elif pdfsuffix:
        out_name = 'wprof_{}_cfad{}.pdf'
        plt.savefig(out_name.format(target, pdfsuffix))
        plt.close()
    else:
        plt.show()