def __init__(self, msg_folder, y1=y1, y2=y2, months=None):
    """Index MSG raw binary '.gra' files under *msg_folder*.

    Collects files for years y1..y2 (defaults from module-level constants)
    and the requested months, loads the MSG lat/lon grid from 'lon.npz'
    and stores grid, years and file list on the instance.

    months: None -> all 12 months; a multi-element sequence is used as-is;
    a single-element sequence selects only that month.
    """
    yrange = range(y1, y2 + 1)  # 1998, 2014
    if months is None:
        mrange = range(1, 13)
    else:
        if len(months) > 1:
            mrange = months
        else:
            mrange = range(months[0], months[0] + 1)
    try:
        lpath = uarr.locate('lon.npz', msg_folder, exclude=None)
    except Exception:  # was a bare except: keep best-effort, but no longer swallow SystemExit/KeyboardInterrupt
        print('Not a directory or no msg lat/lon found')
        return
    mpath = os.path.join(msg_folder, 'msg_raw_binary')
    # BUG FIX: os.path.isdir() returns a bool and never raises for a missing
    # path, so the old `try: os.path.isdir(mpath) except: quit()` guard was
    # dead code. Test the return value so a missing data dir really aborts.
    if not os.path.isdir(mpath):
        print('No msg_raw_binary')
        quit()
    rfiles = []
    for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10
        filepath = os.path.join(mpath, str(yr), str(mo).zfill(2))
        try:
            files = uarr.locate('.gra', filepath, exclude='_182')
        except OSError:
            continue  # year/month directory missing
        rfiles.extend(files)
    rfiles.sort(key=ul.natural_keys)  # numeric-aware ordering
    msg_latlon = np.load(lpath[0])
    mlon = msg_latlon['lon']
    mlat = msg_latlon['lat']
    self.lat = mlat
    self.lon = mlon
    self.nx = mlon.shape[1]
    self.ny = mlon.shape[0]
    self.years = os.listdir(mpath)
    self.root = msg_folder
    self.fpath = rfiles
def __init__(self, msg_folder, y1=y1, y2=y2, months=None):
    """Index MSG raw binary '.gra' files under *msg_folder*.

    Collects files for years y1..y2 (defaults from module-level constants)
    and the requested months, loads the MSG lat/lon grid from 'lon.npz'
    and stores grid, years and file list on the instance.

    months: None -> all 12 months; a multi-element sequence is used as-is;
    a single-element sequence selects only that month.
    """
    yrange = range(y1, y2 + 1)  # 1998, 2014
    if months is None:
        mrange = range(1, 13)
    else:
        if len(months) > 1:
            mrange = months
        else:
            mrange = range(months[0], months[0] + 1)
    try:
        lpath = uarr.locate('lon.npz', msg_folder, exclude=None)
    except Exception:  # was a bare except: keep best-effort, but no longer swallow SystemExit/KeyboardInterrupt
        print('Not a directory or no msg lat/lon found')
        return
    mpath = os.path.join(msg_folder, 'msg_raw_binary')
    # BUG FIX: os.path.isdir() returns a bool and never raises for a missing
    # path, so the old `try: os.path.isdir(mpath) except: quit()` guard was
    # dead code. Test the return value so a missing data dir really aborts.
    if not os.path.isdir(mpath):
        print('No msg_raw_binary')
        quit()
    rfiles = []
    for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10
        filepath = os.path.join(mpath, str(yr), str(mo).zfill(2))
        try:
            files = uarr.locate('.gra', filepath, exclude='_182')
        except OSError:
            continue  # year/month directory missing
        rfiles.extend(files)
    rfiles.sort(key=ul.natural_keys)  # numeric-aware ordering
    msg_latlon = np.load(lpath[0])
    mlon = msg_latlon['lon']
    mlat = msg_latlon['lat']
    self.lat = mlat
    self.lon = mlon
    self.nx = mlon.shape[1]
    self.ny = mlon.shape[0]
    self.years = os.listdir(mpath)
    self.root = msg_folder
    self.fpath = rfiles
def composite():
    """Build per-scale composites from the -40C MCS wavelet files.

    file_loop() results are gathered in parallel and grouped by wavelet
    scale (v[2]) into `comp_collect`/`precip`; the flattened per-maximum
    values are collected into `dic`. Only `dic` is pickled here —
    `comp_collect` and `precip` are built but not saved by this variant.
    """
    pool = multiprocessing.Pool(processes=7)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_size_zR/')  # /WA30/
    out = '/users/global/cornkle/papers/wavelet/saves/pandas/'
    #files = files[0:1500]
    print('Nb files', len(files))
    tt = 'WA15'
    comp_collect = {}
    precip = {}
    res = pool.map(file_loop, files)
    pool.close()
    res = [x for x in res if x is not None]  # drop files that failed
    nb_sys = len(res)
    print('Number systems: ', nb_sys)
    res = [item for sublist in res for item in sublist]  # flatten list of lists
    # one composite bucket per wavelet scale (v[2])
    for v in res:
        comp_collect[v[2]] = {'p': [], 't': [], 'scale': [], 'hour': [], 'id': []}
        precip[v[2]] = []
    # ret.append((kernel, kernelt, sc, id, dic['time.hour'].values.tolist(),
    #             clat, clon, lat_min, lat_max, lon_min, lon_max, area,
    #             bulk_pmax, bulk_pmean, bulk_tmean, bulk_tmean_p, bulk_tmin_p, bulk_g30,
    #             circle_Tcenter, circle_p, circle_t, circle_valid, circle_sum,
    #             circle_nz, circle_g30, circle_max, circle_p99, circle_p95, circle_p90))
    dic = OrderedDict([('scale', []), ('id', []), ('hour', []), ('clat', []),
                       ('clon', []), ('lat_min', []), ('lat_max', []),
                       ('lon_min', []), ('lon_max', []), ('area', []),
                       ('bulk_pmax', []), ('bulk_pmean', []), ('bulk_tmean', []),
                       ('bulk_tmean_p', []), ('bulk_tmin_p', []), ('bulk_g30', []),
                       ('circle_pix', []), ('circle_Tcentre', []), ('circle_p', []),
                       ('circle_t', []), ('circle_val', []), ('circle_sum', []),
                       ('circle_nz', []), ('circle_g30', []), ('circle_max', []),
                       ('circle_p99', []), ('circle_p95', []), ('circle_p90', []),
                       ('circle_val_all', []), ('circle_pc', [])])
    keys = comp_collect.keys()
    print(keys)
    for v in res:
        print(v[2])
        comp_collect[v[2]]['p'].append(v[0])
        comp_collect[v[2]]['t'].append(v[1])
        comp_collect[v[2]]['hour'].append(v[4])
        comp_collect[v[2]]['id'].append(v[3])
        # dic keys map onto the result tuple starting at v[2]
        for cnt, kk in enumerate(dic.keys()):
            dic[kk].append(v[cnt + 2])  # omit kernel and kernelt
        precip[v[2]].extend(v[20])
    pkl.dump(dic, open(out + '3dmax_gt15000_lax_nonan_dominant_fulldomain.p', 'wb'))
def run():
    """Extract blob statistics from the VERA netCDF files.

    file_loop() results are gathered in parallel, flattened, and written
    out as one pickle (full blob fields) plus two CSVs (per-cluster and
    per-pixel summaries).
    """
    files = ua.locate(".nc", '/users/global/cornkle/VERA')
    pool = multiprocessing.Pool(processes=4)
    out = '/users/global/cornkle/VERA/blobs/'
    res = pool.map(file_loop, files)
    pool.close()
    res = [x for x in res if x is not None]  # drop failed files
    nb_sys = len(res)
    print('Number systems: ', nb_sys)
    res = [item for sublist in res for item in sublist]  # flatten list of lists
    # blon, blat, blon_c, blat_c, p, pmax, pmean
    dic = OrderedDict([('lons', []), ('lats', []), ('lon_c', []), ('lat_c', []),
                       ('p', []), ('pmax', []), ('pmean', []), ('hour', []),
                       ('area', [])])
    dic2 = OrderedDict([('lon_centre', []), ('lat_centre', []), ('pmax', []),
                        ('pmean', []), ('hour', []), ('area', [])])
    dic3 = {'precip_pixel': []}
    pick = [2, 3, 5, 6, 7, 8]  # result-tuple indices feeding dic2, in key order
    for v in res:
        for cnt, kk in enumerate(dic.keys()):
            dic[kk].append(v[cnt])
    for v in res:
        # BUG FIX: the old inner loop skipped cnt in (0, 1, 4), but `pick`
        # never contains those values — the guard was dead code and is gone.
        for cnt, kk in zip(pick, dic2.keys()):
            dic2[kk].append(v[cnt])
        dic3['precip_pixel'].extend(v[4])
    # close the pickle handle deterministically (was left to the GC before)
    with open(out + 'trmm_blobs_1000km2.p', 'wb') as fh:
        pkl.dump(dic, fh)
    df = pd.DataFrame.from_dict(dic2)
    df.to_csv(out + 'trmm_cluster.csv')
    df = pd.DataFrame.from_dict(dic3)
    df.to_csv(out + 'trmm_pixel.csv')
def rewriteBigcellTab():
    """Re-export every big-cell area table with a per-timeslot filename.

    Clears the 'rewrite' subdirectory, then parses each raw *.txt table
    and writes it back as 'cell_40c_HHMM_JJAS.txt', where HHMM comes from
    the first timestamp of the parsed table.
    """
    src_dir = "/users/global/cornkle/data/OBS/meteosat_WA30/bigcell_area_table/"
    dst_dir = src_dir + 'rewrite/'
    print(dst_dir)
    # wipe previously rewritten tables
    os.system('rm ' + dst_dir + '*.txt')
    for table_file in uarr.locate("*.txt", src_dir):
        print('Doing ' + table_file)
        parsed = parseCellTables(table_file)
        first_stamp = parsed["Date"][0]
        slot = str(first_stamp.hour).zfill(2) + str(first_stamp.minute).zfill(2)
        parsed.to_csv(dst_dir + 'cell_40c_' + slot + '_JJAS.txt')
def perSys():
    """Collect per-system bulk statistics from the -40C MCS files and pickle them."""
    tthresh = '-40'
    worker_pool = multiprocessing.Pool(processes=4)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA5000_4-8N_13W-13E_' + tthresh + '_18UTC/')
    print('Nb files', len(files))
    mdic = dictionary()
    res = worker_pool.map(file_loop, files)
    worker_pool.close()
    keys = mdic.keys()
    # Fold every per-file result into the bulk dictionary. A file_loop()
    # result of None raises TypeError when indexed and is skipped key by key.
    for v in res:
        for k in keys:
            try:
                mdic[k].append(v[k])
            except TypeError:
                continue
    pkl.dump(mdic, open('/users/global/cornkle/data/CLOVER/saves/bulk_' + tthresh + '_zeroRain_gt5k_-40thresh_OBSera.p', 'wb'))
def save():
    """Run the wavelet analysis over the Amazon files and write one netCDF.

    For every input file the wavelet plane at `scale_id` and the forest
    fraction are wrapped as time-stamped DataArrays, concatenated along
    time, and saved as 'rhod_<scale-km>kmt.nc'.
    """
    files = ua.locate(".nc", '/users/global/cornkle/data/Amazon')
    scale_id = 7  # index of the wavelet scale that is written out
    ylist = []
    vlist = []
    # BUG FIX / cleanup: removed the unused locals `years`, `yy` and
    # `y = yy[i]` — yy had only 7 entries, so the lookup would have raised
    # IndexError as soon as more files appeared, and y was never used.
    for f in files:
        print('Doing ' + f)
        array = xr.open_dataset(f)
        wl, scales, forest = run(array)
        print(scales)
        wwl = wl[scale_id]
        # prepend a time axis so the yearly fields can be concatenated
        yarr = xr.DataArray(wwl[np.newaxis, ...], coords=[array.time, array.lat, array.lon],
                            dims=['time', 'lat', 'lon'])
        veg = xr.DataArray(forest[np.newaxis, ...], coords=[array.time, array.lat, array.lon],
                           dims=['time', 'lat', 'lon'])
        ylist.append(yarr)
        vlist.append(veg)
    yarr = xr.concat(ylist, dim='time')
    veg = xr.concat(vlist, dim='time')
    sc = int(scales[scale_id] / 1000.)  # scale in km for the filename
    xarr = xr.Dataset()
    xarr['vegfra'] = veg
    xarr['wav'] = yarr
    xarr.to_netcdf('/users/global/cornkle/amazon/nc/rhod_' + str(sc) + 'kmt.nc')
    print('Saved ' + '/users/global/cornkle/amazon/nc/rhod_' + str(sc) + 'kmt.nc')
def perSys():
    """Collect per-system bulk statistics (thick-layer OBS-era run) and pickle them."""
    tthresh = '-40'
    worker_pool = multiprocessing.Pool(processes=4)
    files = ua.locate(".nc", cnst.network_data + 'MCSfiles/WA5000_4-8N_13W-13E_' + tthresh + '_18UTC/')
    print('Nb files', len(files))
    mdic = dictionary()
    res = worker_pool.map(file_loop, files)
    worker_pool.close()
    # Fold every per-file result into the bulk dictionary; a None result
    # raises TypeError when indexed and is skipped key by key.
    for v in res:
        for k in mdic.keys():
            try:
                mdic[k].append(v[k])
            except TypeError:
                continue
    pkl.dump(mdic, open(cnst.network_data + 'data/CLOVER/saves/bulk_' + tthresh + '_zeroRain_gt5k_-40thresh_OBSera_thicklayer.p', 'wb'))
def save():
    """Run the wavelet analysis over the Amazon files and write one netCDF.

    For every input file the wavelet plane at `scale_id` and the forest
    fraction are wrapped as time-stamped DataArrays, concatenated along
    time, and saved as 'rhod_<scale-km>kmt.nc'.
    """
    files = ua.locate(".nc", '/users/global/cornkle/data/Amazon')
    scale_id = 7  # index of the wavelet scale that is written out
    ylist = []
    vlist = []
    # BUG FIX / cleanup: removed the unused locals `years`, `yy` and
    # `y = yy[i]` — yy had only 7 entries, so the lookup would have raised
    # IndexError as soon as more files appeared, and y was never used.
    for f in files:
        print('Doing ' + f)
        array = xr.open_dataset(f)
        wl, scales, forest = run(array)
        print(scales)
        wwl = wl[scale_id]
        # prepend a time axis so the yearly fields can be concatenated
        yarr = xr.DataArray(wwl[np.newaxis, ...], coords=[array.time, array.lat, array.lon],
                            dims=['time', 'lat', 'lon'])
        veg = xr.DataArray(forest[np.newaxis, ...], coords=[array.time, array.lat, array.lon],
                           dims=['time', 'lat', 'lon'])
        ylist.append(yarr)
        vlist.append(veg)
    yarr = xr.concat(ylist, dim='time')
    veg = xr.concat(vlist, dim='time')
    sc = int(scales[scale_id] / 1000.)  # scale in km for the filename
    xarr = xr.Dataset()
    xarr['vegfra'] = veg
    xarr['wav'] = yarr
    xarr.to_netcdf('/users/global/cornkle/amazon/nc/rhod_' + str(sc) + 'kmt.nc')
    print('Saved ' + '/users/global/cornkle/amazon/nc/rhod_' + str(sc) + 'kmt.nc')
def run():
    """Collect Chris-2016 MCS statistics from the -40C file set and pickle them."""
    worker_pool = multiprocessing.Pool(processes=7)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E/')  # /WA30/
    out = '/users/global/cornkle/C_paper/chris2016/'
    print('Nb files', len(files))
    res = worker_pool.map(file_loop, files)
    worker_pool.close()
    res = [x for x in res if x is not None]
    print('Number systems: ', len(res))
    res = [item for sublist in res for item in sublist]  # flatten list of lists
    # big , fin,shape, sum, sumvalid, tmin
    dic = {key: [] for key in ('year', 'month', 'hour', 'precip', 'sum30',
                               'sum20', 'sum', 'valid', 'nz', 'clon', 'clat',
                               'cent')}
    # position of each field inside a result tuple (note: clat is v[9],
    # clon is v[10] — the reverse of their order in the dict above)
    field_order = ('year', 'month', 'hour', 'precip', 'sum30', 'sum20',
                   'sum', 'valid', 'nz', 'clat', 'clon', 'cent')
    for v in res:
        for idx, key in enumerate(field_order):
            dic[key].append(v[idx])
    pkl.dump(dic, open(out + 'chris_mcs_-40_gt1000.p', 'wb'))
def perSys():
    """Per-year bulk statistics for the future CP25 -50C/TCWV runs.

    For each year 2000-2006 the matching files are processed sequentially
    with file_loop() and the collected dictionary is pickled to one file
    per year.
    """
    tthresh = '-50'
    files = ua.locate(".nc", '/media/ck/Elements/Africa/WestAfrica/CP4/CP25_16-19UTC_future_5000km2_-50C_TCWV')  #CP25_-50C_5000km2
    print('Nb files', len(files))
    for y in range(2000, 2007):
        # files whose path mentions this year
        yfiles = [f for f in files if str(y) in f]
        mdic = dictionary()  #defaultdict(list)
        print('Yearly files', len(yfiles))
        # NOTE: processing is sequential. The per-year multiprocessing.Pool
        # created here before was never used (its map call was commented
        # out) and leaked worker processes every iteration — removed.
        # BUG FIX: removed a leftover active ipdb.set_trace() breakpoint
        # that halted every unattended run.
        res = [file_loop(f) for f in yfiles]
        keys = mdic.keys()
        for v in res:
            for k in keys:
                try:
                    mdic[k].append(v[k])
                except TypeError:
                    continue  # file_loop returned None for this file
        with open(cnst.network_data + 'data/CLOVER/saves/bulk_' + tthresh + '_5000km2_P25means_hourly_SAHEL_15kmprecip_WA_5-20N_-50C_TCWV_fut_' + str(y) + '.p', 'wb') as fh:
            pkl.dump(mdic, fh)
        print('Saved file')
def minmax():
    """Scatter-plot paired extremes from file_loop results.

    Results tagged 't' feed the temperature-vs-precip panel, results
    tagged 'g' feed the gradient-vs-precip panel.
    """
    worker_pool = multiprocessing.Pool(processes=5)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA30/')
    print('Nb files', len(files))
    res = worker_pool.map(file_loop, files)
    worker_pool.close()
    print('test')
    temp, ptemp = [], []
    grad, pgrad = [], []
    # split results by their leading tag
    for v in res:
        tag = v[0]
        if tag == 't':
            temp.append(v[1])
            ptemp.append(v[2])
        if tag == 'g':
            grad.append(v[1])
            pgrad.append(v[2])
    fig = plt.figure()
    fig.add_subplot(1, 2, 1)
    plt.scatter(temp, ptemp)
    plt.title('temp', fontsize=9)
    fig.add_subplot(1, 2, 2)
    plt.scatter(grad, pgrad)
    plt.title('grad', fontsize=9)
def readMCS_getWavelet_tresh():
    """Wavelet-decompose MCS temperature/rain fields using a per-system threshold.

    For every MCS netCDF file the five temperature lags (tc_lag0..tc_lag3,
    tc_lagx) are filled with the 30th percentile of the finite lag-0 field,
    clipped at (coldest pixel + 15), wavelet-transformed with
    util.waveletTP, masked to the TRMM overlap (pmask), and the original
    fields plus the wavelet planes at the scales in `ext` are accumulated
    into `wave`, which is pickled at the end.
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')
    # arr=np.array([15, 16, 17, 18, 19, 20, 21, 22, 24,
    # 25, 27, 28, 30, 32, 34, 36, 38, 40,
    # 42, 45, 48, 50, 53, 57, 60, 64, 67,
    # 71, 76, 80, 85, 90, 95, 101, 107, 113,
    # 120, 127, 135, 143, 151, 160, 170, 180, 190, 202], dtype=str)
    # wavelet scales as strings; only the ones also listed in `ext` are stored
    arr = np.array([10, 11, 11, 12, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24,
                    25, 27, 28, 30, 32, 34, 36, 38, 40,
                    42, 45, 48, 50, 53, 57, 60], dtype=str)
    ext = np.array([10, 15, 20, 30, 60], dtype=str)
    rrange = list(range(arr.size))
    scales = np.in1d(arr, ext)
    rpos = np.array(rrange)[scales]  # indices of the stored scales within arr
    print(rpos)
    wave = {}
    # per-system 1st-percentile temperature minima and 99th-percentile rain maxima
    wave['torig0_min'] = []
    wave['torig1_min'] = []
    wave['torig2_min'] = []
    wave['torig3_min'] = []
    wave['torigx_min'] = []
    wave['porig_max'] = []
    # original (cleaned) fields per lag
    wave['torig0'] = []
    wave['torig1'] = []
    wave['torig2'] = []
    wave['torig3'] = []
    wave['torigx'] = []
    wave['porig'] = []
    for a in ext:
        # one sub-dict per stored wavelet scale
        wave[a] = {}
        wave[a]['twavelet0'] = []
        wave[a]['twavelet1'] = []
        wave[a]['twavelet2'] = []
        wave[a]['twavelet3'] = []
        wave[a]['twaveletx'] = []
        wave[a]['pwavelet'] = []
        wave[a]['twavelet0_max'] = []
        wave[a]['twavelet1_max'] = []
        wave[a]['twavelet2_max'] = []
        wave[a]['twavelet3_max'] = []
        wave[a]['twaveletx_max'] = []
        wave[a]['pwavelet_max'] = []
    # return wave
    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)
        # if (dic['time.hour'].values<15) or (dic['time.hour'].values>21):
        #     print('smaller')
        #     continue
        outt0 = np.array(dic['tc_lag0'].values.copy())
        outt1 = np.array(dic['tc_lag1'].values.copy())
        outt2 = np.array(dic['tc_lag2'].values.copy())
        outt3 = np.array(dic['tc_lag3'].values.copy())
        outtx = np.array(dic['tc_lagx'].values.copy())
        # fill value: 30th percentile of the finite lag-0 temperatures
        mmeans = np.percentile(outt0[np.isfinite(outt0)], 30)
        print(mmeans)
        maxi = np.nanmin(outt0)
        thresh = maxi + 15  # keep only pixels within 15 of the coldest point
        outp = np.array(dic['p'].values.copy())
        outp[np.isnan(outp)] = -10**-5  # tiny negative fill for missing rain
        outt0[np.isnan(outt0)] = mmeans  # -40
        outt0[outt0 > thresh] = mmeans
        outt1[np.isnan(outt1)] = mmeans
        outt1[outt1 > thresh] = mmeans
        outt2[np.isnan(outt2)] = mmeans
        outt2[outt2 > thresh] = mmeans
        outt3[np.isnan(outt3)] = mmeans
        outt3[outt3 > thresh] = mmeans
        outtx[np.isnan(outtx)] = mmeans
        outtx[outtx > thresh] = mmeans
        # outt2[outt2>thresh]=-40#mmeans
        # skip systems where a lag field was completely replaced by the fill
        if np.mean(outt0) == mmeans:
            continue
        if np.mean(outt1) == mmeans:
            continue
        if np.mean(outt2) == mmeans:
            continue
        if np.mean(outt3) == mmeans:
            continue
        if np.mean(outtx) == mmeans:
            continue
        if not outtx[outtx < mmeans].any():
            continue
        print('Wavelet start')
        wav0 = util.waveletTP(outt0, outp, 5)
        wav1 = util.waveletTP(outt1, outp, 5)
        wav2 = util.waveletTP(outt2, outp, 5)
        wav3 = util.waveletTP(outt3, outp, 5)
        wavx = util.waveletTP(outtx, outp, 5)
        #print(wav1['scales'])
        # outt0[np.where(dic['pmask'].values==0)]=mmeans
        # outt1[np.where(dic['pmask'].values==0)]=mmeans
        # outt2[np.where(dic['pmask'].values==0)]=mmeans
        # outt3[np.where(dic['pmask'].values==0)]=mmeans
        # outtx[np.where(dic['pmask'].values==0)]=mmeans
        wave['torig0_min'].append(np.percentile(outt0[outt0 < mmeans], 1))
        wave['torig1_min'].append(np.percentile(outt1[outt1 < mmeans], 1))
        wave['torig2_min'].append(np.percentile(outt2[outt2 < mmeans], 1))
        wave['torig3_min'].append(np.percentile(outt3[outt3 < mmeans], 1))
        wave['torigx_min'].append(np.percentile(outtx[outtx < mmeans], 1))
        wave['porig_max'].append(np.percentile(outp[outp > 1], 99))
        wave['torig0'].append(outt0)
        wave['torig1'].append(outt1)
        wave['torig2'].append(outt2)
        wave['torig3'].append(outt3)
        wave['torigx'].append(outtx)
        wave['porig'].append(outp)
        for pos in rpos:
            print(arr[pos])
            tt0 = np.array(wav0['t'][pos, :, :])
            tt1 = np.array(wav1['t'][pos, :, :])
            tt2 = np.array(wav2['t'][pos, :, :])
            tt3 = np.array(wav3['t'][pos, :, :])
            ttx = np.array(wavx['t'][pos, :, :])
            # NOTE(review): the rain wavelet plane is taken from wav2 (lag 2)
            # for all lags — confirm this is intentional.
            pp = np.array(wav2['p'][pos, :, :])
            # zero out everything outside the TRMM overlap
            tt0[np.where(dic['pmask'].values == 0)] = 0
            tt1[np.where(dic['pmask'].values == 0)] = 0
            tt2[np.where(dic['pmask'].values == 0)] = 0
            tt3[np.where(dic['pmask'].values == 0)] = 0
            ttx[np.where(dic['pmask'].values == 0)] = 0
            wave[arr[pos]]['twavelet0'].append(tt0)
            wave[arr[pos]]['twavelet1'].append(tt1)
            wave[arr[pos]]['twavelet2'].append(tt2)
            wave[arr[pos]]['twavelet3'].append(tt3)
            wave[arr[pos]]['twaveletx'].append(ttx)
            wave[arr[pos]]['pwavelet'].append(pp)
            wave[arr[pos]]['twavelet0_max'].append(np.percentile(tt0[tt0 > 0], 99))
            wave[arr[pos]]['twavelet1_max'].append(np.percentile(tt1[tt1 > 0], 99))
            wave[arr[pos]]['twavelet2_max'].append(np.percentile(tt2[tt2 > 0], 99))
            wave[arr[pos]]['twavelet3_max'].append(np.percentile(tt3[tt3 > 0], 99))
            wave[arr[pos]]['twaveletx_max'].append(np.percentile(ttx[ttx > 0], 99))
            wave[arr[pos]]['pwavelet_max'].append(np.percentile(pp[pp > 0], 99))
    pkl.dump(wave, open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc_thresh.p', 'wb'))
    print('Saved!')
# MCS rainfall-fraction script: compares the >=30 mm/h pixel counts attributed
# to tracked MCSs (from the pickled bulk statistics) against all >=30 mm/h
# TRMM pixels in a West-African latitude band, and prints the MCS fraction.
import pandas as pd
import multiprocessing
import pickle as pkl
from scipy.ndimage.measurements import label
import pdb

# per-system counts of pixels >= 30 mm/h ('po30') and system centre latitudes
dic = pkl.load(open('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_zR.p', 'rb'))
p30 = np.array(dic['po30'])
lat = np.array(dic['clat'])
# total MCS-attributed >=30 mm/h pixels between 4N and 7.5N
mcs_count = np.sum(p30[(lat >= 4) & (lat <= 7.5)])
files = ua.locate(".nc", '/users/global/cornkle/TRMMfiles')
cnt = 0
for f in files:
    print('Doing ', f)
    xa = xr.open_dataset(f)
    lat = xa.lat.values
    lon = xa.lon.values
    arr = xa['p'].values
    # NOTE(review): the TRMM band here is 4-7.8N while the MCS band above is
    # 4-7.5N — confirm the mismatch is intentional.
    arr = arr[(lat >= 4) & (lat <= 7.8) & (lon >= -17) & (lon <= 20)]
    nb = np.sum(arr >= 30)
    cnt += nb
print('MCS frac', mcs_count / cnt)
def composite():
    """Build per-scale composites from the -40C MCS wavelet files.

    file_loop() results are gathered in parallel and grouped by wavelet
    scale (v[2]) into `comp_collect`/`precip`; the flattened per-maximum
    values are collected into `dic`. All three containers are pickled
    under `out`.
    """
    pool = multiprocessing.Pool(processes=7)
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_size_zR/')  # /WA30/
    out = '/users/global/cornkle/C_paper/wavelet/saves/pandas/'
    #files = files[0:400]
    print('Nb files', len(files))
    tt = 'WA15'
    comp_collect = {}
    precip = {}
    res = pool.map(file_loop, files)
    pool.close()
    res = [x for x in res if x is not None]  # drop files that failed
    nb_sys = len(res)
    print('Number systems: ', nb_sys)
    res = [item for sublist in res for item in sublist]  # flatten list of lists
    # one composite bucket per wavelet scale (v[2])
    for v in res:
        comp_collect[v[2]] = {'p': [], 't': [], 'scale': [], 'hour': [], 'id': []}
        precip[v[2]] = []
    # ret.append((kernel, kernelt, sc, id, dic['time.hour'].values.tolist(),
    #             clat, clon, lat_min, lat_max, lon_min, lon_max, area,
    #             bulk_pmax, bulk_pmean, bulk_tmean, bulk_tmean_p, bulk_tmin_p, bulk_g30,
    #             circle_Tcenter, circle_p, circle_t, circle_valid, circle_sum,
    #             circle_nz, circle_g30, circle_max, circle_p99, circle_p95, circle_p90))
    dic = OrderedDict([('scale', []), ('id', []), ('hour', []), ('clat', []),
                       ('clon', []), ('lat_min', []), ('lat_max', []),
                       ('lon_min', []), ('lon_max', []), ('area', []),
                       ('bulk_pmax', []), ('bulk_pmean', []), ('bulk_tmean', []),
                       ('bulk_tmean_p', []), ('bulk_tmin_p', []), ('bulk_g30', []),
                       ('circle_pix', []), ('circle_Tcentre', []), ('circle_p', []),
                       ('circle_t', []), ('circle_val', []), ('circle_sum', []),
                       ('circle_nz', []), ('circle_g30', []), ('circle_max', []),
                       ('circle_p99', []), ('circle_p95', []), ('circle_p90', []),
                       ('circle_val_all', []), ('circle_pc', [])])
    keys = comp_collect.keys()
    print(keys)
    for v in res:
        print(v[2])
        comp_collect[v[2]]['p'].append(v[0])
        comp_collect[v[2]]['t'].append(v[1])
        comp_collect[v[2]]['hour'].append(v[4])
        comp_collect[v[2]]['id'].append(v[3])
        # dic keys map onto the result tuple starting at v[2]
        for cnt, kk in enumerate(dic.keys()):
            dic[kk].append(v[cnt + 2])  # omit kernel and kernelt
        precip[v[2]].extend(v[20])
    pkl.dump(dic, open(out + '3dmax_gt15000_-60.p', 'wb'))
    pkl.dump(precip, open(out + 'precip_3dmax_gt15000_-60.p', 'wb'))
    pkl.dump(comp_collect, open(out + 'comp_collect_composite_-60.p', 'wb'))
def readMCS_getWavelet():
    """Wavelet-decompose MCS temperature/rain fields with a fixed -40 fill.

    For every MCS netCDF file each temperature lag (tc_lag0..tc_lag3,
    tc_lagx) is filled with -40, wavelet-transformed with util.waveletTP,
    masked to the TRMM overlap (pmask), and the original fields plus the
    wavelet planes at the scales in `ext` are accumulated into `wave`,
    which is pickled at the end.
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')
    # arr=np.array([15, 16, 17, 18, 19, 20, 21, 22, 24,
    # 25, 27, 28, 30, 32, 34, 36, 38, 40,
    # 42, 45, 48, 50, 53, 57, 60, 64, 67,
    # 71, 76, 80, 85, 90, 95, 101, 107, 113,
    # 120, 127, 135, 143, 151, 160, 170, 180, 190, 202], dtype=str)
    # wavelet scales as strings; only the ones also listed in `ext` are stored
    arr = np.array([10, 11, 11, 12, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24,
                    25, 27, 28, 30, 32, 34, 36, 38, 40,
                    42, 45, 48, 50, 53, 57, 60], dtype=str)
    ext = np.array([10, 15, 20, 30, 60], dtype=str)
    rrange = list(range(arr.size))
    scales = np.in1d(arr, ext)
    rpos = np.array(rrange)[scales]  # indices of the stored scales within arr
    print(rpos)
    strarr = ['0', '1', '2', '3', 'x']
    # result containers: per-lag originals/minima, per-scale wavelet planes
    wave = {'porig_max': [], 'porig': []}
    for s in strarr:
        wave['torig' + s + '_min'] = []
        wave['torig' + s] = []
    for a in ext:
        wave[a] = {}
        for s in strarr:
            wave[a]['twavelet' + s] = []
            wave[a]['twavelet' + s + '_max'] = []
        wave[a]['pwavelet'] = []
        wave[a]['pwavelet_max'] = []
    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)
        outt0 = np.array(dic['tc_lag0'].values.copy())
        outt1 = np.array(dic['tc_lag1'].values.copy())
        outt2 = np.array(dic['tc_lag2'].values.copy())
        outt3 = np.array(dic['tc_lag3'].values.copy())
        outtx = np.array(dic['tc_lagx'].values.copy())
        outp = np.array(dic['p'].values.copy())
        outp[np.isnan(outp)] = -10**-5  # tiny negative fill for missing rain
        wave['porig_max'].append(np.percentile(outp[outp > 1], 99))
        wave['porig'].append(outp)
        looparr = [outt0, outt1, outt2, outt3, outtx]
        for outt, strr in zip(looparr, strarr):
            wave['torig' + strr].append(outt)
            outt[np.isnan(outt)] = -40
            wav = util.waveletTP(outt, outp, 5)
            outt[np.where(dic['pmask'].values == 0)] = -40
            # BUG FIX: the 1st-percentile minimum was always computed from
            # outt0 (lag 0) regardless of the lag being processed; use the
            # current lag's field, matching the per-lag handling in the
            # sibling readMCS_getWavelet_tresh().
            wave['torig' + strr + '_min'].append(
                np.percentile(outt[outt < -40], 1))
            for pos in rpos:
                print(arr[pos])
                tt = np.array(wav['t'][pos, :, :])
                if strr == '0':
                    # rain wavelet planes are stored once, from the lag-0 pass
                    pp = np.array(wav['p'][pos, :, :])
                    wave[arr[pos]]['pwavelet'].append(pp)
                    wave[arr[pos]]['pwavelet_max'].append(
                        np.percentile(pp[pp > 0], 99))
                tt[np.where(dic['pmask'].values == 0)] = 0
                wave[arr[pos]]['twavelet' + strr].append(tt)
                wave[arr[pos]]['twavelet' + strr + '_max'].append(
                    np.percentile(tt[tt > 0], 99))
    pkl.dump(wave, open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc.p', 'wb'))
    print('Saved!')
def perSys():
    """Collect per-system bulk statistics from the -10C MCS files and pickle them."""
    pool = multiprocessing.Pool(processes=5)
    tthresh = '-10'
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA350_4-8N_14W-10E_' + tthresh + '/')
    print('Nb files', len(files))
    mdic = defaultdict(list)
    res = pool.map(file_loop, files)
    pool.close()
    # (key, extend?) pairs in result-tuple order: scalar fields are appended,
    # list-valued fields (pperc, lon30/lat30, lonisfin/latisfin) are extended.
    fields = (('tmin', False), ('pmax', False), ('area', False),
              ('ao60', False), ('tmean', False), ('pperc', True),
              ('clat', False), ('po30', False), ('isfin', False),
              ('t', False), ('lon30', True), ('lat30', True),
              ('lonisfin', True), ('latisfin', True), ('hour', False),
              ('month', False), ('latmin', False), ('latmax', False),
              ('isnz', False), ('clon', False), ('p', False),
              ('pc', False), ('year', False))
    for v in res:
        try:
            for idx, (key, is_list) in enumerate(fields):
                if is_list:
                    mdic[key].extend(v[idx])
                else:
                    mdic[key].append(v[idx])
        except TypeError:
            # file_loop returned None for this file
            continue
    pkl.dump(mdic, open('/users/global/cornkle/data/CLOVER/saves/bulk_' + tthresh + '_zeroRain.p', 'wb'))
def __init__(self, msg_folder, y1=y1, y2=y2, months=None):
    """Index MFG raw binary files and both (plain and stitched) lat/lon grids.

    Globs per-month directories under 'mfg_raw_binary' for years y1..y2
    (defaults from module-level constants) and stores grids, years and
    per-directory 'tir.gra' paths on the instance.

    months: None -> all 12 months; a multi-element sequence is used as-is;
    a single-element sequence selects only that month.
    """
    yrange = range(y1, y2 + 1)  # 1998, 2014
    if months is None:
        mrange = range(1, 13)
    else:
        if len(months) > 1:
            mrange = months
        else:
            mrange = range(months[0], months[0] + 1)
    try:
        lpath = uarr.locate('lon.npz', msg_folder, exclude=None)
        spath = uarr.locate('lon_stitch.npz', msg_folder, exclude=None)
    except Exception:  # was a bare except: keep best-effort, but no longer swallow SystemExit/KeyboardInterrupt
        print('Not a directory or no msg lat/lon found')
        return
    mpath = os.path.join(msg_folder, 'mfg_raw_binary')
    # BUG FIX: os.path.isdir() returns a bool and never raises for a missing
    # path, so the old `try/except: quit()` guard was dead code. Test the
    # return value so a missing data dir really aborts.
    if not os.path.isdir(mpath):
        print('No mfg_raw_binary')
        quit()
    rfiles = []
    for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10
        # glob.glob() returns [] for non-matching patterns and does not raise
        # OSError, so the old try/except and the unused `filepath` local
        # were removed.
        files = glob.glob(mpath + os.sep + str(yr) + str(mo).zfill(2) + '*')
        rfiles.extend(files)
    rfiles.sort(key=ul.natural_keys)  # numeric-aware ordering
    msg_latlon = np.load(lpath[0])
    mlon = msg_latlon['lon']
    mlat = msg_latlon['lat']
    msg_latlon_stitch = np.load(spath[0])
    slon = msg_latlon_stitch['lon']
    slat = msg_latlon_stitch['lat']
    self.lat = mlat
    self.lon = mlon
    self.nx = mlon.shape[1]
    self.ny = mlon.shape[0]
    self.stitch_lat = slat
    self.stitch_lon = slon
    self.stitch_nx = slon.shape[1]
    self.stitch_ny = slon.shape[0]
    years = []
    outfiles = []
    for r in rfiles:
        years.append(os.path.basename(r)[0:4])  # first 4 chars of dir name = year
        outfiles.append(r + os.sep + 'tir.gra')
    self.years = years
    self.root = msg_folder
    self.fpath = outfiles
def readMCS_getWavelet_label():
    """Extract per-maximum circle statistics from MCS wavelet decompositions.

    For every MCS netCDF file and every temperature lag, the cleaned and
    locally smoothed temperature field is decomposed with
    util.waveletTP_localMax; for each detected wavelet maximum that falls
    inside the TRMM overlap (pmask), means/extrema of T, rain and their
    wavelet planes are computed over a circle of radius scale/2 (in 5 km
    pixels) and appended to `wave`, which is pickled at the end.
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')
    wave = {}
    strarr = ['0', '1', '2', '3', 'x']
    wave['porig'] = []
    wave['pw'] = []
    wave['tw0'] = []
    # wave['scales']=[]
    for st in strarr:
        wave['torig' + st] = []
        wave['tw' + st + '_max'] = []  # wavelet value at max point
        wave['pw' + st + '_max'] = []  # max p wavelet in radius
        wave['p' + st + '_max'] = []  # max p in radius
        wave['p' + st + '_mean'] = []  # mean p in radius
        wave['t' + st + '_mean'] = []  # t mean in radius
        wave['t' + st + '_min'] = []  # t min in radius
        wave['pw' + st + '_mean'] = []  # mean p in radius
        wave['tw' + st + '_mean'] = []  # t mean in radius
        wave['scales' + st] = []
        wave['pnb' + st] = []
    cntmax = 0  # total wavelet maxima found
    cntin = 0  # maxima (lag 0) coinciding with TRMM coverage
    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)
        # if (dic['time.hour'].values<15) or (dic['time.hour'].values>21):
        #     print('smaller')
        #     continue
        outp = np.array(dic['p'].values.copy())
        wave['porig'].append(outp)
        for strr in strarr:
            outt = np.array(dic['tc_lag' + strr].values.copy())
            outp[np.isnan(outp)] = -10**-5
            wave['torig' + strr].append(outt)
            # mark NaN and warm (> -40) pixels, detect sharp edges, then
            # flatten the warm background to -55 before smoothing
            outt[np.isnan(outt)] = 150
            outt[outt > -40] = 150
            grad = np.gradient(outt)
            outt[outt > -40] = -55
            o2 = outt.copy()
            nok = np.where(abs(grad[0]) > 80)  # strong-gradient (edge) pixels
            d = 2
            i = nok[0]
            j = nok[1]
            # smooth a 5x5 patch around every strong edge to suppress
            # spurious wavelet responses at the cloud boundary
            for ii, jj in zip(i, j):
                kernel = o2[ii - d:ii + d + 1, jj - d:jj + d + 1]
                # if not kernel.any():
                #     continue
                # else:
                o2[ii - d:ii + d + 1, jj - d:jj + d + 1] = ndimage.gaussian_filter(kernel, 3, mode='nearest')
            wav = util.waveletTP_localMax(o2, outp, 5)
            o2[np.where(dic['pmask'].values == 0)] = np.nan
            if strr == '0':
                outp[np.where(dic['pmask'].values == 0)] = np.nan
                wave['pw'].append(wav['p'])
            wave['tw' + strr].append(wav['t'])
            #print(wav['scales'])
            cntmax = cntmax + len(wav['z'])
            # NOTE(review): xs/ys below collect local temperature minima but
            # are never used afterwards — leftover from an earlier approach?
            xs = []
            ys = []
            for ss in [2, 6, 12]:
                maxoutt = (outt == ndimage.minimum_filter(outt, ss, mode='constant', cval=np.amax(outt) + 1))
                maxoutt = maxoutt.astype(int)
                ypks, xpks = np.where((maxoutt == 1) & (outt < -55))
                ys.extend(ypks)
                xs.extend(xpks)
            # NOTE(review): `radius` is unused; `[2, 6, 12] * 5` repeats the
            # list five times rather than scaling it — confirm before reuse.
            radius = [2, 6, 12] * 5
            z = wav['z']
            y = wav['y']
            x = wav['x']
            for i in range(len(z)):
                zz = z[i]
                xx = x[i]
                yy = y[i]
                if dic['pmask'][yy, xx] == 0:  # if maximum falls in region where no TRMM exists, continue
                    continue
                sc = wav['scales'][zz]
                if strr == '0':
                    cntin = cntin + 1
                # circle radius in pixels: scale/2 (km) over the 5 km grid
                iscale = (np.ceil(wav['scales'][zz] / 2. / 5.)).astype(int)
                tw = wav['t'][zz, :, :].copy()
                pw = wav['p'][zz, :, :].copy()  #copy??
                tw[np.isnan(tw)] = 0
                pw[np.isnan(pw)] = 0
                tw[np.where(dic['pmask'].values == 0)] = np.nan
                pw[np.where(dic['pmask'].values == 0)] = np.nan
                # pw[np.isnan(pw)]=1000
                #
                # ax=plt.axes(projection=ccrs.PlateCarree())
                # plt.contour(dic['lon'], dic['lat'], pw, levels=np.arange(500,1001,100), transform=ccrs.PlateCarree())
                # plt.show()
                twmax = tw[yy, xx]
                print(twmax)
                #Find all indices within the local circle of radius iscale...
                # ... Then average over those indices
                xloc1 = np.arange(xx - iscale, xx + iscale + 1)
                yloc1 = np.arange(yy - iscale, yy + iscale + 1)
                xloc, yloc = np.meshgrid(xloc1, yloc1)
                distloc = ((xloc - xx)**2 + (yloc - yy)**2)**.5
                indloc = (distloc <= iscale).nonzero()
                ycirc = indloc[0] - iscale + yy
                xcirc = indloc[1] - iscale + xx
                # print('pwshape',pw.shape[0], pw.shape[1] )
                # print('twshape',tw.shape[0], tw.shape[1] )
                noky = np.where(ycirc >= pw.shape[0])  # if the circle is off the edge
                if noky[0].size > 0:
                    ycirc = np.delete(ycirc, noky)
                    xcirc = np.delete(xcirc, noky)
                nokx = np.where(xcirc >= pw.shape[1])
                if nokx[0].size > 0:
                    ycirc = np.delete(ycirc, nokx)
                    xcirc = np.delete(xcirc, nokx)
                tmean = np.nanmean(dic['tc_lag' + strr].values[ycirc, xcirc])
                pmean = np.nanmean(dic['p'].values[ycirc, xcirc])
                twmean = np.nanmean(tw[ycirc, xcirc])
                pwmean = np.nanmean(pw[ycirc, xcirc])
                pmax = np.nanmax(outp[ycirc, xcirc])
                pwmax = np.nanmax(pw[ycirc, xcirc])
                tmin = np.nanmin(outt[ycirc, xcirc])
                pnb = ycirc.size  # number of pixels inside the circle
                wave['tw' + strr + '_max'].append(twmax)
                wave['pw' + strr + '_max'].append(pwmax)
                wave['tw' + strr + '_mean'].append(twmean)
                wave['pw' + strr + '_mean'].append(pwmean)
                wave['p' + strr + '_max'].append(pmax)
                wave['p' + strr + '_mean'].append(pmean)
                wave['t' + strr + '_mean'].append(tmean)
                wave['t' + strr + '_min'].append(tmin)
                wave['scales' + strr].append(sc)
                wave['pnb' + strr].append(pnb)
    #just append all the variables into a dictionary now!
    # convert scalar lists to arrays; lists of 2D fields stay as lists
    for k in wave:
        if isinstance(wave[k][0], np.ndarray):
            continue
        print(k)
        wave[k] = np.array(wave[k])
    pkl.dump(wave, open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_label.p', 'wb'))
    print('Saved!')
    print('Found ' + str(cntmax) + ' maxima in ' + str(len(files)) + ' systems.')
    print(str(cntin) + ' maxima coincided with TRMM')
def __init__(self, trmm_folder, yrange=YRANGE, mrange=MRANGE, hod=HOD, area=None):
    """Index TRMM swath files under *trmm_folder* that pass rain/area filters.

    Scans year/month subdirectories for '_rain_f4.gra' files, derives the
    swath mean overpass time from the companion '_time' file, and keeps a
    swath only if its hour is in *hod*, it has enough raining pixels, and
    (when *area* = [lonW, lonE, latS, latN] is given) it overlaps the box
    with enough pixels and rain. Kept paths and timestamps are stored on
    the instance.
    """
    min_rain_swath = 200  # min pixels > rain_thresh over the whole swath
    min_rain_box = 200  # min raining pixels inside the area box
    min_tpixel = 2500  # min pixel overlap between swath and box
    rain_thresh = 0.1  # mm/h; "raining" threshold
    if not os.path.isdir(trmm_folder):
        print('Not a directory')
        quit()
    # NOTE: 'tmins' is declared but never filled below.
    fdic = {'fpath': [], 'tmins': [], 'date': []}
    rfiles = []
    for yr, mo in itertools.product(yrange, mrange):  # rain_f4 files only available for 6 to 10
        tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
        try:
            files = uarr.locate('_rain_f4.gra', tpath)
        except OSError:
            continue
        rfiles.extend(files)
    rfiles.sort(key=ul.natural_keys)
    if not rfiles:
        print('No trmm files found')
        return
    # self.fpath=fdic['fpath']
    # return
    for eachfile in rfiles:
        rain_str = eachfile.replace('_rain_f4', '')
        time_str = eachfile.replace('_rain_f4', '_time')
        rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
        secmean = rr.mean()
        t = ut.sec_to_time(secmean)  # mean overpass time of the swath
        if not t.hour in hod:
            continue
        # raw int16 record: lon, lat, rainrate, flag — four equal segments
        rr = np.fromfile(rain_str, dtype=np.int16)
        x = 49  # trmm swath is always 49 wide
        nb = rr.size
        single = int(nb / 4)  # variables lon lat rainrate flag
        lons = rr[0:single]
        lats = rr[single:2 * single]
        rainrs = rr[2 * single:3 * single]
        y = int(lons.size / x)
        lons = np.resize(lons, (y, x))
        lats = np.resize(lats, (y, x))
        rainrs = np.resize(rainrs, (y, x))
        # undo the integer packing: coords x100, rain x10
        lont = lons / 100.
        latt = lats / 100.
        rain = rainrs / 10.
        if np.sum(rain > rain_thresh) < min_rain_swath:  # minimum TRMM rainfall > 0.1 in swath
            continue
        if area:
            box = np.where((lont > area[0]) & (lont < area[1]) & (latt > area[2]) & (latt < area[3]))
            if not box[0].any():
                continue
            # print(len(box[0]))
            if len(box[0]) < min_tpixel:  # minimum pixel overlap with TRMM and box (50000km2)
                continue
            if np.sum(rain[box] > rain_thresh) < min_rain_box:  # minimum rainfall in defined box
                continue
        fdic['fpath'].append(rain_str)
        # fdic['date'].add(int(rain_str[-20:-16]), int(rain_str[-16:-14]), int(rain_str[-14:-12]), t.hour, t.minute,
        #                  0)
        # date parsed from fixed positions in the filename (YYYY MM DD).
        # NOTE(review): pd.datetime is deprecated in modern pandas — prefer
        # datetime.datetime when this is next touched.
        fdic['date'].append(pd.datetime(int(rain_str[-20:-16]), int(rain_str[-16:-14]), int(rain_str[-14:-12]), t.hour, t.minute, 0))
    self.fpaths = fdic['fpath']
    self.dates = pd.Series(fdic['date'])
    self.__area = area
def readMCS_getWavelet_tresh():
    """Wavelet-transform all MCS files using a per-file adaptive threshold.

    For every MCS netCDF file: fills missing/warm cloud-top temperatures
    (all 5 time lags) with the 30th percentile of the finite lag-0 field,
    runs util.waveletTP on each lag, and accumulates raw-field statistics
    plus the wavelet power at a subset of scales into the `wave` dict,
    which is pickled to disk at the end.

    NOTE(review): reconstructed formatting — original indentation was
    lost; statement grouping inferred from the parallel function
    readMCS_getWavelet in this file.
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    # Wavelet scales (as strings, matching the dict keys used below).
    arr = np.array([
        10, 11, 11, 12, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24,
        25, 27, 28, 30, 32, 34, 36, 38, 40, 42, 45, 48, 50, 53, 57, 60
    ], dtype=str)
    # Subset of scales whose wavelet fields are actually stored.
    ext = np.array([10, 15, 20, 30, 60], dtype=str)
    rrange = list(range(arr.size))
    scales = np.in1d(arr, ext)
    rpos = np.array(rrange)[scales]  # indices of the stored scales in `arr`
    print(rpos)

    # Accumulators: per-file stats of the raw fields ...
    wave = {}
    wave['torig0_min'] = []
    wave['torig1_min'] = []
    wave['torig2_min'] = []
    wave['torig3_min'] = []
    wave['torigx_min'] = []
    wave['porig_max'] = []
    wave['torig0'] = []
    wave['torig1'] = []
    wave['torig2'] = []
    wave['torig3'] = []
    wave['torigx'] = []
    wave['porig'] = []
    # ... and, per stored scale, the wavelet fields and their 99th percentiles.
    for a in ext:
        wave[a] = {}
        wave[a]['twavelet0'] = []
        wave[a]['twavelet1'] = []
        wave[a]['twavelet2'] = []
        wave[a]['twavelet3'] = []
        wave[a]['twaveletx'] = []
        wave[a]['pwavelet'] = []
        wave[a]['twavelet0_max'] = []
        wave[a]['twavelet1_max'] = []
        wave[a]['twavelet2_max'] = []
        wave[a]['twavelet3_max'] = []
        wave[a]['twaveletx_max'] = []
        wave[a]['pwavelet_max'] = []

    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)

        # Cloud-top temperature at 5 time lags (copies: mutated in place below).
        outt0 = np.array(dic['tc_lag0'].values.copy())
        outt1 = np.array(dic['tc_lag1'].values.copy())
        outt2 = np.array(dic['tc_lag2'].values.copy())
        outt3 = np.array(dic['tc_lag3'].values.copy())
        outtx = np.array(dic['tc_lagx'].values.copy())

        # Adaptive fill value: 30th percentile of the finite lag-0 temperatures.
        mmeans = np.percentile(outt0[np.isfinite(outt0)], 30)
        print(mmeans)
        maxi = np.nanmin(outt0)
        thresh = maxi + 15  # per-file cutoff: coldest pixel + 15

        outp = np.array(dic['p'].values.copy())
        outp[np.isnan(outp)] = -10**-5  # tiny negative sentinel for missing rain

        # Replace NaNs and everything warmer than `thresh` by the fill value.
        outt0[np.isnan(outt0)] = mmeans
        outt0[outt0 > thresh] = mmeans
        outt1[np.isnan(outt1)] = mmeans
        outt1[outt1 > thresh] = mmeans
        outt2[np.isnan(outt2)] = mmeans
        outt2[outt2 > thresh] = mmeans
        outt3[np.isnan(outt3)] = mmeans
        outt3[outt3 > thresh] = mmeans
        outtx[np.isnan(outtx)] = mmeans
        outtx[outtx > thresh] = mmeans

        # Skip files where any field collapsed to the constant fill value.
        if np.mean(outt0) == mmeans:
            continue
        if np.mean(outt1) == mmeans:
            continue
        if np.mean(outt2) == mmeans:
            continue
        if np.mean(outt3) == mmeans:
            continue
        if np.mean(outtx) == mmeans:
            continue
        # Skip if no pixel colder than the fill value remains in lag-x.
        if not outtx[outtx < mmeans].any():
            continue

        print('Wavelet start')
        wav0 = util.waveletTP(outt0, outp, 5)
        wav1 = util.waveletTP(outt1, outp, 5)
        wav2 = util.waveletTP(outt2, outp, 5)
        wav3 = util.waveletTP(outt3, outp, 5)
        wavx = util.waveletTP(outtx, outp, 5)

        # Raw-field statistics (1st percentile of cold pixels, 99th of rain).
        wave['torig0_min'].append(np.percentile(outt0[outt0 < mmeans], 1))
        wave['torig1_min'].append(np.percentile(outt1[outt1 < mmeans], 1))
        wave['torig2_min'].append(np.percentile(outt2[outt2 < mmeans], 1))
        wave['torig3_min'].append(np.percentile(outt3[outt3 < mmeans], 1))
        wave['torigx_min'].append(np.percentile(outtx[outtx < mmeans], 1))
        wave['porig_max'].append(np.percentile(outp[outp > 1], 99))
        wave['torig0'].append(outt0)
        wave['torig1'].append(outt1)
        wave['torig2'].append(outt2)
        wave['torig3'].append(outt3)
        wave['torigx'].append(outtx)
        wave['porig'].append(outp)

        for pos in rpos:
            print(arr[pos])
            tt0 = np.array(wav0['t'][pos, :, :])
            tt1 = np.array(wav1['t'][pos, :, :])
            tt2 = np.array(wav2['t'][pos, :, :])
            tt3 = np.array(wav3['t'][pos, :, :])
            ttx = np.array(wavx['t'][pos, :, :])
            pp = np.array(wav2['p'][pos, :, :])  # rain wavelet taken from lag-2

            # Zero out wavelet power where there is no TRMM coverage.
            tt0[np.where(dic['pmask'].values == 0)] = 0
            tt1[np.where(dic['pmask'].values == 0)] = 0
            tt2[np.where(dic['pmask'].values == 0)] = 0
            tt3[np.where(dic['pmask'].values == 0)] = 0
            ttx[np.where(dic['pmask'].values == 0)] = 0

            wave[arr[pos]]['twavelet0'].append(tt0)
            wave[arr[pos]]['twavelet1'].append(tt1)
            wave[arr[pos]]['twavelet2'].append(tt2)
            wave[arr[pos]]['twavelet3'].append(tt3)
            wave[arr[pos]]['twaveletx'].append(ttx)
            wave[arr[pos]]['pwavelet'].append(pp)
            wave[arr[pos]]['twavelet0_max'].append(
                np.percentile(tt0[tt0 > 0], 99))
            wave[arr[pos]]['twavelet1_max'].append(
                np.percentile(tt1[tt1 > 0], 99))
            wave[arr[pos]]['twavelet2_max'].append(
                np.percentile(tt2[tt2 > 0], 99))
            wave[arr[pos]]['twavelet3_max'].append(
                np.percentile(tt3[tt3 > 0], 99))
            wave[arr[pos]]['twaveletx_max'].append(
                np.percentile(ttx[ttx > 0], 99))
            wave[arr[pos]]['pwavelet_max'].append(np.percentile(
                pp[pp > 0], 99))

    pkl.dump(
        wave,
        open(
            '/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc_thresh.p',
            'wb'))
    print('Saved!')
def readMCS_getWavelet_label():
    """Extract local wavelet maxima from MCS files and sample T/P around them.

    For every MCS netCDF file and every time lag ('0','1','2','3','x'):
    thresholds the cloud-top temperature field, smooths pixels sitting on
    very steep gradients, runs util.waveletTP_localMax, then for each
    detected maximum averages temperature / rainfall inside a circle of
    radius scale/2 around it.  All samples are collected in `wave` and
    pickled at the end.

    NOTE(review): reconstructed formatting — original indentation was
    lost; statement grouping inferred from the dict initialisation
    (e.g. only 'tw0'/'pw' exist, so those appends sit in the strr=='0'
    branch).
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    wave={}
    strarr=['0', '1', '2', '3', 'x']
    wave['porig']=[]
    wave['pw']=[]
    wave['tw0']=[]
    # Per-lag accumulators, keyed by the lag suffix.
    for st in strarr:
        wave['torig'+st]=[]
        wave['tw'+st+'_max']=[]    # wavelet value at max point
        wave['pw'+st+'_max']=[]    # max p wavelet in radius
        wave['p'+st+'_max']=[]     # max p in radius
        wave['p'+st+'_mean']=[]    # mean p in radius
        wave['t'+st+'_mean']=[]    # t mean in radius
        wave['t'+st+'_min']=[]     # t min in radius
        wave['pw'+st+'_mean']=[]   # mean p wavelet in radius
        wave['tw'+st+'_mean']=[]   # t wavelet mean in radius
        wave['scales'+st]=[]
        wave['pnb'+st]=[]

    cntmax=0  # total number of wavelet maxima found
    cntin=0   # maxima that coincided with TRMM coverage (counted on lag 0)

    for f in files:
        print('Doing file: '+f)
        dic = xr.open_dataset(f)

        outp=np.array(dic['p'].values.copy())
        wave['porig'].append(outp)

        for strr in strarr:
            outt=np.array(dic['tc_lag'+strr].values.copy())
            outp[np.isnan(outp)]=-10**-5  # tiny negative sentinel for missing rain
            wave['torig'+strr].append(outt)

            # Threshold: everything warmer than -40 (and NaN) set to 150,
            # a gradient is computed on that step field, then the warm side
            # is reset to -55 for the wavelet input.
            outt[np.isnan(outt)]=150
            outt[outt>-40]=150
            grad=np.gradient(outt)
            outt[outt>-40]=-55
            o2=outt.copy()

            # Smooth 5x5 neighbourhoods around pixels on very steep edges
            # (|d/dy| > 80) to avoid spurious wavelet responses there.
            nok = np.where(abs(grad[0]) > 80)
            d=2
            i=nok[0]
            j=nok[1]
            for ii,jj in zip(i,j):
                kernel=o2[ii-d:ii+d+1, jj-d:jj+d+1]
                o2[ii-d:ii+d+1, jj-d:jj+d+1]=ndimage.gaussian_filter(kernel, 3, mode='nearest')

            wav = util.waveletTP_localMax(o2, outp, 5)
            o2[np.where(dic['pmask'].values==0)]=np.nan
            if strr=='0':
                # Full-field wavelet output is only stored for lag 0.
                outp[np.where(dic['pmask'].values==0)]=np.nan
                wave['pw'].append(wav['p'])
                wave['tw'+strr].append(wav['t'])

            cntmax = cntmax+len(wav['z'])

            # Local temperature minima at three filter sizes (collected but
            # only the wavelet maxima below drive the sampling).
            xs = []
            ys = []
            for ss in [2, 6, 12]:
                maxoutt = (outt == ndimage.minimum_filter(outt,ss, mode='constant',cval=np.amax(outt)+1))
                maxoutt = maxoutt.astype(int)
                ypks,xpks=np.where((maxoutt==1) & (outt < -55))
                ys.extend(ypks)
                xs.extend(xpks)
            radius = [2, 6, 12]*5

            # Wavelet maxima positions: (scale index, y, x).
            z = wav['z']
            y = wav['y']
            x = wav['x']
            for i in range(len(z)):
                zz = z[i]
                xx = x[i]
                yy = y[i]
                # if maximum falls in region where no TRMM exists, continue
                if dic['pmask'][yy,xx]==0:
                    continue
                sc=wav['scales'][zz]
                if strr=='0':
                    cntin = cntin+1

                # Circle radius in pixels: scale/2 km at 5 km resolution.
                iscale = (np.ceil(wav['scales'][zz]/2./5.)).astype(int)
                tw = wav['t'][zz, :, :].copy()
                pw = wav['p'][zz, :, :].copy()
                tw[np.isnan(tw)] = 0
                pw[np.isnan(pw)] = 0
                tw[np.where(dic['pmask'].values==0)]=np.nan
                pw[np.where(dic['pmask'].values==0)]=np.nan
                twmax=tw[yy,xx]
                print(twmax)

                # Find all indices within the local circle of radius iscale,
                # then average over those indices.
                xloc1 = np.arange(xx-iscale,xx+iscale+1)
                yloc1 = np.arange(yy-iscale,yy+iscale+1)
                xloc,yloc = np.meshgrid(xloc1,yloc1)
                distloc = ( (xloc-xx)**2 + (yloc-yy)**2 ) ** .5
                indloc = (distloc <= iscale).nonzero()
                ycirc = indloc[0] - iscale + yy
                xcirc = indloc[1] - iscale + xx

                # Drop circle pixels that fall off the grid edge.
                noky=np.where(ycirc>=pw.shape[0])
                if noky[0].size>0:
                    ycirc=np.delete(ycirc,noky)
                    xcirc=np.delete(xcirc,noky)
                nokx=np.where(xcirc>=pw.shape[1])
                if nokx[0].size>0:
                    ycirc=np.delete(ycirc,nokx)
                    xcirc=np.delete(xcirc,nokx)

                tmean=np.nanmean(dic['tc_lag'+strr].values[ycirc, xcirc])
                pmean=np.nanmean(dic['p'].values[ycirc, xcirc])
                twmean=np.nanmean(tw[ycirc, xcirc])
                pwmean=np.nanmean(pw[ycirc, xcirc])
                pmax=np.nanmax(outp[ycirc, xcirc])
                pwmax=np.nanmax(pw[ycirc, xcirc])
                tmin=np.nanmin(outt[ycirc, xcirc])
                pnb=ycirc.size

                wave['tw'+strr+'_max'].append(twmax)
                wave['pw'+strr+'_max'].append(pwmax)
                wave['tw'+strr+'_mean'].append(twmean)
                wave['pw'+strr+'_mean'].append(pwmean)
                wave['p'+strr+'_max'].append(pmax)
                wave['p'+strr+'_mean'].append(pmean)
                wave['t'+strr+'_mean'].append(tmean)
                wave['t'+strr+'_min'].append(tmin)
                wave['scales'+strr].append(sc)
                wave['pnb'+strr].append(pnb)

    # Convert scalar lists to arrays (arrays of 2D fields are kept as lists).
    for k in wave:
        if isinstance(wave[k][0], np.ndarray):
            continue
        print(k)
        wave[k]=np.array(wave[k])

    pkl.dump(wave, open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_label.p', 'wb'))
    print('Saved!')
    print('Found '+str(cntmax)+' maxima in '+str(len(files))+' systems.')
    print(str(cntin)+' maxima coincided with TRMM')
def perSys():
    """Aggregate per-MCS bulk statistics over all CP4 MCS files.

    Maps `file_loop` (defined elsewhere in this module) over every
    netCDF file in the CP4 CLOVER MCS directory using a 5-worker
    process pool, collects the per-file result tuples into lists keyed
    by variable name, and pickles the resulting dict.

    Side effects: reads all input files, writes one pickle file.
    """
    tthresh = '-50'  # temperature-threshold tag used in the output file name
    files = ua.locate(
        ".nc", '/users/global/cornkle/data/CP4/CLOVER/MCS_-50_1000km2_JA_sahel')
    print('Nb files', len(files))

    pool = multiprocessing.Pool(processes=5)
    res = pool.map(file_loop, files)
    pool.close()
    pool.join()  # wait for workers before aggregating

    # Position in the file_loop result tuple -> (output key, use extend).
    # extend-keys hold per-pixel sequences, append-keys per-file values.
    spec = [('tmin', False), ('pmax', False), ('area', False),
            ('ao60', False), ('tmean', False), ('pperc', True),
            ('clat', False), ('po30', False), ('isfin', False),
            ('t', False), ('lon30', True), ('lat30', True),
            ('lonisfin', True), ('latisfin', True), ('hour', False),
            ('month', False), ('latmin', False), ('latmax', False),
            ('isnz', False), ('clon', False), ('p', False),
            ('year', False), ('date', False)]

    mdic = defaultdict(list)
    for v in res:
        try:
            # Materialise all values first: a None result (rejected file)
            # raises TypeError here, before anything is appended.
            items = [(key, v[i], use_extend)
                     for i, (key, use_extend) in enumerate(spec)]
        except TypeError:
            continue
        for key, value, use_extend in items:
            if use_extend:
                mdic[key].extend(value)
            else:
                mdic[key].append(value)

    out = ('/users/global/cornkle/data/CLOVER/saves/bulk_' + tthresh +
           '_zeroRain_gt1k_shear_CP4_JA_sahel.p')
    # Use a context manager so the handle is closed (original leaked it).
    with open(out, 'wb') as fh:
        pkl.dump(mdic, fh)
f.subplots_adjust(right=0.86) cax = f.add_axes([0.87, 0.545, 0.025, 0.415]) cb = plt.colorbar(mt, cax=cax, label='Cloud-top temperature ($^{\circ}$C)') cb.ax.tick_params(labelsize=12) cax = f.add_axes([0.87, 0.065, 0.025, 0.175]) cb = plt.colorbar(mp, cax=cax, label='Wavelet power') cb.ax.tick_params(labelsize=12) fsiz = 14 x = 0.02 plt.annotate('a)', xy=(x, 0.96), xytext=(0, 4), size=fsiz, xycoords=('figure fraction', 'figure fraction'), textcoords='offset points') plt.annotate('b)', xy=(x, 0.51), xytext=(0, 4), size=fsiz, xycoords=('figure fraction', 'figure fraction'), textcoords='offset points') plt.annotate('c)', xy=(x, 0.245), xytext=(0, 4), size=fsiz, xycoords=('figure fraction', 'figure fraction'), textcoords='offset points') plt.show() spath = '/users/global/cornkle/C_paper/wavelet/figs/paper/' plt.savefig(spath+'/method2.png', dpi=300) dic.close() plt.close('all') if __name__ == "__main__": files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/WA15_big_-40_15W-20E_size_zR/') run(files[238])
def readMCS_getWavelet():
    """Wavelet-transform all MCS files with a fixed -40 threshold.

    For every MCS netCDF file and every time lag ('0','1','2','3','x'):
    fills NaN temperatures with -40, runs util.waveletTP, masks pixels
    without TRMM coverage, and stores raw-field statistics plus wavelet
    power at a subset of scales in `wave`, which is pickled at the end.

    Fix vs. original: the per-lag minimum statistic was computed from
    `outt0` (lag 0) for every lag — a copy-paste bug that stored lag-0
    values under all 'torig<lag>_min' keys.  It now uses the current
    lag's field `outt`.
    """
    files = ua.locate(".nc", '/users/global/cornkle/MCSfiles/')

    # Wavelet scales (as strings, matching the dict keys used below).
    arr = np.array([
        10, 11, 11, 12, 13, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24,
        25, 27, 28, 30, 32, 34, 36, 38, 40, 42, 45, 48, 50, 53, 57, 60
    ], dtype=str)
    # Subset of scales whose wavelet fields are actually stored.
    ext = np.array([10, 15, 20, 30, 60], dtype=str)
    rrange = list(range(arr.size))
    scales = np.in1d(arr, ext)
    rpos = np.array(rrange)[scales]  # indices of the stored scales in `arr`
    print(rpos)

    wave = {}
    wave['torig0_min'] = []
    wave['torig1_min'] = []
    wave['torig2_min'] = []
    wave['torig3_min'] = []
    wave['torigx_min'] = []
    wave['porig_max'] = []
    wave['torig0'] = []
    wave['torig1'] = []
    wave['torig2'] = []
    wave['torig3'] = []
    wave['torigx'] = []
    wave['porig'] = []
    for a in ext:
        wave[a] = {}
        wave[a]['twavelet0'] = []
        wave[a]['twavelet1'] = []
        wave[a]['twavelet2'] = []
        wave[a]['twavelet3'] = []
        wave[a]['twaveletx'] = []
        wave[a]['pwavelet'] = []
        wave[a]['twavelet0_max'] = []
        wave[a]['twavelet1_max'] = []
        wave[a]['twavelet2_max'] = []
        wave[a]['twavelet3_max'] = []
        wave[a]['twaveletx_max'] = []
        wave[a]['pwavelet_max'] = []

    for f in files:
        print('Doing file: ' + f)
        dic = xr.open_dataset(f)

        outt0 = np.array(dic['tc_lag0'].values.copy())
        outt1 = np.array(dic['tc_lag1'].values.copy())
        outt2 = np.array(dic['tc_lag2'].values.copy())
        outt3 = np.array(dic['tc_lag3'].values.copy())
        outtx = np.array(dic['tc_lagx'].values.copy())

        outp = np.array(dic['p'].values.copy())
        outp[np.isnan(outp)] = -10**-5  # tiny negative sentinel for missing rain
        wave['porig_max'].append(np.percentile(outp[outp > 1], 99))
        wave['porig'].append(outp)

        looparr = [outt0, outt1, outt2, outt3, outtx]
        strarr = ['0', '1', '2', '3', 'x']
        for outt, strr in zip(looparr, strarr):
            # NOTE: the array appended here is mutated in place below
            # (NaN fill, pmask fill), so the stored 'torig' fields carry
            # those fills — unchanged from the original behaviour.
            wave['torig' + strr].append(outt)
            outt[np.isnan(outt)] = -40
            wav = util.waveletTP(outt, outp, 5)
            outt[np.where(dic['pmask'].values == 0)] = -40
            # BUGFIX: was np.percentile(outt0[outt0 < -40], 1) — lag-0
            # stats for every lag; use the current lag's field instead.
            wave['torig' + strr + '_min'].append(
                np.percentile(outt[outt < -40], 1))

            for pos in rpos:
                print(arr[pos])
                tt = np.array(wav['t'][pos, :, :])
                if strr == '0':
                    # Rain wavelet is only stored once per file (lag 0).
                    pp = np.array(wav['p'][pos, :, :])
                    wave[arr[pos]]['pwavelet'].append(pp)
                    wave[arr[pos]]['pwavelet_max'].append(
                        np.percentile(pp[pp > 0], 99))
                # Zero out wavelet power where there is no TRMM coverage.
                tt[np.where(dic['pmask'].values == 0)] = 0
                wave[arr[pos]]['twavelet' + strr].append(tt)
                wave[arr[pos]]['twavelet' + strr + '_max'].append(
                    np.percentile(tt[tt > 0], 99))

    # Context manager so the pickle handle is closed (original leaked it).
    with open('/users/global/cornkle/MCSfiles/save/MCS_wavelet_allyears_perc.p',
              'wb') as fh:
        pkl.dump(wave, fh)
    print('Saved!')
def __init__(self, trmm_folder, yrange=YRANGE, mrange=MRANGE, hod=HOD, area=None):
    """Index TRMM version-7 ('.7.gra') swath files below *trmm_folder*.

    Scans trmm_folder/<year>/<month> for every (year, month) in
    yrange x mrange, keeps swaths whose mean overpass time falls into
    the requested hours of day and which contain enough raining
    pixels, and stores the surviving file paths / timestamps on the
    instance (self.fpaths, self.dates).

    Parameters
    ----------
    trmm_folder : str
        Root of the TRMM archive (one sub-directory per year).
    yrange, mrange : iterable of int
        Years and months to scan (module-level defaults).
    hod : iterable of int
        Accepted hours of day (mean swath time).
    area : tuple or None
        Optional (lonmin, lonmax, latmin, latmax) box; a swath must
        overlap it with >= min_tpixel pixels and hold >= min_rain_box
        raining pixels inside it.
    """
    # Local import: pd.datetime was just an alias of datetime.datetime
    # and has been removed from pandas (>=2.0).
    from datetime import datetime

    # Selection thresholds (pixel counts / rain rate in mm h-1).
    min_rain_swath = 200   # raining pixels required anywhere in the swath
    min_rain_box = 200     # raining pixels required inside `area`
    min_tpixel = 2500      # swath pixels that must overlap `area` (~50000 km2)
    rain_thresh = 0.1      # mm h-1: minimum rate counted as "raining"

    if not os.path.isdir(trmm_folder):
        print('Not a directory')
        quit()

    fdic = {'fpath': [], 'tmins': [], 'date': []}
    rfiles = []
    for yr, mo in itertools.product(yrange, mrange):
        # rain_f4 files only available for months 6 to 10
        tpath = os.path.join(trmm_folder, str(yr), str(mo).zfill(2))
        try:
            files = uarr.locate('.7.gra', tpath)
        except OSError:
            # year/month directory does not exist: skip silently
            continue
        rfiles.extend(files)
    rfiles.sort(key=ul.natural_keys)

    if not rfiles:
        print('No trmm files found')
        return

    for eachfile in rfiles:
        rain_str = eachfile
        time_str = eachfile.replace('.7.', '.7_time.')

        try:
            rr = np.fromfile(time_str, dtype=np.float32)  # seconds of day
        except FileNotFoundError:
            print(time_str + ' missing, continue')
            continue
        secmean = rr.mean()
        try:
            t = ut.sec_to_time(secmean)
        except ValueError:
            print('ValueError sec to time')
            continue
        if t.hour not in hod:
            continue

        rr = np.fromfile(rain_str, dtype=np.int16)
        x = 49  # trmm swath is always 49 pixels wide
        nb = rr.size
        single = int(nb / 4)  # file holds 4 fields: lon lat rainrate flag
        lons = rr[0:single]
        lats = rr[single:2 * single]
        rainrs = rr[2 * single:3 * single]
        y = int(lons.size / x)
        lons = np.resize(lons, (y, x))
        lats = np.resize(lats, (y, x))
        rainrs = np.resize(rainrs, (y, x))
        # Fields are stored as scaled integers: degrees*100, (mm h-1)*10.
        lont = lons / 100.
        latt = lats / 100.
        rain = rainrs / 10.

        # minimum TRMM rainfall > 0.1 in swath
        if np.sum(rain > rain_thresh) < min_rain_swath:
            continue

        if area:
            box = np.where((lont > area[0]) & (lont < area[1]) &
                           (latt > area[2]) & (latt < area[3]))
            if not box[0].any():
                continue
            # minimum pixel overlap with TRMM and box (50000km2)
            if len(box[0]) < min_tpixel:
                continue
            # minimum rainfall in defined box
            if np.sum(rain[box] > rain_thresh) < min_rain_box:
                continue

        fdic['fpath'].append(rain_str)
        # Date components parsed from fixed positions in the file name
        # (YYYY MM DD).
        fdic['date'].append(
            datetime(int(rain_str[-20:-16]),
                     int(rain_str[-16:-14]),
                     int(rain_str[-14:-12]),
                     t.hour, t.minute, 0))

    self.fpaths = fdic['fpath']
    self.dates = pd.Series(fdic['date'])
    self.__area = area
import numpy as np
import xarray as xr  # FIX: `xr` is used below but was never imported (NameError at runtime)
from utils import u_arrays as ua
from collections import OrderedDict
import pandas as pd
import multiprocessing
import pickle as pkl
from scipy.ndimage.measurements import label
import pdb

# Script: compute which fraction of coastal rainfall >= 30 mm h-1 is
# attributable to the tracked MCSs.
#
# Numerator: pixel counts 'po30' from the saved bulk statistics, summed
# over systems centred between 4 and 7.5 deg N.
# Use a context manager so the pickle handle is closed (original leaked it).
with open('/users/global/cornkle/C_paper/wavelet/saves/bulk_40big_zR.p',
          'rb') as fh:
    dic = pkl.load(fh)

p30 = np.array(dic['po30'])
lat = np.array(dic['clat'])
mcs_count = np.sum(p30[(lat >= 4) & (lat <= 7.5)])

# Denominator: all TRMM pixels >= 30 mm h-1 in the coastal box.
files = ua.locate(".nc", '/users/global/cornkle/TRMMfiles')

cnt = 0
for f in files:
    print('Doing ', f)
    xa = xr.open_dataset(f)
    lat = xa.lat.values  # rebinds `lat`; mcs_count was computed above
    lon = xa.lon.values
    arr = xa['p'].values
    # NOTE(review): upper latitude bound is 7.8 here but 7.5 for the MCS
    # numerator above — looks inconsistent, confirm intent before relying
    # on the printed fraction.
    arr = arr[(lat >= 4) & (lat <= 7.8) & (lon >= -17) & (lon <= 20)]
    nb = np.sum(arr >= 30)
    cnt += nb

print('MCS frac', mcs_count / cnt)