def pcaps_timeticks(plt, notext=False):
    '''
    Draw properly formatted time ticks for the entire PCAPS period on a
    figure about the length of a page.

    Parameters
    ----------
    plt : pyplot-like object
        Only xlabel() and gca() are used.
    notext : bool, optional
        If True, the major ticks get empty labels.

    NOTE(review): s2t assumes UTC per the original note, but
    datetime.fromtimestamp() converts to *local* time, so the printed
    date labels may be shifted relative to UTC -- confirm intent.
    '''
    # Major tick locations (epoch seconds). One candidate date is
    # deliberately disabled, matching the original selection.
    locations = [
        s2t(2010120112),
        s2t(2010121512),
        s2t(2010123112),
        s2t(2011011512),
        # s2t(2011020112),
        s2t(2011020712),
    ]
    # Derive labels directly from locations so the two can never get out
    # of sync (the old code manually indexed a hard-coded 5-slot list).
    if notext:
        labels = [''] * len(locations)
    else:
        labels = [datetime.fromtimestamp(loc).strftime("%d %b %Y")
                  for loc in locations]
    # label the axis
    plt.xlabel('Date')
    ax = plt.gca()
    ax.set_xticks(locations)
    ax.set_xticklabels(labels)
    # minor ticks, one per day (86400 s)
    minorT = []
    t = locations[0]
    while t <= locations[-1]:
        minorT.append(t)
        t += 86400
    ax.set_xticks(minorT, minor=True)
def h5_compress_stares(files, save, bin_size=5):
    """
    Read lidar stare files and collect them into gridded arrays.

    Parameters
    ----------
    files : iterable of str
        Paths like lidardata_joe_20110218.dat; the date is parsed from
        the last 8 digits of the name.
    bin_size : int, optional
        Distance in minutes between bins (one data line per bin).

    Returns
    -------
    (X, Y, Z) : lists -- times (epoch s), heights (assumed 1-D), data rows.

    NOTE(review): this is a free function but it references
    ``self.data.begin`` / ``self.data.end`` below -- that is a NameError
    the first time the date filter runs. Looks copy-pasted from a
    method; needs its time window passed in. Also ``doc`` is created
    but never appended to or closed, and ``save`` is otherwise unused.
    """
    doc = h5(save)
    doc.create(indices={'height':100})
    X = []  # time dimension
    Y = []  # height dimension (ASSUMED TO BE 1-DIMENSIONAL!!!)
    Z = []  # data
    for fd in files:
        # now we are in the file -- get the file time from the name
        fname = fd.split('/')[-1]
        # file format expected: lidardata_joe_20110218.dat
        otime = s2t(fname[-12:-4]+"00UTC","%Y%m%d%H%Z")
        # files last for 1 24 hour period, so, yeah, add 86400
        # NOTE(review): `self` is undefined here -- see docstring
        if otime + 86400 < self.data.begin or otime - 86400 > self.data.end:
            continue
        print 'reading',fname
        # well, the file shall be opened!
        f = open(fd)
        # index starts at -2 because the first line is heights, but we
        # still accumulate (first increment lands on -1 = heights line)
        i = -2
        for line in f:
            i += 1
            if i == -1:
                # then this is the heights line
                if not 'NaN' in line and len(Y) == 0:
                    # then this is a good line, save it as Y
                    Y = line.split()  # I would do something more robust if i trusted it
                continue
            tm = otime + bin_size*60 * i  # each line is bin_size minutes
            if tm < self.data.begin:
                continue
            if tm > self.data.end:
                # then we are done with this chirade
                break
            # well, then this is a data line
            X.append(tm)
            Z.append([float(x) for x in line.split()])  # FIXME - make numpy, but its okay for now
        f.close()
    return (X,Y,Z)
def read(files,save): """ each file is a sounding (an ob) so, read the file, and save it at save """ #NOTE all soundings are size obs long, they must be filled in with zeros for this data format... # create the HDF5 document doc = h5(save) size = 5500 # this hopefully exceeds the size of the arrays doc.create(time2=size,pres=size,temp=size,dewpt=size,rh=size,u=size,v=size,dz=size,Z=size,lat=size,lon=size,gpsz=size) #Z=geopotenital height # now read the files! for f in sorted(files): fname = f.split('/')[-1] print 'reading',fname # get the initialization time from the filename -- risky, i know try: t0 = s2t(fname[1:15]+"UTC","%Y%m%d_%H%M%S%Z") except: #well, you do not meet our high standards for naming continue ts,hh,mm,ss,p,tc,tdc,rh,u,v,ws,wd,dz,Z,ln,lt,gpsZ = np.loadtxt(f,skiprows=14,unpack=True) # and append this data! I will trust the time seconds, instead of recomputing the time ts += t0 # but, before that, we have to make them all the same size - size long nl = np.zeros(size - ts.shape[0])-999.00 # -999 array to fluff the end ts = np.concatenate((ts,nl)) p = np.concatenate((p,nl)) tc = np.concatenate((tc,nl)) tdc = np.concatenate((tdc,nl)) rh = np.concatenate((rh,nl)) u = np.concatenate((u,nl)) v = np.concatenate((v,nl)) dz = np.concatenate((dz,nl)) ln = np.concatenate((ln,nl)) lt = np.concatenate((lt,nl)) Z = np.concatenate((Z,nl)) gpsZ = np.concatenate((gpsZ,nl)) doc.append(t0,persist=True,time2=ts,pres=p,temp=tc,dewpt=tdc,rh=rh,u=u,v=v,dz=dz,Z=Z,lat=lt,lon=ln,gpsz=gpsZ) doc.close()
def h5_compress_stares(files, save, maxdim=312):
    """
    Read lidar stare (.hpl) files and append each profile to an HDF5
    document at ``save``.

    Parameters
    ----------
    files : iterable of str
        Stare files, e.g. ``Stare_20_20110617_18.hpl``; only the date is
        parsed from the name -- the hour comes from the data lines.
    save : str
        Path for the output HDF5 document.
    maxdim : int, optional
        Maximum number of range gates; files with more gates are
        skipped, shorter profiles are zero-padded to this length by
        ndarray.resize.
    """
    doc = h5(save)
    doc.create(indices={'height': maxdim}, bs=maxdim, snr=maxdim,
               doppler=maxdim, dz=1)
    for fd in files:
        fname = fd.split('/')[-1]
        logging.info('reading ' + fname)
        # file format expected: Stare_20_20110617_18.hpl
        # we are only looking for the date; the hour is given elsewhere
        otime = s2t(fname[-15:-7] + "UTC", "%Y%m%d%Z")
        # files are ~6MB, so we can just read them in as a list of lines
        f = open(fd)
        lines = f.readlines()
        f.close()
        gates = int(lines[2].split()[-1])
        if gates > maxdim:
            continue  # skip those guys
        dz = float(lines[3].split()[-1])
        i = 17  # first record header after the file preamble
        data = None  # so the error handler below cannot hit a NameError
        while i < len(lines):
            try:
                # decimal hours -> epoch seconds
                time = float(lines[i].split()[0]) * 3600 + otime
                data = np.array([np.fromstring(lines[x], sep=' ',
                                               dtype=np.float32)
                                 for x in range(i + 1, i + gates + 1)])
                data.resize((maxdim, 4))  # zero-pads short profiles
                i += gates + 1
                doc.append(time, bs=data[:, 3], snr=data[:, 2],
                           doppler=data[:, 1], dz=[dz], persist=True)
            except (KeyboardInterrupt, SystemExit):
                doc.close()
                exit()
            except Exception:
                # `data` may still be None if the failure came before
                # the array was built -- report what we can
                shape = data.shape if data is not None else 'unread'
                logging.debug('Encountered an error, with data shape: '
                              + str(shape))
                # a file never recovers from this
                break
    doc.close()
def read(files,save): """ each file is a sounding (an ob) so, read the file, and save it at save """ #NOTE all soundings are size obs long, they must be filled in with zeros for this data format... # create the HDF5 document doc = h5(save) size = 5500 # this hopefully exceeds the size of the arrays doc.create(time2=size,pres=size,temp=size,dewpt=size,rh=size,wdir=size,wspd=size,dz=size,Z=size,lat=size,lon=size,gpsz=size) #Z=geopotenital height # now read the files! for f in sorted(files): fname = f.split('/')[-1] print 'reading',fname # get the initialization time from the filename -- risky, i know try: t0 = s2t(fname[1:15]+"UTC","%Y%m%d_%H%M%S%Z") except: #well, you do not meet our high standards for naming print 'bad file name' continue ''' For the raw files, we have to go line by line... sad I know ''' ts = np.zeros(size) p = np.zeros(size) tc = np.empty(size) tdc = np.zeros(size) rh = np.empty(size) wdir = np.empty(size) wspd = np.empty(size) dz = np.empty(size) ln = np.empty(size) lt = np.empty(size) Z = np.empty(size) gpsZ = np.empty(size) l=0 #keeps track of the current line f = open(f) for line in f: if not line[9]=='S': continue #dont record pre-launch line = line.split() if not line[1] == 'S00': continue #not satisfactory #NOTE TIMES ARE NOT INCLUDED HERE!!! p[l]=line[5] tc[l]=line[6] #tdc[l]=line[7] rh[l]=line[7] wdir[l]=line[8] wspd[l]=line[9] dz[l]=line[10] ln[l]=line[11] lt[l]=line[12] Z[l]=line[13] gpsZ[l]=line[19] l+=1 f.close() doc.append(t0,persist=True,time2=ts,pres=p,temp=tc,dewpt=tdc,rh=rh,wspd=wspd,wdir=wdir,dz=dz,Z=Z,lat=lt,lon=ln,gpsz=gpsZ) doc.close()
def iop(num, buffer=False):
    '''
    Select an IOP timetuple for the project defined iops. Available
    option to buffer this tuple by buffer [days] on either end.

    Parameters
    ----------
    num : int
        reference to the IOP number from PCAPS ('all' returns every IOP)
    buffer : float, optional
        length in days to extend the tuple.
    '''
    fmt = '%Y %m %d %H %Z'

    def _span(begin, end):
        # one (start, stop) pair of epoch seconds
        return (s2t(begin, fmt), s2t(end, fmt))

    # index 0 is the whole PCAPS period; 1..10 are the numbered IOPs
    out = [
        _span('2010 12 01 12 UTC', '2011 02 18 00 UTC'),
        _span('2010 12 01 12 UTC', '2010 12 07 02 UTC'),
        _span('2010 12 07 12 UTC', '2010 12 10 15 UTC'),
        _span('2010 12 12 12 UTC', '2010 12 14 21 UTC'),
        _span('2010 12 24 00 UTC', '2010 12 26 21 UTC'),
        _span('2011 01 01 00 UTC', '2011 01 09 12 UTC'),
        _span('2011 01 11 12 UTC', '2011 01 17 20 UTC'),
        _span('2011 01 20 12 UTC', '2011 01 22 06 UTC'),
        _span('2011 01 23 12 UTC', '2011 01 26 12 UTC'),
        _span('2011 01 26 12 UTC', '2011 01 31 06 UTC'),
        _span('2011 02 02 18 UTC', '2011 02 05 18 UTC'),
    ]
    if num == 'all':
        return out[1:]
    if not buffer:
        return out[num]
    start, stop = out[num]
    # widened window comes back as a list, matching the original API
    return [start - 86400 * buffer, stop + 86400 * buffer]
# label the axis plt.xlabel('Date') ax = plt.gca() ax.set_xticks(locations) ax.set_xticklabels(labels) # minor ticks, one per day! minorT = [] i = locations[0] while i <= locations[-1]: minorT += [i] i += 86400 ax.set_xticks(minorT, minor=True) # make a simple dict available for other events events = { 'pcaps':(s2t(2010120112), s2t(2011020712)), 'target1':(s2t('201012040000UTC', '%Y%m%d%H%M%Z'), s2t('201012051200UTC', '%Y%m%d%H%M%Z')), 'target2':(s2t('201101050430UTC', '%Y%m%d%H%M%Z'), s2t('201101050800UTC', '%Y%m%d%H%M%Z')), # WAVES!!! 'target3':(s2t('201012020000UTC', '%Y%m%d%H%M%Z'), s2t('201012041200UTC', '%Y%m%d%H%M%Z')), # wave breakup } aerosol_periods = [ (s2t('2010 12 01 15 UTC', '%Y %m %d %H %Z'), s2t('2010 12 03 02 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 03 12 UTC', '%Y %m %d %H %Z'), s2t('2010 12 05 02 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 05 07 UTC', '%Y %m %d %H %Z'), s2t('2010 12 05 21 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 07 12 UTC', '%Y %m %d %H %Z'), s2t('2010 12 08 09 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 08 18 UTC', '%Y %m %d %H %Z'), s2t('2010 12 09 08 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 09 14 UTC', '%Y %m %d %H %Z'), s2t('2010 12 10 00 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 13 22 UTC', '%Y %m %d %H %Z'), s2t('2010 12 14 20 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 25 17 UTC', '%Y %m %d %H %Z'), s2t('2010 12 26 06 UTC', '%Y %m %d %H %Z')), (s2t('2010 12 26 14 UTC', '%Y %m %d %H %Z'), s2t('2010 12 26 17 UTC', '%Y %m %d %H %Z')),
def read(files, save): """ each file is a sounding (an ob) so, read the file, and save it at save """ # NOTE all soundings are size obs long, they must be filled in with zeros for this data format... # create the HDF5 document doc = h5(save) size = 450 # this hopefully exceeds the size of the arrays # CPIN Files are much shorter... doc.create(pres=size, temp=size, dewpt=size, rh=size, r=size, u=size, v=size, z=size, lat=1, lon=1, theta=size, thte=size, wspd=size, wdir=size, gamma=size, stab=size, N=size, rich=size, thtdef=size, cpin=size) # those last two do not have to be included... # Z=geopotenital height # now read the files! for f in sorted(files): fname = f.split('/')[-1] # if 'smth' not in fname and NCAR not in fname: continue l.info('reading ' + fname) # launch time comes from line 2 of the file, the last element df = open(f, 'r') txt = df.read(2000).split('\n') # way more than we need df.close() latln = txt[0].split() # keys 1,2 will be what we want try: tm = s2t(txt[1].split()[-1] + 'UTC', '%Y%m%d%H%M%Z') except: # drat. print txt.split('\n')[1] continue try: if 'cpin' in fname: z, p, t, td, rh, r, wb, tv, tht, thte, thtw, ws, wd, u, v, vflg, gamma, stab, N, rich, thtdef, cpin = np.loadtxt(f, skiprows=4, unpack=True) # r is mixing ratio else: z, p, t, td, rh, r, wb, tv, tht, thte, thtw, ws, wd, u, v, vflg, gamma, stab, N, rich = np.loadtxt(f, skiprows=4, unpack=True) # r is mixing ratio except: l.warning('This file could not be read') continue # and append this data! 
I will trust the time seconds, instead of recomputing the time # but, before that, we have to make them all the same size - size long nl = np.zeros(size - t.shape[0]) - 999.00 # -999 array to fluff the end p = np.concatenate((p, nl)) t = np.concatenate((t, nl)) td = np.concatenate((td, nl)) rh = np.concatenate((rh, nl)) r = np.concatenate((r, nl)) tv = np.concatenate((tv, nl)) tht = np.concatenate((tht, nl)) thte = np.concatenate((thte, nl)) ws = np.concatenate((ws, nl)) wd = np.concatenate((wd, nl)) gamma = np.concatenate((gamma, nl)) stab = np.concatenate((stab, nl)) N = np.concatenate((N, nl)) rich = np.concatenate((rich, nl)) u = np.concatenate((u, nl)) v = np.concatenate((v, nl)) z = np.concatenate((z, nl)) if 'cpin' in fname: cpin = np.concatenate((cpin, nl)) thtdef = np.concatenate((thtdef, nl)) doc.append(tm, persist=True, pres=p, temp=t, dewpt=td, rh=rh, r=r, u=u, v=v, z=z, lat=[latln[1]], lon=[latln[2]], theta=tht, thte=thte, wspd=ws, wdir=wd, gamma=gamma, stab=stab, N=N, rich=rich, cpin=cpin, thtdef=thtdef) else: doc.append(tm, persist=True, pres=p, temp=t, dewpt=td, rh=rh, r=r, u=u, v=v, z=z, lat=[latln[1]], lon=[latln[2]], theta=tht, thte=thte, wspd=ws, wdir=wd, gamma=gamma, stab=stab, N=N, rich=rich) doc.close()