read = np.load(f)
read = read[1:, :]
labels = read[:, 0]
labels = labels.astype(int)
valid = labels == 1
dates = read[valid, 1]
dates = dates.astype(int)
hours = read[valid, 2]
hours = hours.astype(int)
all_vals = read[:, 3]
all_vals = all_vals.astype(float)
big_times = modules.date_process(dates, hours)
big_times = np.array(big_times)

#iterate through sites and take LOMB
daily_mag_array = []
daily_phase_array = []
full_time = np.arange(0, 2191, 1. / 24)

#array containing length of months from 2006 in days
month_lengths = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
                 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
                 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
                 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
                 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
                 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
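# The scripts in this repo lean on modules.date_process to turn integer
# date/time stamps into fractional days since the start of the record (the
# same units as full_time above). The helper itself is not shown in this
# excerpt; the stand-in below is a minimal sketch of the assumed convention
# (YYYYMMDD dates, whole hours, days counted from 1 Jan of start_year).
import datetime
import numpy as np

def date_process_sketch(dates, hours, start_year=2006):
    t0 = datetime.datetime(start_year, 1, 1)
    out = []
    for d, h in zip(dates, hours):
        d = int(d)
        dt = datetime.datetime(d // 10000, (d // 100) % 100, d % 100, int(h))
        out.append((dt - t0).total_seconds() / 86400.)  # fractional days since start
    return np.array(out)

# date_process_sketch([20060101, 20060102], [0, 12]) -> array([0. , 1.5])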
def run_LSP(mod_data,x): lat_i = lat_indices[x] lon_i = lon_indices[x] print lat_i,lon_i current_lat = lat_c[lat_i] current_lon = lon_c[lon_i] waveform = mod_data waveform_ave = np.average(waveform) model_date_val = np.copy(model_date) model_time_val = np.copy(model_time) time = modules.date_process(model_date_val,model_time_val,start_year) if (species.lower() != 'gmao_temp') and (species.lower() != 'gmao_psfc') and (species.lower() != 'wind_speed') and (species.lower() != 'wind_direction'): waveform = waveform*1e9 #check model vals are valid #valid = vals >= 0 #vals = vals[valid] #model_time_val = model_time[valid] #model_date_val = model_date[valid] #take 8 hour average divisor = 8 total_len = len(waveform)/divisor start = 0 end = divisor ave_waveform = [] ave_time = [] for i in range(total_len): ave = np.ma.average(waveform[start:end]) ave_time=np.append(ave_time,time[start]) ave_waveform=np.append(ave_waveform,ave) start+=divisor end+=divisor time=np.copy(ave_time) waveform=np.copy(ave_waveform) #take lsp unwindowed of waveform ua_periods,ua_mag,ua_ph,ua_fr,ua_fi = modules.take_lomb_unwindowed(time,waveform,ofac,1./24) #take out known periodic components 1,182.625, and 365.25 a priori for more accurate red noise fit. closest_daily_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i]-1.)) closest_ha_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i]-182.625)) closest_annual_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i]-365.25)) rm_indices = [closest_daily_index,closest_ha_index,closest_annual_index] ua_mag_c,ua_fr,ua_fi = redfit.sidelobe_percent_remove(np.copy(ua_mag),ua_fr,ua_fi,rm_indices,5.,ua_periods) #------------------------------------------------------------------------------- #Do IFFT of altered spectra - with significant periods removed and gaps left in real and imag components linearly interpolated. 
#altered spectra provides red noise estimation baseline ##use ifft to get time series back from adjusted spectra #complex Fourier spectrum which corresponds to the Lomb-Scargle periodogram: F = [0]*((len(ua_fr)*2)+1) #set first real value to average F[0] = complex(waveform_ave*len(waveform),0) #Get reverse real and imaginary values rev_ua_fr=np.copy(ua_fr[::-1]) rev_ua_fi=np.copy(ua_fi[::-1]) rev_ua_fr[0] = 0 rev_ua_fi[0] = 0 f_index = 1 #Fill Fourier Spectrum real and imaginary values for i in range(len(ua_fr)): F[f_index] = complex(ua_fr[i],ua_fi[i]) f_index+=1 for i in range(len(ua_fr)): F[f_index] = complex(rev_ua_fr[i],-rev_ua_fi[i]) f_index+=1 F = np.array(F) #Take ifft and just take real values ifft_ua_ts = numpy.fft.ifft(F) ifft_ua_ts = ifft_ua_ts.astype('float64') ifft_ua_ts_len = (len(ifft_ua_ts)/ofac) + np.mod(len(ifft_ua_ts),ofac) ifft_time = time[-ifft_ua_ts_len:] ifft_ua_ts = ifft_ua_ts[-len(waveform):] ifft_time = ifft_time-ifft_time[0] a_periods,a_mag,corr_a_mag,a_fr,a_fi,a_red_periods,a_red_mag,a_gredth,a_fac95,a_fac99,a_fac99_9,a_faccrit,a_fac_grid,a_sig_levels,a_tau,a_corr = redfit.red_background(nsim,mctest,ifft_time,ifft_ua_ts,ofac) #apply lsp correction from altered spectrum to unaltered spectrum corr_ua_mag = ua_mag/a_corr #check confidence of each point on spectrum sigs = np.zeros(len(corr_ua_mag)) last_ind = len(a_sig_levels)-1 for i in range(len(a_sig_levels)-1): conf_low = a_gredth*a_fac_grid[i] conf_up = a_gredth*a_fac_grid[i+1] current_last_ind = i+1 for j in range(len(corr_ua_mag)): if sigs[j] == 0: if (corr_ua_mag[j] >= conf_low[j]) and (corr_ua_mag[j] < conf_up[j]): sigs[j] = a_sig_levels[i] elif current_last_ind == last_ind: if corr_ua_mag[j] > conf_up[j]: sigs[j] = a_sig_levels[i+1] #get critical significance for all points on spectrum crit_sig = a_gredth*a_faccrit #get 95,99 and 99.9 % chi squared significance bands for all points on spectrum sig_95 = a_gredth*a_fac95 sig_99 = a_gredth*a_fac99 sig_99_9 = a_gredth*a_fac99_9 return (x,sigs,sig_95,sig_99,sig_99_9,crit_sig,a_gredth,corr_ua_mag,ua_periods,a_tau)
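# The loop above assembles a Hermitian-symmetric complex spectrum from the
# Lomb-Scargle real/imaginary components so that the inverse FFT returns a
# purely real time series. The toy example below (synthetic numbers, not the
# project's spectra) shows the symmetry being relied on: for a length 2N+1
# spectrum, bins N+1..2N are the conjugates of bins N..1.
import numpy as np

N = 4
fr = np.random.randn(N)            # real parts of the positive-frequency bins
fi = np.random.randn(N)            # imaginary parts
F = np.zeros(2 * N + 1, dtype=complex)
F[0] = 10.0                        # zero-frequency term: mean level * number of points
F[1:N + 1] = fr + 1j * fi
F[N + 1:] = (fr - 1j * fi)[::-1]   # conjugate of the positive bins, in reverse order
ts = np.fft.ifft(F)
print np.max(np.abs(ts.imag))      # ~1e-16: the reconstructed series is real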
model_var_mask = np.ma.masked_where(model_var < 0, model_var)
gridbox_count = len(lat_c) * len(lon_c)
lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, 1., 114.)

y = model_var[:, lat_n, lon_n]
y = y * 1e9

#set up plot
fig = plt.figure(figsize=(23, 12.3))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)

x = modules.date_process(model_date, model_time, 2005)

ofac = 4
model_periods, model_mag, model_ph, model_fr, model_fi, amp_corr = modules.take_lomb(x, y, ofac, 1. / 24)

def form2(x, pos):
    """Return x formatted as a string with 2 decimal places."""
    return '%.2f' % x

def form5(x, pos):
    """Return x formatted as a string with 6 decimal places."""
    return '%.6f' % x
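# form2 and form5 follow the (value, position) signature that Matplotlib tick
# formatters expect; they are presumably attached to the axes further down the
# script. A typical hook-up, using standard matplotlib.ticker API (which axes
# they go on in the original script is an assumption here):
from matplotlib.ticker import FuncFormatter

ax.xaxis.set_major_formatter(FuncFormatter(form2))   # e.g. periods to 2 d.p.
ax.yaxis.set_major_formatter(FuncFormatter(form5))   # e.g. magnitudes to 6 d.p.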
GAW_switch = 'y'

# Read in the model output
if GAW_switch == 'y':
    model, names = modules.readfile_GAW("binary_logs/GEOS_v90103_2x2.5_GAW_O3_logs.npy", model_index)  #model index represents gaw location
else:
    model, names = modules.readfile("binary_logs/GEOS_v90103_4x5_CV_logs.npy", "001")  #001 represents single location

# Process the model date
date = model[:, 0]
time = model[:, 1]
model_time = modules.date_process(date, time)

#Define sampling intervals
samp_spacing = 1. / 24.

#Convert model time array into numpy array
model_time = np.array(model_time)

counter = 0

for species in species_list:
    units, obs_data_name, unit_cut, species_type, actual_species_name, obs_switch, model_cut_switch, ofac = modules.obs_variable_finder(species)

    #set plotting area & background to white
    fig = plt.figure(figsize=(20, 12))
def site_iter_process(valid_refs,c): #for site_ref in valid_refs: data_valid = True site_ref = valid_refs[c] print 'ref = ',site_ref site_test = all_refs == site_ref site_yyyymmdd = yyyymmdd[site_test] site_hhmm = hhmm[site_test] site_vals = vals[site_test] #convert blank invalids to -99999 test_inv = site_vals == '' site_vals[test_inv] = -99999 site_vals = np.float64(site_vals) #convert number invalids to -99999 test_inv = site_vals < 0 site_vals[test_inv] = -99999 #put vals into full grid date_con = np.array(site_yyyymmdd).astype(int) time_con = np.array(site_hhmm).astype(int) #create max possible o3 grid full_data = np.empty(n_hours) full_data[:] = -99999 #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) syn_grid_time = np.arange(0,n_days,1./24) syn_grid_time = np.round(syn_grid_time,decimals=5) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') site_vals = np.array(site_vals) full_data[indices] = site_vals meta_index = meta_refs.index(site_ref) tz = meta_tz[meta_index] lat = np.float64(meta_lats[meta_index]) lon = np.float64(meta_lons[meta_index]) alt = np.float64(meta_alts[meta_index]) raw_class_name = meta_class[meta_index] anthrome_class_name = class_name[meta_index] #correct timezone to UTC if tz < 0: #get rid of values at start and append -99999's at end cut = full_data[:tz] for num in range(np.abs(tz)): cut = np.insert(cut,0, -99999) full_data = cut elif tz > 0: #put -99999's at start and get rid of values at end cut = full_data[tz:] for num in range(tz): cut = np.append(cut, -99999) full_data = cut #do data quality checks full_data,data_valid = modules.quality_check(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year) #set mm if species == 'O3': mm = 'ultraviolet photometry' elif (species == 'NO') or (species == 'NO2') or (species == 'CO'): mm = 'non-dispersive infrared absorption' #set sampling as average st = 'average' #set site file resolution file_res = 'H' return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
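# The time-matching idiom above - round both the observation times and a
# synthetic hourly grid, then use np.searchsorted to find each observation's
# slot - is reused by several of the site-processing routines below. A small
# self-contained illustration with made-up numbers:
import numpy as np

n_days = 2
syn_grid_time = np.round(np.arange(0, n_days, 1. / 24), decimals=5)   # hourly grid, in days
obs_time = np.round(np.array([0.0, 0.125, 1.0]), decimals=5)          # 00:00, 03:00, 24:00
obs_vals = np.array([30.1, 32.5, 28.9])

full_data = np.empty(len(syn_grid_time))
full_data[:] = -99999
indices = np.searchsorted(syn_grid_time, obs_time, side='left')
full_data[indices] = obs_vals
print indices            # [ 0  3 24]
print full_data[:5]      # unmatched hours stay -99999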
def run_LSP(model_data, x): print obs_refs[x] vals = model_data #check obs vals are valid valid = vals >= 0 vals = vals[valid] model_time_val = model_time[valid] model_date_val = model_date[valid] full_times = modules.date_process(model_date,model_time,start_year) if timeres == 'M': full_times_year = full_times[:12] else: full_times_year = full_times[:8766] full_times_day = full_times[:24] valid_times = modules.date_process(model_date_val,model_time_val,start_year) site_lon = obs_lons[x] #convert site_lon to 0 to 360 degs if site_lon < 0: site_lon = 360-np.abs(site_lon) #transform from UTC time to solar time sun_time = lon_step_time*site_lon time_diff = sun_time - 0 if time_diff > 12: time_diff = time_diff-24 #make time start from 0 valid_times_from0 = modules.phase_start_correct(valid_times) periodic_periods = [1./4.,1./3.,1./2.,1.,365.25/4.,365.25/3.,365.25/2.,365.25] periods,mag,ph,fr,fi = modules.take_lomb_spec(valid_times_from0,vals,w=True,key_periods=periodic_periods) #get mean of values mean_array = np.average(vals) #correct all phases for start point (not actually being from 0 - just corrected to be) ph = modules.phase_start_point_correct_all(periodic_periods,ph,valid_times) key_diurnal_periods = [1./4.,1./3.,1./2.,1.] key_seasonal_periods = [365.25/4.,365.25/3.,365.25/2.,365.25] diurnal_mags = mag[:4] seasonal_mags = mag[4:] seasonal_phs = ph[4:] #get individual mags and phases daily_h3_mag = mag[0] daily_h2_mag = mag[1] daily_h1_mag = mag[2] orig_daily_mag = mag[3] daily_h3_ph = ph[0] daily_h2_ph = ph[1] daily_h1_ph = ph[2] orig_daily_ph = ph[3] seasonal_h3_mag = mag[4] seasonal_h2_mag = mag[5] seasonal_h1_mag = mag[6] annual_mag = mag[7] seasonal_h3_ph = ph[4] seasonal_h2_ph = ph[5] seasonal_h1_ph = ph[6] annual_ph = ph[7] #convert sub diurnal phases from UTC to solar time daily_h3_ph = modules.solar_time_phase_corrector(daily_h3_ph,6,time_diff) daily_h2_ph = modules.solar_time_phase_corrector(daily_h2_ph,24./3.,time_diff) daily_h1_ph = modules.solar_time_phase_corrector(daily_h1_ph,12,time_diff) orig_daily_ph = modules.solar_time_phase_corrector(orig_daily_ph,24,time_diff) diurnal_phs = [daily_h3_ph,daily_h2_ph,daily_h1_ph,orig_daily_ph] #convolve annual cycle and harmonics to seasonal waveform for 1 year seasonal_mag,seasonal_min_ph,seasonal_max_ph,seasonal_waveform,seasonal_ff = modules.period_convolution(key_seasonal_periods,full_times_year,seasonal_mags,seasonal_phs,mean_array) #convolve diurnal cycle and harmonics to diurnal waveform for 1 day diurnal_mag,diurnal_min_ph,diurnal_max_ph,diurnal_waveform,diurnal_ff = modules.period_convolution(key_diurnal_periods,full_times_day,diurnal_mags,diurnal_phs,mean_array) #convolve all full_mag,full_min_ph,full_max_ph,full_waveform,full_ff = modules.period_convolution(periodic_periods,full_times,mag,ph,mean_array) #convert phase to time daily_h3_ph = modules.convert_phase_units_actual_single(daily_h3_ph,6.) daily_h2_ph = modules.convert_phase_units_actual_single(daily_h2_ph,24./3.) daily_h1_ph = modules.convert_phase_units_actual_single(daily_h1_ph,12.) orig_daily_ph = modules.convert_phase_units_actual_single(orig_daily_ph,24.) diurnal_min_ph = modules.convert_phase_units_actual_single(diurnal_min_ph,24.) diurnal_max_ph = modules.convert_phase_units_actual_single(diurnal_max_ph,24.) seasonal_h3_ph = modules.convert_phase_units_actual_single(seasonal_h3_ph,3.) seasonal_h2_ph = modules.convert_phase_units_actual_single(seasonal_h2_ph,4.) seasonal_h1_ph = modules.convert_phase_units_actual_single(seasonal_h1_ph,6.) 
annual_ph = modules.convert_phase_units_actual_single(annual_ph,12.) seasonal_min_ph = modules.convert_phase_units_actual_single(seasonal_min_ph,12.) seasonal_max_ph = modules.convert_phase_units_actual_single(seasonal_max_ph,12.) return (x,daily_h3_mag,daily_h3_ph,daily_h2_mag,daily_h2_ph,daily_h1_mag,daily_h1_ph,orig_daily_mag,orig_daily_ph,diurnal_mag,diurnal_min_ph,diurnal_max_ph,seasonal_h3_mag,seasonal_h3_ph,seasonal_h2_mag,seasonal_h2_ph,seasonal_h1_mag,seasonal_h1_ph,annual_mag,annual_ph,seasonal_mag,seasonal_min_ph,seasonal_max_ph,mean_array,diurnal_waveform,seasonal_waveform,full_waveform)
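# The solar-time offset used in run_LSP above follows directly if lon_step_time
# is 24 hours per 360 degrees of longitude; that constant is set outside this
# excerpt, so its value here is an assumption. Minimal stand-alone version of
# the same arithmetic:
lon_step_time = 24. / 360.   # assumed value: hours of solar time per degree of longitude

def solar_offset_hours(site_lon):
    if site_lon < 0:
        site_lon = 360 - abs(site_lon)   # convert longitude to 0-360 degrees
    time_diff = lon_step_time * site_lon
    if time_diff > 12:
        time_diff = time_diff - 24       # wrap into (-12, 12]
    return time_diff

print solar_offset_hours(-24.87)   # Cape Verde: about -1.7 hours from UTC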
#read in specific site data
site_group = obs_root_grp.groups['cvo']

#read in variables for site
obs_var = site_group.variables['o3'][:]
obs_date = site_group.variables['date'][:]
obs_time = site_group.variables['time'][:]
obs_lat = site_group.latitude
obs_lon = site_group.longitude
obs_alt = site_group.altitude
obs_group = site_group.process_group

test = obs_var >= 0
obs_var = obs_var[test]
obs_time = modules.date_process(obs_date, obs_time, 2005)
obs_time = obs_time[test]

obs_time = obs_time[:100]
obs_var = obs_var[:100]

u = np.copy(obs_time)
y = np.copy(obs_var)
m = len(u)

# minimize    (1/2) * || yhat - y ||_2^2
# subject to  yhat[j] >= yhat[i] + g[i]' * (u[j] - u[i]),   j, i = 0,...,m-1
#
# Variables yhat (m), g (m).
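# The comment block above describes a convex least-squares fit: choose fitted
# values yhat and subgradients g so that the fitted curve is convex over the
# sample times u. The solver call itself is not part of this excerpt; the
# sketch below sets the problem up as a QP for CVXOPT (an assumption about the
# intended solver), with stacked variables x = [yhat; g].
import numpy as np
from cvxopt import matrix, solvers

def convex_fit(u, y):
    m = len(u)
    # objective (1/2)*||yhat - y||^2  ->  (1/2) x'Px + q'x with P = diag(I, 0)
    P = np.zeros((2 * m, 2 * m))
    P[:m, :m] = np.eye(m)
    q = np.concatenate([-np.asarray(y, dtype=float), np.zeros(m)])
    # one inequality per (i, j) pair: yhat[i] - yhat[j] + g[i]*(u[j] - u[i]) <= 0
    G = np.zeros((m * m, 2 * m))
    row = 0
    for i in range(m):
        for j in range(m):
            G[row, i] += 1.0
            G[row, j] -= 1.0
            G[row, m + i] = u[j] - u[i]
            row += 1
    h = np.zeros(m * m)
    sol = solvers.qp(matrix(P), matrix(q), matrix(G), matrix(h))
    x = np.array(sol['x']).ravel()
    return x[:m], x[m:]   # fitted values yhat and subgradients g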
def site_iter_process(valid_refs,c): #process data #for i in range(n_refs): data_valid = True site_data = data[c] site_meta = site_data[0] file_res = resolutions[c] #get data and metadata latitudes= [site_meta['LATITUDE']] longitudes = [site_meta['LONGITUDE']] altitudes = [site_meta['ALTITUDE']] land_use_classes = [site_meta['LAND_USE']] station_classes = [site_meta['STATION CATEGORY']] all_mm = [site_meta['MEASUREMENT METHOD']] if (file_res == 'hr') or (file_res == 'da'): var = np.array(site_data[1].values.tolist()) elif file_res == 'mo': all_var = np.array(site_data[1].values.tolist()) var = all_var[:,1] end_times = all_var[:,0] end_date_con = [d[:4]+d[5:7]+d[8:10] for d in end_times] end_time_con = [d[11:13]+d[14:] for d in end_times] times = site_data[1].index print times date_con = [d.strftime('%Y%m%d') for d in times] time_con = [d.strftime('%H%M') for d in times] #get ref site_ref = valid_refs[c] site_group = group_codes[c] print 'ref == %s'%(site_ref) print 'res = ',file_res #if file resolution is daily or monthly then replicate times after point, to fill hourly data array. count=0 if file_res == 'da': file_hours = len(date_con) for i in range(file_hours): current_hh = int(time_con[count][:2]) current_mm = int(time_con[count][2:]) s = datetime.datetime(year = start_year, month = 1, day = 1, hour = current_hh, minute = current_mm) e = datetime.datetime(year = start_year, month = 1, day = 2, hour = current_hh, minute = current_mm) day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1] date_con = np.insert(date_con,count+1,[date_con[count]]*23) time_con = np.insert(time_con,count+1,day_hours) var = np.insert(var,count+1,[var[count]]*23) count +=24 if file_res == 'mo': file_hours = len(date_con) for i in range(file_hours): current_year = int(date_con[count][:4]) current_month = int(date_con[count][4:6]) current_day = int(date_con[count][6:]) current_hour = int(time_con[count][:2]) next_year = int(end_date_con[i][:4]) next_month = int(end_date_con[i][4:6]) next_day = int(end_date_con[i][6:]) next_hour = int(end_time_con[i][:2]) s = datetime.datetime(year = current_year, month = current_month, day = current_day, hour = current_hour, minute = 0) e = datetime.datetime(year = next_year, month = next_month, day = next_day, hour = next_hour, minute = 0) day_date = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][:-1] day_hour = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][:-1] date_con = np.insert(date_con,count+1,day_date) time_con = np.insert(time_con,count+1,day_hour) var = np.insert(var,count+1,[var[count]]*len(day_date)) count += (len(day_date)+1) date_con = np.array(date_con).astype(int) time_con = np.array(time_con).astype(int) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data = np.empty(len(grid_dates)) full_data[:] = -99999 full_data[indices] = var #convert nans to -99999's nan_inds = np.isnan(full_data) full_data[nan_inds] = -99999 #get mode of metadata lat = np.float64(stats.mode(latitudes)[0][0]) lon = np.float64(stats.mode(longitudes)[0][0]) alt = np.float64(stats.mode(altitudes)[0][0]) land_use_class = stats.mode(land_use_classes)[0][0] raw_class_name = stats.mode(station_classes)[0][0] mm = stats.mode(all_mm)[0][0] #get measurement method if (mm == 'Ultraviolet (UV) 
photometryEnvironnement S.A. Model O331M UV Ozone Analyzer') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9800') or (mm == 'Ultraviolet (UV) photometryThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryMCV 48-AUV') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400A UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 48 CO analyser') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400E UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 300 O3 analyser') \ or (mm == 'Ultraviolet (UV) photometry342 M') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9812 O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 350E O3 analyser') or (mm == 'Ultraviolet (UV) photometryENVIRONMENT 1003 AH') or (mm == 'Ultraviolet (UV) photometryC.S.I. 3.100') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003 O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810B O3 analyser') or (mm == 'Ultraviolet (UV) photometrytoo generic') or (mm == 'Ultraviolet (UV) photometryThermo 49 CPS Ozone Primary Standard') \ or (mm == 'Ultraviolet (UV) photometryDASIBI') or (mm == 'UV fluorescencetoo generic') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003-PC O3 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 49i O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-PC O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-RS O3 analyser') or (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O341M UV Ozone Analyzer') or (mm == 'Ultraviolet (UV) photometryISEO Argopol-SAM-XAIR') \ or (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O342M UV Ozone Analyze') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 370 O3 analyser') or (mm == 'spectrophotometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-AH O3 analyser') or (mm == 'UV fluorescenceThermo 49c' ) or (mm == 'Ultraviolet (UV) photometryPHILIPS K50110/00 UV Photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 8810 O3 analyser') or (mm == 'Ultraviolet (UV) photometryPHILIPS K50094 API 400') or (mm == 'Ultraviolet (UV) photometryORION') or (mm == 'Ultraviolet (UV) photometryThermo model 49w O3 analyser') \ or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810 O3 analyser') or (mm == 'Ultraviolet (UV) photometryCOLUMBIA SCIENTIFIC IC 3100') or (mm == 'Ultraviolet (UV) photometry2008A') or (mm == 'Ultraviolet (UV) photometryThermo model 43s SO2 analyser') or (mm == 'Ultraviolet (UV) photometryMLU') or (mm == 'Ultraviolet (UV) photometryThermo model 49 O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1108 O3 analyser') or (mm == 'Ultraviolet (UV) photometryAMIBRACK') or (mm == 'Ultraviolet (UV) photometryThermo model 49c O3 analyser') or (mm == 'UV fluorescenceUNKNOWN') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400 UV photometric O3 analyser') \ or (mm == 'UV fluorescenceTeledyne API 400 UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9830 CO analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 5014') or (mm == 'Ultraviolet (UV) photometryEnvironics 300/ Environics') or (mm == 'Ultraviolet (UV) photometryANALYSIS AUTOMATION Mod. 
427') or (mm == 'Ultraviolet (UV) photometryANALYSIS AUTOMATION') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008 O3 analyser') or (mm == 'ultraviolet absorptionORION') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9811 O3 analyser') or (mm == 'Ultraviolet (UV) photometryENVIRONMENT 1003RS') \ or (mm == 'UV absorption (ref)UNKNOWN') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Environnement S.A. SANOA Multigas Longpath Monitoring System') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003-RS O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 350 O3 analyser') or (mm == 'Ultraviolet (UV) photometrySFI O342M') or (mm == 'UV fluorescenceMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryDANI ENVIRONMENT 1003 AH') or (mm == 'Ultraviolet (UV) photometryS-5014') or (mm == 'Ultraviolet (UV) photometryThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryUNKNOWN') \ or (mm == 'Ultraviolet (UV) photometryHoriba model APNA 360 NOx analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'UV fluorescenceThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryContiflo') or (mm == 'Ultraviolet (UV) photometryTeledyne API undertermined') or (mm == 'UV fluorescenceThermo model 43a SO2 analyser') or (mm == 'UV fluorescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm == 'UV fluorescenceThermo model 43c SO2 analyser') \ or (mm =='Ultraviolet (UV) photometryTeledyne API undertermined') or (mm =='UV fluorescenceUNKNOWN') or (mm =='UV fluorescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm =='Ultraviolet (UV) photometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810 O3 analyser') \ or (mm == 'ChemiluminescenceTeledyne API undertermined') or (mm == 'ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360 NOx analyser') or (mm == 'ChemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC31M NO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 14B chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841 NOx analyser') or (mm == 'ChemiluminescenceENVIRONMENT ZC 32M') or (mm == 'ChemiluminescenceHoriba model APNA 300 NOx analyser') or (mm == 'chemiluminescenceENVIRONNEMENT AC 30M') \ or (mm == 'ChemiluminescenceThermo model 42i NO/Nox analyser') or (mm == 'ChemiluminescenceTeledyne API 400 UV photometric O3 analyser') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION') or (mm == 'ChemiluminescenceMonitor Labs model 8941A NOx analyser') or (mm == 'ChemiluminescenceTeledyne API undertermined') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC32M NO2 Analyzer') or (mm == 'ChemiluminescenceTeledyne API 200E chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceHoriba model APHA 360E hydrocarbons analyser') or (mm == 'ChemiluminescenceMELOY S1600') or (mm == 'ChemiluminescenceECO PHYSICS CLD 700') or (mm == 'ChemiluminescenceORION') \ or (mm == 'ChemiluminescenceTECAN CLD 502') or (mm == 'ChemiluminescenceMonitor Labs model 9850 SO2 analyser') or (mm == 'ChemiluminescenceECO PHYSICS CLD 700 AL') or (mm == 'ChemiluminescenceEnvironnement S.A. 
Model AC30M NO2 Analyzer') or (mm == 'ChemiluminescenceMCV 30-QL') or (mm == 'ChemiluminescenceAMBIRACK') or (mm == 'ChemiluminescenceTeledyne API 100A UV Fluorescent SO2 Analyser') or (mm == 'ChemiluminescenceS-5012') or (mm == 'ChemiluminescenceAirpointer') or (mm == 'ChemiluminescenceThermo model 42c NO/Nox analyser') or (mm == 'ChemiluminescenceThermo model 42i-TL (Trace level Nox)') or (mm == 'ChemiluminescenceMonitor Labs model 9841T NOx analyser') \ or (mm == 'ChemiluminescenceThermo model 42 NO/Nox analyser') or (mm == 'ChemiluminescenceMonitor Labs model 8841 NOx analyser') or (mm == 'ChemiluminescenceColumbia Scientific Industries Models 1600') or (mm == 'chemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION Mod. 447') or (mm == 'ChemiluminescenceSFI AC32M') or (mm == 'ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm == 'Chemiluminescenceserinus 40 Nox') or (mm == 'ChemiluminescenceThermo model 42s NO/Nox analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360 NOx analyser') or (mm == 'ChemiluminescenceThermo model 42C-TL (Trace level Nox)') \ or (mm == 'ChemiluminescenceTeledyne API 200 chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 8440 NOx analyser') or (mm == 'ChemiluminescencePHILIPS K50034 API 200A') or (mm == 'ChemiluminescenceENVIRONMENT') or (mm == 'ChemiluminescenceMonitor Labs model 8840 NOx analyser') or (mm == 'chemiluminescenceHORIBA APNA 370') or (mm == 'ChemiluminescenceMonitor Labs undetermined') or (mm == 'ChemiluminescencePHILIPS 42') or (mm == 'ChemiluminescencePHILIPS K50109/00 Gas Filter Correlation CO analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm == 'ChemiluminescenceThermo model 43 SO2 analyser') \ or (mm == 'ChemiluminescenceHoriba model APNA 350 NOx analyser') or (mm == 'ChemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceTHERMO ELECTRON INSTRUMENTS') or (mm == 'ChemiluminescenceLAP 884') or (mm == 'ChemiluminescenceMonitor Labs model 9841A NOx analyser') or (mm == 'ChemiluminescenceHoriba model APNA 370 NOx analyser') or (mm == 'ChemiluminescenceDASIBI 2108 NOx analyser') or (mm == 'ChemiluminescenceThermo model 14B/E chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AF22M SO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 42w NO/Nox analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360E NOx analyser') \ or (mm == 'Chemiluminescencetoo generic') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm == 'ChemiluminescencePHILIPS K50235/00 NO-NOx-NO2 analyser') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC31M NO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 14B chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841 NOx analyser') or (mm =='ChemiluminescenceENVIRONMENT ZC 32M') or (mm =='ChemiluminescenceBENDIX') or (mm =='ChemiluminescenceThermo model 42i NO/Nox analyser') \ or (mm =='ChemiluminescenceTeledyne API 400 UV photometric O3 analyser') or (mm =='Ultraviolet (UV) photometryHoriba model APNA 360 NOx analyser') or (mm =='ChemiluminescenceThermo model 48 CO analyser') or (mm =='ChemiluminescenceMonitor Labs model 8941A NOx analyser') or (mm =='ChemiluminescenceTeledyne API undertermined') or (mm =='ChemiluminescenceEnvironnement S.A. 
Model AC32M NO2 Analyzer') or (mm =='ChemiluminescenceTeledyne API 200E chemiluminescent NOx analyser') or (mm =='ChemiluminescenceHoriba model APHA 360E hydrocarbons analyser') or (mm =='ChemiluminescenceECO PHYSICS CLD 700') or (mm =='ChemiluminescenceORION') \ or (mm =='ChemiluminescenceTECAN CLD 502') or (mm =='ChemiluminescenceMonitor Labs model 9850 SO2 analyser') or (mm =='ChemiluminescenceECO PHYSICS CLD 700 AL') or (mm =='ChemiluminescenceEnvironnement S.A. Model AC30M NO2 Analyzer') or (mm =='ChemiluminescenceMCV 30-QL') or (mm =='ChemiluminescenceBendix/Combustion Engineering Model 8101-C Oxides of Nitrogen Analyze') or (mm =='ChemiluminescenceTeledyne API 100A UV Fluorescent SO2 Analyser') or (mm =='ChemiluminescenceS-5012') or (mm =='ChemiluminescenceHoriba model APNA 300E NOx analyser') or (mm =='ChemiluminescenceThermo model 42c NO/Nox analyser') \ or (mm =='ChemiluminescenceMonitor Labs model 8440 NOx analyser') or (mm =='ChemiluminescenceThermo model 42i-TL (Trace level Nox)') or (mm =='ChemiluminescenceThermo model 42 NO/Nox analyser') or (mm =='ChemiluminescenceMonitor Labs model 8841 NOx analyser') or (mm =='ChemiluminescenceColumbia Scientific Industries Models 1600') or (mm =='chemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION Mod. 447') or (mm =='ChemiluminescenceAirpointer') or (mm =='ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm =='ChemiluminescenceThermo model 42s NO/Nox analyser') or (mm =='ChemiluminescenceHoriba model APNA 360 NOx analyser') \ or (mm =='ChemiluminescenceTeledyne API 200 chemiluminescent NOx analyser') or (mm =='ChemiluminescencePHILIPS K50034 API 200A') or (mm =='ChemiluminescenceENVIRONMENT') or (mm =='ChemiluminescenceMonitor Labs model 8840 NOx analyser') or (mm =='Beta ray attenuationTeledyne API 200A chemiluminescent NOx analyser') or (mm =='ChemiluminescenceMonitor Labs undetermined') or (mm =='ChemiluminescencePHILIPS K50102 NO') or (mm =='Chemiluminescencetoo generic') or (mm =='ChemiluminescenceThermo model 42C-TL (Trace level Nox)') or (mm =='ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm =='ChemiluminescenceTHERMO ENVIRONMENTAL INSTRUMENTS') \ or (mm =='ChemiluminescenceHoriba model APNA 350 NOx analyser') or (mm =='ChemiluminescenceUNKNOWN') or (mm =='ChemiluminescenceTHERMO ELECTRON INSTRUMENTS') or (mm =='ChemiluminescenceLAP 884') or (mm =='ChemiluminescenceMonitor Labs model 9841A NOx analyser') or (mm =='ChemiluminescenceHoriba model APNA 370 NOx analyser') or (mm =='ChemiluminescenceDASIBI 2108 NOx analyser') or (mm =='ChemiluminescenceThermo model 14B/E chemiluminescence NO-NO2-Nox') or (mm =='ChemiluminescenceThermo model 42w NO/Nox analyser') or (mm =='ChemiluminescenceHoriba model APNA 360E NOx analyser') or (mm =='ChemiluminescenceEC9843') or (mm =='ChemiluminescencePHILIPS K50109/00 Gas Filter Correlation CO analyser') \ or (mm =='ChemiluminescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm =='ChemiluminescencePHILIPS K50235/00 NO-NOx-NO2 analyser') or (mm =='ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm =='ChemiluminescenceEnvironnement S.A. 
Model CO12M CO Analyzer') or (mm =='ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm =='ChemiluminescenceUNKNOWN') or (mm =='Chemiluminescencetoo generic') or (mm =='Beta ray attenuationMLU') or (mm =='Beta ray attenuationORION') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'UV fluorescenceHoriba model APNA 360 NOx analyser') \ or (mm == 'UV fluorescenceUNKNOWN') or (mm == 'UV fluorescenceThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser'): if species == 'O3': mm = 'ultraviolet photometry' elif (species == 'NO') or (species == 'NO2') or (species == 'CO'): mm = 'chemiluminescence' else: 1+'a' if (mm =='Non-dispersive infrared spectroscopy (NDIR)Meloy Model SA 700 Fluorescence Sulfur Dioxide Analyze') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9830B CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 8831 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ORION') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 200A chemiluminescent NOx analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ANALYSIS AUTOMATION') or (mm =='Non-dispersive infrared spectroscopy (NDIR)THERMO ELECTRON INSTRUMENTS') \ or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 43a SO2 analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 8830 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)CO ANALAYZER') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO12M CO Analyzer') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48i CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)too generic') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS K50093 API 300A') or (mm =='Non-dispersive infrared spectroscopy (NDIR)MLU') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 300 CO analyser') \ or (mm =='Non-dispersive infrared spectroscopy (NDIR)MLU 300') or (mm =='Non-dispersive infrared spectroscopy (NDIR)UNKNOWN') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ENVIRONMENT') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300 gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 49 O3 analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48w CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Maihak Unor 6N') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360E CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs undetermined') \ or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300E gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 100 UV Fluorescent SO2 Analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. 
Model CO10M CO Analyzer') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 350 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)FUJI ZRC') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API undertermined') or (mm =='Non-dispersive infrared spectroscopy (NDIR)S-5006') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 350E CO analyser') \ or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48c CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 42 NO/Nox analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)SFI CO12M') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360CE CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS 48') or (mm =='Non-dispersive infrared spectroscopy (NDIR)DASIBI 3008 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300A gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 370 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO11M CO Analyzer') \ or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9841A NOx analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)AAL 407') or (mm =='Non-dispersive infrared spectroscopy (NDIR)AMBIRACK') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9830 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 300E CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS K50109/00 Gas Filter Correlation CO analyser') or (mm =='UNKNOWNTeledyne API 300 gas filter correlation CO analyser') or (mm =='UNKNOWNHoriba model APMA 350 CO analyser') \ or (mm =='Infrared gas filter correlationTHERMO ELECTRON INSTRUMENTS 48c') or (mm =='Infrared gas filter correlationHoriba model APMA 360 CO analyser') or (mm =='infrared absorptionUNKNOWN') or (mm =='Infrared gas filter correlationUNKNOWN') or (mm =='Infrared gas filter correlationTeledyne API 300E gas filter correlation CO analyser'): mm = 'non-dispersive infrared spectroscopy' if (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Opsis AR500 Open path monitor') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)UNKNOWN') or (mm == 'Ultraviolet (UV) photometryDOAS') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Environnement S.A. 
SANOA Multigas Longpath Monitoring System'): mm = 'differential optical absorption spectrosocopy' if (mm == 'flame photometryThermo model 48 CO analyser') or (mm == 'flame photometryTeledyne API 300 gas filter correlation CO analyser'): mm = 'flame photometry' if (mm == 'Gas Chromatography (ref)UNKNOWN') or (mm == 'chromatographyUNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GUNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GEnvironnement VOC71M') or (mm == 'chromatographyMonitor Labs model 8440 NOx analyser') or (mm == 'Gas chromatography (GC) + flame ionisation (GC-FID)UNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GAIRMOZONE') or (mm =='Gas chromatography followed by flame ionization detection (GVarian Chrompack') or (mm =='chromatographyChrompack BTX CP7001 Monitor') or (mm =='Gas chromotography (GC)UNKNOWN'): mm = 'gas chromatography flame ionisation detection' if (mm == "Griess-Saltzman reactionLipinski's aspirator") or (mm == 'Griess-Saltzman reaction101') or (mm == 'Griess-Saltzman reactionUNKNOWN') or (mm == "UNKNOWNLipinski's aspirator") or (mm == 'Griess-Saltzman reactionBUBBLER 24 H') or (mm == "Griess-Saltzman reactionLipinski's aspirator AGT24") or (mm == 'Griess-Saltzman reactionfilter pack') or (mm == 'NEDA Griess-Yloswayaspirator') or (mm == 'colorimetryUNKNOWN'): mm = 'griess saltzman colorimetric' if (mm == 'SpectrophotometrySequential Air Sampler, Type SS2000. NaI-impregnated glass sinters') or (mm == 'SpectrophotometryGlass tubes') or (mm == 'Spectrophotometryglass_sinter') or (mm =='Spectrophotometryfilter pack') or (mm == 'SpectrophotometryUNKNOWN') or (mm == 'Spectrophotometryphotocolorimeter') or (mm == "SpectrophotometryLipinski's aspirator") or (mm == 'SpectrophotometryBUBBLER 24 H') or (mm == 'SpectrophotometryIMPREGNATED FILTER') or (mm == 'Spectrophotometryglass filter') or (mm == 'spectrophotometryUNKNOWN'): mm = 'spectrophotometry' if (mm == 'SpectrometryBUBBLER 24 H') or (mm == 'Atomic absorption spectrometry (AAS)UNKNOWN'): mm = 'spectrometry' if (mm == 'Ion chromatographyIMPREGNATED FILTER'): mm = 'ion chromatography' if (mm == 'diffusive samplerUNKNOWN') or (mm == 'UNKNOWNSEQUENTIAL SAMPLER') or (mm == 'TGS-ANSAFILTER'): mm = 'diffusive sampler' if (mm == 'Flame ionization detection (FID)Chrompack CP9000'): mm = 'flame ionisation detection' if (mm == 'coulometryUNKNOWN'): mm = 'coulometry' if (mm == 'Gas chromatography + mass spectrometry (GC-MS)AF 20 M') or (mm == 'GAS CHROMATOGRAPHY - MASS SPECTROMETRYUNKNOWN') or (mm == 'Gas chromatography + mass spectrometry (GC-MS)UNKNOWN') or (mm == 'Gas chromatography + mass spectrometry GC-MS after solvent oMarkes Thermal Desorber + Agilent gas Chromatograph Mass Spectrometer'): mm = 'gas chromatography mass spectrometry' if (mm == 'Gas chromatography with photo ionization detectorSYNTECH SPECTRAS GC 955 series undetermined') or (mm == 'Gas chromatography with photo ionization detectorUNKNOWN'): mm = 'gas chromatography photo ionization detection' #if measurement type is unknown then set default measurement method for species try: if (np.isnan(mm) == True): if species == 'O3': mm = 'ultraviolet photometry' elif (species == 'NO') or (species == 'NO2'): mm = 'chemiluminescence' elif species == 'CO': mm = 'non-dispersive infrared spectroscopy' elif species == 'ISOP': mm == 'gas chromatography flame ionisation detection' except: if (mm == 'UNKNOWNUNKNOWN'): if species == 'O3': mm = 'ultraviolet photometry' elif (species 
== 'NO') or (species == 'NO2'): mm = 'chemiluminescence' elif species == 'CO': mm = 'non-dispersive infrared spectroscopy' elif species == 'ISOP': mm = 'gas chromatography flame ionisation detection' #do data quality checks full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year) #convert file res to standard format if file_res == 'hr': file_res = 'H' elif file_res == 'da': file_res = 'D' elif file_res == 'mo': file_res = 'M' #set sampling as average st = 'average' anthrome_class_name = 'na' return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
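# The or-chains above map each raw "measurement principle + instrument" string
# onto a small set of standardised method names, and in almost every case the
# outcome depends only on the principle keyword at the start of the string. A
# keyword lookup is an easier structure to extend; the sketch below is
# illustrative rather than a drop-in replacement, because the chains above also
# special-case a few cross-listed instrument strings.
METHOD_PREFIXES = [
    ('Ultraviolet (UV) photometry', 'ultraviolet photometry'),
    ('Chemiluminescence', 'chemiluminescence'),
    ('chemiluminescence', 'chemiluminescence'),
    ('Non-dispersive infrared spectroscopy (NDIR)', 'non-dispersive infrared spectroscopy'),
    ('Differential Optical Absorption Spectroscopy (DOAS)', 'differential optical absorption spectroscopy'),
    ('Spectrophotometry', 'spectrophotometry'),
]

def standardise_mm(raw_mm):
    for prefix, standard in METHOD_PREFIXES:
        if raw_mm.startswith(prefix):
            return standard
    return 'unknown'

print standardise_mm('Ultraviolet (UV) photometryThermo model 49i O3 analyser')
# -> 'ultraviolet photometry'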
def site_iter_process(valid_refs,c): #for each valid location process #limit obs data due for each site in valid_obs_site_names #for c in range(len(valid_refs)): all_lat = [] all_lon = [] all_alt = [] all_st = [] all_mm = [] site_ref = valid_refs[c] file_valid = True data_valid = True print site_ref file_res = data_resolutions[c] print file_res #read files for each valid site s_files = sorted(glob.glob('/work/home/db876/observations/surface/%s/GAW/%s**.%s**.dat'%(species,site_ref.lower(),file_res))) print s_files if file_res == 'hr': site_files = sorted(s_files, key = lambda x: x.split(".hr")[1]) else: site_files = sorted(s_files) delete_inds = [] if file_res == 'hr': #limit site files before and after year limit for i in range(len(site_files)): f = site_files[i] year = f.split(".hr")[1][:4] if int(year) < int(start_year): delete_inds.append(i) if int(year) > int(end_year): delete_inds.append(i) site_files = np.delete(site_files,delete_inds) print site_files if len(site_files) == 0: print 'No valid files in date range. Skipping.' data_valid = False return c,[],data_valid,-999,-999,-999,'na','na','na','na','na',-999 site_file_len = len(site_files) s_count = 0 start_ind = 0 end_ind = 0 for f in site_files: print f read = np.loadtxt(f,dtype="S10,S5,f8",comments='C',usecols=(0,1,4),unpack =True) read = np.array(read) dates = read[0,:] times = read[1,:] conc = read[2,:] conc = np.array(conc) conc = conc.astype(float) #change all vals < 0 to np.NaN inv_test = conc < 0 conc[inv_test] = np.NaN start_ind = end_ind end_ind+=len(conc) s_count+=1 units = [] mycsv = csv.reader(open(f)) row_count = 0 for row in mycsv: if row_count == 11: val = " ".join(row) lat = val.replace(" ", "") lat = lat[12:] lat = float(lat) all_lat.append(lat) # get lon if row_count == 12: val = " ".join(row) lon = val.replace(" ", "") lon = lon[13:] lon = float(lon) all_lon.append(lon) # get altitude if row_count == 13: val = " ".join(row) alt = val.replace(" ", "") alt = alt[12:] alt = float(alt) all_alt.append(alt) # get units if row_count == 20: val = " ".join(row) unit = val.replace(" ", "") unit = unit[19:] # get measurement method if row_count == 21: val = " ".join(row) mm = val.replace(" ", "") mm = mm[21:] all_mm.append(mm) # get sampling type if row_count == 22: val = " ".join(row) st = val.replace(" ", "") st = st[16:] all_st.append(st) if row_count == 23: val = " ".join(row) tz = val.replace(" ", "") tz = tz[12:] row_count+=1 # test if units are in ppb for each file - if not convert if (unit != 'ppb') & (unit != 'ppbv'): if (unit == 'ug/m3') or (unit == 'ugN/m3'): print 'converting units, temp = 20degC' #calculate conversion factor from mg/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10) conc = conv_fact*conc elif (unit == 'ug/m3-20C') or (unit == 'ugN/m3-20C'): print 'converting units, temp = 20degC' #calculate conversion factor from mg/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10) conc = conv_fact*conc elif (unit == 'ug/m3-25C') or (unit == 'ugN/m3-25C') or (unit == 'ug/m3at25C'): print 'converting units, temp = 25degC' #calculate conversion factor from mg/m3 assuming 25 degC and 1 atm #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144/mol_mass*(273.15+25)/(1013.25/10) conc = conv_fact*conc elif (unit == 'mg/m3-20C') or (unit == 'mgN/m3-20C'): print 'converting units, temp = 25degC' 
#calculate conversion factor from mg/m3 assuming 20 degC and 1 atm #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10) conc = (conv_fact*conc)*1e3 elif (unit == 'mg/m3-25C') or (unit == 'mgN/m3-25C'): print 'converting units, temp = 25degC' #calculate conversion factor from mg/m3 assuming 25 degC and 1 atm #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144/mol_mass*(273.15+25)/(1013.25/10) conc = (conv_fact*conc)*1e3 elif (unit == 'ppm') or (unit == 'ppmv'): conc = conc*1.e3 elif (unit == 'ppt') or (unit == 'pptv'): conc = conc/1.e3 else: print 'Unknown Unit' print unit 1+'a' break if tz != 'UTC': if tz == '': if site_ref.lower() in ['plm']: tz = -5 if site_ref.lower() in ['kos','edm','vdl','nwr']: tz = 0 if site_ref.lower() in ['jfj','kps','rig','pay','glh','cmn','zep','dig','hhe','ktb','stp','ivn','jcz','kam','lzp','snz','zbl','kmw','don','mhn','nia','roq','spm']: tz = 1 if site_ref.lower() in ['rcv','aht','oul','uto','vir','fdt','sem','stn']: tz = 2 if site_ref.lower() in ['dak']: tz = 3 if site_ref.lower() in ['shp']: tz = 4 if site_ref.lower() in ['isk']: tz = 5 if site_ref.lower() in ['hkg']: tz = 8 if site_ref.lower() in ['cgo']: tz = 10 else: tz = tz.replace('LocaltimeUTC', '') tz = tz.replace('OtherUTC', '') tz = tz.replace('Localtime', '') tz = tz.replace(':', '.') try: before, sep, after = tz.rpartition('.') after = int(after) conv = (100./60) * after tz = before+sep+str(conv) except: 1+1 tz = float(tz) else: tz = 0 #check tz is whole number else skip site if (tz % 1) != 0: print 'File Invalid, timezone is not a whole number.' conc[:] = -99999 #process dates from date, time to days since start year dates = [s.replace('-', '') for s in dates] times = [s.replace(':', '') for s in times] if file_res == 'hr': #some times go from 0100 to 2400, assume this is when sites report ave for hour previous. 
Thus all times should have hour minused for i in range(len(times)): if times[i] == '2400': current_date = dates[i] test = np.array(dates) == current_date indices = [i for i, x in enumerate(test) if x] for x in indices: current_time = times[x] if current_time == '2400': current_time = '0000' date_datetime = datetime.datetime(int(current_date[0:4]),int(current_date[4:6]),int(current_date[6:]),int(current_time[:2]),int(current_time[2:])) date_datetime = date_datetime - datetime.timedelta(hours = 1) times[x] = date_datetime.strftime("%H%M") #adjust dates and times if tz is not equal to 0 if tz != 0: for i in range(len(dates)): #create datetime dt = datetime.datetime(int(dates[i][:4]),int(dates[i][4:6]),int(dates[i][6:]),int(times[i][:2]),int(times[i][2:])) if tz > 0: #print 'Old dt', dt dt = dt - datetime.timedelta(hours = int(tz)) #print 'New dt', dt elif tz < 0: #print 'Old dt', dt dt = dt + datetime.timedelta(hours = np.abs(int(tz))) #print 'New dt', dt dates[i] = dt.strftime("%Y%m%d") times[i] = dt.strftime("%H%M") data = [dates,times,conc] try: big_list = np.hstack((big_list,data)) except: big_list = np.array(data) if (s_count == site_file_len): #make sure big list exists try: big_list except: data_valid = False if data_valid == True: #get dates and times date_con = big_list[0,:] time_con = big_list[1,:] #get vals vals = np.array(big_list[2,:]).astype(float) #delete big list del big_list #if dates outside what asked for exclude first_date_val = int('%s0101'%(start_year)) last_date_val = int('%s1231'%(end_year)) test_valid = (np.array(date_con).astype(int) >= first_date_val) & (np.array(date_con).astype(int) <= last_date_val) date_con = date_con[test_valid] time_con = time_con[test_valid] vals = vals[test_valid] #Check if any times are duplicate, if so delete all but first del_list = [] for d in range(len(date_con)-1): if (date_con[d] == date_con[d+1]) & (time_con[d] == time_con[d+1]): del_list.append(d+1) if len(del_list) > 0: print 'Deleting duplicate timepoints' print date_con[del_list],time_con[del_list] date_con = np.delete(date_con,del_list) time_con = np.delete(time_con,del_list) vals = np.delete(vals,del_list) #if file resolution is daily or monthly then replicate times after point, to fill hourly data array. 
count=0 if file_res == 'da': file_hours = len(date_con) for i in range(file_hours): current_hh = int(time_con[count][:2]) current_mm = int(time_con[count][2:]) s = datetime.datetime(year = start_year, month = 1, day = 1, hour = current_hh, minute = current_mm) e = datetime.datetime(year = start_year, month = 1, day = 2, hour = current_hh, minute = current_mm) day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1] date_con = np.insert(date_con,count+1,[date_con[count]]*23) time_con = np.insert(time_con,count+1,day_hours) vals = np.insert(vals,count+1,[vals[count]]*23) count +=24 if file_res == 'mo': file_hours = len(date_con) for i in range(file_hours): current_year = int(date_con[count][:4]) current_month = int(date_con[count][4:6]) next_month = current_month+1 if next_month > 12: next_month = 1 next_year = current_year+1 else: next_year = current_year s = datetime.datetime(year = current_year, month = current_month, day = 1, hour = 1, minute = 0) e = datetime.datetime(year = next_year, month = next_month, day = 1, hour = 0, minute = 0) day_date = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][:-1] day_hour = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][:-1] date_con = np.insert(date_con,count+1,day_date) time_con = np.insert(time_con,count+1,day_hour) vals = np.insert(vals,count+1,[vals[count]]*len(day_date)) count += (len(day_date)+1) date_con = np.array(date_con).astype(int) time_con = np.array(time_con).astype(int) #create max possible o3 grid o3_data = np.empty(n_hours) o3_data[:] = -99999 #delete dates,times and var outside date range val_test = (date_con >= int(output_res_dates_strings[0])) & (date_con <= int(output_res_dates_strings[-1])) date_con = date_con[val_test] time_con = time_con[val_test] vals = vals[val_test] print date_con #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) syn_grid_time = np.arange(0,n_days,1./24) syn_grid_time = np.round(syn_grid_time,decimals=5) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') o3_data[indices] = vals #convert all Nans back to -99999 test = np.isnan(o3_data) o3_data[test] = -99999 #get mode of metadata lat = np.float64(stats.mode(all_lat)[0][0]) lon = np.float64(stats.mode(all_lon)[0][0]) alt = np.float64(stats.mode(all_alt)[0][0]) st = stats.mode(all_st)[0][0] mm = stats.mode(all_mm)[0][0] #check site is not urban using anthrome map from 2000 anthfile = '/work/home/db876/plotting_tools/core_tools/anthro2_a2000.nc' anthload = Dataset(anthfile) class_valid,anthrome_class_name = modules.anthrome_classify(anthload,[lat],[lon]) if class_valid == 'invalid': data_valid = False print 'Site Invalid, site classed as urban by anthrome map.' 
#get measurement type and sampling type (take mode from collected list) if (st == 'continuous') or (st == 'continuous(carbondioxide),remotespectroscopicmethod(methaneandsurfaceozone)') or (st == 'continuous(carbondioxide)remotespectroscopicmethod(methaneandsurfaceozone)'): st = 'average' elif st == 'flask': st = 'flask' elif st == 'filter': st = 'filter' else: print st 1+'a' if mm == 'Lightabsorptionanalysis(UV)': mm = 'ultraviolet photometry' elif mm == 'CavityRingdownSpectroscopy': mm = 'cavity ringdown spectroscopy' elif mm == 'NDIR': mm = 'non-dispersive infrared spectroscopy' elif (mm == 'GasChromatography(FID)'): mm = 'gas chromatography flame ionisation detection' elif (mm == 'Gas Chromatography (RGD)'): mm = 'gas chromatography reduction gas detection' elif mm == 'Chemiluminescence': mm = 'chemiluminescence' elif (mm == 'Spectrophotometry') or (mm == 'spectrophotometry,naphthyl-ethylenediaminedihydrochloridemethod'): mm = 'spectrophotometry' elif mm == 'continuous(carbondioxide)remotespectroscopicmethod(methaneandsurfaceozone)': mm = 'near infrared spectroscopy' elif mm == '': if species == 'O3': mm = 'ultraviolet photometry' if species == 'CO': mm = 'non-dispersive infrared spectroscopy' if species == 'NO2': mm = 'chemiluminescence' if species == 'NO': mm = 'chemiluminescence' if species == 'ISOP': mm = 'gas chromatography flame ionisation detection' #do data quality checks full_data,data_valid,data_complete = modules.quality_check_periodic(o3_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year) #convert file res to standard format if file_res == 'hr': file_res = 'H' elif file_res == 'da': file_res = 'D' elif file_res == 'mo': file_res = 'M' #no raw class so set as na raw_class_name = 'na' return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete
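# The GAW reader above converts mass concentrations to ppb with the ideal-gas
# factor R/MW * T(K) / P(kPa), evaluated at the temperature the reporting site
# assumes. As a check on the constant used in the code: for O3 (MW 48 g/mol) at
# 20 degC and 1013.25 hPa the factor is 8.3144/48 * 293.15 / 101.325 ~= 0.50,
# i.e. 100 ug/m3 ~= 50 ppb. Stand-alone version of the same conversion:
def ugm3_to_ppb(conc_ugm3, mol_mass, temp_c=20.0, press_hpa=1013.25):
    conv_fact = 8.3144 / mol_mass * (273.15 + temp_c) / (press_hpa / 10.)
    return conv_fact * conc_ugm3

print ugm3_to_ppb(100., 48.)   # O3: ~50 ppb at 20 degC and 1 atm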
def site_iter_process(valid_refs, c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] ref = valid_refs[c] print 'ref = ', ref, c #get site instrument for species met_i = file_refs.index(ref) file_name = met_refs[met_i] site_name = met_sitenames[met_i] print site_name site_species = list(met_species[met_i]) print site_species site_instruments = list(met_instruments[met_i]) m_method = site_instruments[site_species.index(species)] site_resolutions = [] data_valid = True s_files = insensitive_glob( '/work/home/db876/observations/surface/%s/EANET/*%s.csv' % (fname_species, file_name)) site_files = [] for y in year_array: for f in s_files: if str(y)[-2:] in f: site_files.append(f) site_files = modules.natsorted(site_files) years = [] months = [] days = [] hours = [] vals = [] yyyymmdd = [] hhmm = [] n_dup_array = [] last_year_index = len(site_files) for y in year_array: got_year = False for file in site_files: last_file_split = file.split('/')[-1] if str(y)[2:] in last_file_split: got_year = True break if got_year == False: timedelta_diff = datetime.date(y + 1, 1, 1) - datetime.date( y, 1, 1) ndays_missing = timedelta_diff.days continue print file valid = True with open(file, 'rb') as f: reader = csv.reader(f, delimiter=',') counter = 0 #get resolution for row in reader: if counter == 0: all_units = row elif counter == 1: file_res = 'H' try: hour_index = row.index('Hour') except: file_res = 'D' try: day_index = row.index('Day') except: file_res = 'M' month_index = row.index('Month') year_index = row.index('Year') try: spec_index = row.index(species.upper()) unit = all_units[spec_index] except: valid = False break #make sure each year units are ppb if unit != 'ppb': print 'Units not ppb!' 
1 + 'a' if counter == 2: if file_res == 'H': yyyy = row[year_index] mm = row[month_index] dd = row[day_index] hh = row[hour_index] elif file_res == 'D': yyyy = row[year_index] mm = row[month_index] dd = row[day_index] hh = 1 elif file_res == 'M': yyyy = row[year_index] mm = row[month_index] dd = 1 hh = 1 start_datetime = datetime.datetime(int(yyyy), int(mm), int(dd), int(hh)) if counter == 3: if file_res == 'H': yyyy = row[year_index] mm = row[month_index] dd = row[day_index] hh = row[hour_index] elif file_res == 'D': yyyy = row[year_index] mm = row[month_index] dd = row[day_index] hh = 1 elif file_res == 'M': yyyy = row[year_index] mm = row[month_index] dd = 1 hh = 1 present_datetime = datetime.datetime( int(yyyy), int(mm), int(dd), int(hh)) time_delt = present_datetime - start_datetime hour_delt = datetime.timedelta(hours=1) day_delt = datetime.timedelta(hours=24) week_delt = datetime.timedelta(hours=24 * 7) month_delt = datetime.timedelta(hours=24 * 28) print time_delt if (time_delt < day_delt): print 'Hourly Data' file_res = 'H' site_resolutions.append(file_res) elif (time_delt > hour_delt) & (time_delt < week_delt): print 'Daily Data' file_res = 'D' site_resolutions.append(file_res) elif (time_delt > week_delt): print 'Monthly Data' file_res = 'M' site_resolutions.append(file_res) counter += 1 #READ IN DATA if valid == True: #limit to sites with hourly date files for, if required if output_res == 'H': if file_res != 'H': print 'Not processing as only want hourly files' continue if output_res == 'HD': if file_res == 'M': print 'Not processing as only want hourly and daily files' continue with open(file, 'rb') as f: reader = csv.reader(f, delimiter=',') counter = 0 val_count = 0 for row in reader: if counter >= 2: yyyy = row[year_index] mm = row[month_index] #add to n_obs_all n_all += 1 n_after_nometa += 1 if file_res == 'H': try: vals = np.append(vals, np.float64(row[spec_index])) except: vals = np.append(vals, -99999) current_datetime = present_datetime + relativedelta( hours=val_count) yyyymmdd.append( current_datetime.strftime("%Y%m%d")) hhmm.append(current_datetime.strftime("%H%M")) n_dup_array = np.append(n_dup_array, 0) elif file_res == 'D': try: vals = np.append( vals, [np.float64(row[spec_index])] * 24) except: vals = np.append(vals, [-99999] * 24) current_datetime = present_datetime + relativedelta( days=val_count) next_datetime = present_datetime + relativedelta( days=val_count + 1) all_datetimes = pd.date_range(current_datetime, next_datetime, freq='H')[:-1] for d in all_datetimes: yyyymmdd.append(d.strftime("%Y%m%d")) hhmm.append(d.strftime("%H%M")) n_dup_array = np.append(n_dup_array, 0) n_dup_array = np.append(n_dup_array, [1] * 23) elif file_res == 'M': month_days = monthrange(int(yyyy), int(mm))[1] try: vals = np.append( vals, [np.float64(row[spec_index])] * (month_days * 24)) except: vals = np.append(vals, [-99999] * (month_days * 24)) current_datetime = present_datetime + relativedelta( months=int(mm) - 1) next_datetime = present_datetime + relativedelta( months=int(mm)) all_datetimes = pd.date_range(current_datetime, next_datetime, freq='H')[:-1] for d in all_datetimes: yyyymmdd.append(d.strftime("%Y%m%d")) hhmm.append(d.strftime("%H%M")) n_dup_array = np.append(n_dup_array, 0) n_dup_array = np.append(n_dup_array, [1] * ((month_days * 24) - 1)) val_count += 1 counter += 1 else: print 'Species is not in file header. 
Skipping Year' timedelta_diff = datetime.date(y + 1, 1, 1) - datetime.date( y, 1, 1) ndays_missing = timedelta_diff.days print 'ndays missing = ', ndays_missing #test if have no data due to not required time resolution, if so exit if len(vals) == 0: n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_obs_after_anyvaliddata, inv_nokeymeta, n_obs_after_nokeymeta, inv_resolution, n_obs_after_resolution, inv_badmeasurementmethod, n_obs_after_badmeasurementmethod = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, 'nothourly', np.zeros( 0) #create max possible grid full_data = np.empty(n_hours) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #convert blank values to -99999 test_inv = vals == '' vals[test_inv] = -99999 #convert number invalids to -99999 test_inv = vals < 0 vals[test_inv] = -99999 #if all site resolutions are same continue then take first file_res all_same = all(x == site_resolutions[0] for x in site_resolutions) if all_same == True: file_res = site_resolutions[0] else: #otherwise take lowest frequency res as file_res if 'M' in site_resolutions: file_res = 'M' elif 'D' in site_resolutions: file_res = 'D' else: file_res = 'H' #get meta i_ref = file_refs.index(ref) site_ref = ref data_tz = np.float32(met_tz[i_ref]) all_tz = [data_tz] lat = np.float32(met_lats[i_ref]) lon = np.float32(met_lons[i_ref]) alt = np.float32(met_alts[i_ref]) raw_class_name = met_class[i_ref] country = met_country[i_ref] unit = str(unit) contact = 'Ayako Aoyagi, Asia Center for Air Pollution Research, [email protected]' #adjust dates and times if tz is not equal to 0 tz = int(data_tz) if tz != 0: for i in range(len(yyyymmdd)): #create datetime dt = datetime.datetime(int(yyyymmdd[i][:4]), int(yyyymmdd[i][4:6]), int(yyyymmdd[i][6:]), int(hhmm[i][:2]), int(hhmm[i][2:])) if tz > 0: dt = dt - datetime.timedelta(hours=int(tz)) elif tz < 0: dt = dt + datetime.timedelta(hours=np.abs(int(tz))) yyyymmdd[i] = dt.strftime("%Y%m%d") hhmm[i] = dt.strftime("%H%M") #put vals into full grid date_con = np.array(yyyymmdd).astype(int) time_con = np.array(hhmm).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] vals = vals[test_inds] n_dup_array = n_dup_array[test_inds] #set st_big and mm_big st_big = ['continuous'] * len(vals) mm_big = [m_method] * len(vals) #get obs valid test = vals >= 0 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = int(len(vals[test]) - valid_hours_dup) n_after_flagsandlod += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con, time_con, start_year) converted_time = np.round(converted_time, decimals=5) syn_grid_time = np.arange(0, n_days, 1. 
/ 24) syn_grid_time = np.round(syn_grid_time, decimals=5) raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') vals = np.array(vals) full_data_after_flagsandlod[raw_indices] = vals raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) # test and remove duplicate and overlap points converted_time, vals, mm_big, st_big, n_dup_array = modules.remove_duplicate_points( site_ref, converted_time, vals, mm_big, st_big, n_dup_array, output_res) test = vals >= 0 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = int(len(vals[test]) - valid_hours_dup) n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = vals big_n_dup_array[indices] = n_dup_array key_meta = [lat, lon, alt] #get sampling/instrument grids raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup( site_ref, process_group, species, raw_st, raw_mm, full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list, unknown_mm_refs_list, no2_type) #do quality checks data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control( site_ref, species, file_res, no2_type, grid_dates, full_data, big_n_dup_array, valid_hours_dup, raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) #make tz int after checks data_tz = np.float32(data_tz) #set processed unit p_unit = 'pbbv' #get local timezone try: local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24 - int(datetime_offset.seconds / 60 / 60)) else: local_tz = int(datetime_offset.seconds / 60 / 60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref) unknown_local_tz_list.append(site_ref) #pack meta meta = [ lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz, local_tz, site_name, country, contact ] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print 
set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
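# ----------------------------------------------------------------------------
# Hedged sketch (not part of the original scripts): the EANET reader above
# classifies each file as hourly/daily/monthly by comparing the first two
# timestamps it parses. The helper below isolates that idea; the function
# name is hypothetical, the thresholds mirror the day/week checks used above.
import datetime

def infer_file_resolution(t0, t1):
    """Return 'H', 'D' or 'M' from the gap between two consecutive records."""
    delta = t1 - t0
    if delta < datetime.timedelta(hours=24):
        return 'H'   # records less than a day apart -> hourly file
    elif delta < datetime.timedelta(days=7):
        return 'D'   # roughly a day apart -> daily file
    else:
        return 'M'   # a week or more apart -> monthly file

# example: records one hour apart are classed as hourly
# infer_file_resolution(datetime.datetime(2006, 1, 1, 0),
#                       datetime.datetime(2006, 1, 1, 1))  -> 'H'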
def onpick(event): global pl global ind global fig2 ind = event.ind ind = ind[0] #x_data = event.xdata #y_data = event.ydata #find ind of closest lat/lon #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data) try: for i in range(len(pl)): pl.pop(0).remove() first_run = False except: first_run = True pass pl = m.plot([X[ind]], [Y[ind]], 'o', ms=12, alpha=0.6, color='yellow', zorder=20) #get model timeseries for site clicked lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, obs_lats[ind], obs_lons[ind]) model_var_pick = model_var[:, lat_n, lon_n] model_var_pick = model_var_pick * 1e9 model_var_mask = np.ma.masked_where(model_var_pick <= 0, model_var_pick) if model_name == 'MACC': model_time_pd = pd.date_range(start=model_datetimes[0], end=model_datetimes[-1], freq='H') count = 0 valids = [] for i in range(len(model_time_pd)): if count == 0: valids.append(i) count += 1 elif count == 2: count = 0 else: count += 1 model_time_pd = model_time_pd[valids] model_var_pd = pd.Series(model_var_mask, index=model_time_pd) else: model_time_pd = pd.date_range(start=model_datetimes[0], end=model_datetimes[-1], freq='H') model_var_pd = pd.Series(model_var_mask, index=model_time_pd) #get obs timeseries for site clicked ref = obs_refs[ind] obs_ts_group = obs_root_grp.groups[ref] obs_var = obs_ts_group.variables[species.lower()][:] group = obs_ts_group.process_group lat = obs_ts_group.latitude lon = obs_ts_group.longitude lon = obs_ts_group.longitude alt = obs_ts_group.altitude complete = obs_ts_group.completeness a_class = obs_ts_group.anthrome_class r_class = obs_ts_group.raw_class continent = loc_dict[tags[ind]] country = obs_ts_group.country obs_var_mask = np.ma.masked_where(obs_var <= 0, obs_var) obs_time_pd = pd.date_range(start=obs_datetimes[0], end=obs_datetimes[-1], freq='H') obs_var_pd = pd.Series(obs_var_mask, index=obs_time_pd) #create sine wave from amp/phase obs_date_l = obs_date.astype(int) obs_time_l = obs_time.astype(int) obs_times = modules.date_process(obs_date_l, obs_time_l, start_year) obs_times = np.array(obs_times) pi2 = np.pi * 2 #convert phases to radians calc = pi2 / 6. obs_ha_phase_r = obs_ha_phase[ind] * calc calc = pi2 / 12. obs_annual_phase_r = obs_annual_phase[ind] * calc ha_obs_wave = obs_ha_mag[ind] * (np.cos((pi2 * obs_times / (365.25 / 2.)) - (obs_ha_phase_r))) annual_obs_wave = obs_annual_mag[ind] * (np.cos((pi2 * obs_times / (365.25)) - (obs_annual_phase_r))) seasonal_obs_wave = (ha_obs_wave + annual_obs_wave) + obs_ave[ind] obs_seasonal_wave_pd = pd.Series(seasonal_obs_wave, index=obs_time_pd) #create sine wave from amp/phase mod_date_l = model_date.astype(int) mod_time_l = model_time.astype(int) mod_times = modules.date_process(mod_date_l, mod_time_l, start_year) mod_times = np.array(mod_times) pi2 = np.pi * 2 #convert phases to radians calc = pi2 / 6. model_ha_phase_r = model_ha_phase[ind] * calc calc = pi2 / 12. 
model_annual_phase_r = model_annual_phase[ind] * calc ha_model_wave = model_ha_mag[ind] * (np.cos((pi2 * mod_times / (365.25 / 2.)) - (model_ha_phase_r))) annual_model_wave = model_annual_mag[ind] * (np.cos( (pi2 * mod_times / (365.25)) - (model_annual_phase_r))) seasonal_model_wave = (ha_model_wave + annual_model_wave) + model_ave[ind] model_seasonal_wave_pd = pd.Series(seasonal_model_wave, index=model_time_pd) #get spectra data site_group_obs = root_grp_obs_spec.groups[ref] site_group_mod = root_grp_mod_spec.groups[ref] obs_period = site_group_obs.variables['period'][:] mod_period = site_group_mod.variables['period'][:] obs_amp = site_group_obs.variables['amplitude'][:] mod_amp = site_group_mod.variables['amplitude'][:] fig.canvas.draw() if first_run == False: plt.close(fig2) fig2, (axo, axo2) = plt.subplots(2, figsize=(24, 12)) fig2.patch.set_facecolor('white') #fig2 = plt.figure() axo.plot_date(obs_time_pd.to_pydatetime(), obs_var_pd, color='black', markersize=3, label='Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='red', alpha=0.5, markersize=3, label='%s %s %s %s' % (model_name, version, grid_size, met), markeredgecolor='None') axo.plot_date(obs_time_pd.to_pydatetime(), obs_seasonal_wave_pd, color='yellow', markersize=3, label='Obs Seasonal Waveform', markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_seasonal_wave_pd, color='green', markersize=3, label='Model Seasonal Waveform', markeredgecolor='None') axo2.loglog(obs_period, obs_amp, color='black', label='Obs') axo2.loglog(mod_period, mod_amp, color='red', label='%s %s %s %s' % (model_name, version, grid_size, met)) axo2.text(0.01, 0.95, 'Obs D Amp = %8.2f ppb' % (obs_daily_mag[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.92, 'Model D Amp = %8.2f ppb' % (model_daily_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.85, 'Obs HA Amp = %8.2f ppb' % (obs_ha_mag[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.82, 'Model HA Amp = %8.2f ppb' % (model_ha_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.75, 'Obs A Amp = %8.2f ppb' % (obs_annual_mag[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.72, 'Model A Amp = %8.2f ppb' % (model_annual_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.55, 'Obs D Phase = %8.2f' % (obs_daily_phase[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.52, 'Model D Phase = %8.2f' % (model_daily_phase[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.45, 'Obs HA Phase = %8.2f' % (obs_ha_phase[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.42, 'Model HA Phase = %8.2f' % (model_ha_phase[ind]), transform=axo2.transAxes, fontweight='bold', color='red') obs_a_ph = obs_annual_phase[ind] mod_a_ph = model_annual_phase[ind] if obs_a_ph > 12: obs_a_ph = obs_a_ph - 12. if mod_a_ph > 12: mod_a_ph = mod_a_ph - 12. 
axo2.text(0.01, 0.35, 'Obs A Phase = %8.2f' % (obs_a_ph), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.32, 'Model A Phase = %8.2f' % (mod_a_ph), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.15, 'Obs Ave = %8.2f ppb' % (obs_ave[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.12, 'Model Ave = %8.2f ppb' % (model_ave[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.axvline(1., ymin=0, ymax=1, color='blue', linestyle='--') axo2.axvline(182.625, ymin=0, ymax=1, color='blue', linestyle='--') axo2.axvline(365.25, ymin=0, ymax=1, color='blue', linestyle='--') axo2.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) axo2.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) plt.gca().xaxis.set_major_formatter(FuncFormatter(xformatter)) plt.gca().yaxis.set_major_formatter(FuncFormatter(yformatter)) axo.set_title( 'Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s, Grid Index = %s,%s' % (ref, country, continent, group, lat, lon, alt, complete, a_class, r_class, lat_n, lon_n)) plt.legend(loc='lower right') plt.tight_layout() axo.grid() axo2.grid() plt.show() else: #fig2 = plt.figure() fig2, (axo, axo2) = plt.subplots(2, figsize=(24, 12)) fig2.patch.set_facecolor('white') axo.plot_date(obs_time_pd.to_pydatetime(), obs_var_pd, color='black', markersize=3, label='Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='red', markersize=3, alpha=0.5, label='%s %s %s %s' % (model_name, version, grid_size, met), markeredgecolor='None') axo.plot_date(obs_time_pd.to_pydatetime(), obs_seasonal_wave_pd, color='yellow', markersize=3, label='Obs Seasonal Waveform', markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_seasonal_wave_pd, color='green', markersize=3, label='Model Seasonal Waveform', markeredgecolor='None') axo2.loglog(obs_period, obs_amp, color='black', label='Obs') axo2.loglog(mod_period, mod_amp, color='red', label='%s %s %s %s' % (model_name, version, grid_size, met)) axo2.text(0.01, 0.95, 'Obs D Amp = %8.2f ppb' % (obs_daily_mag[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.92, 'Model D Amp = %8.2f ppb' % (model_daily_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.85, 'Obs HA Amp = %8.2f ppb' % (obs_ha_mag[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.82, 'Model HA Amp = %8.2f ppb' % (model_ha_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.75, 'Obs A Amp = %8.2f ppb' % (obs_annual_mag[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.72, 'Model A Amp = %8.2f ppb' % (model_annual_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.55, 'Obs D Phase = %8.2f' % (obs_daily_phase[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.52, 'Model D Phase = %8.2f' % (model_daily_phase[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.45, 'Obs HA Phase = %8.2f' % (obs_ha_phase[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.42, 'Model HA Phase = %8.2f' % (model_ha_phase[ind]), transform=axo2.transAxes, fontweight='bold', color='red') obs_a_ph = obs_annual_phase[ind] mod_a_ph = model_annual_phase[ind] if obs_a_ph > 12: obs_a_ph = obs_a_ph - 12. if mod_a_ph > 12: mod_a_ph = mod_a_ph - 12. 
axo2.text(0.01, 0.35, 'Obs A Phase = %8.2f' % (obs_a_ph), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.32, 'Model A Phase = %8.2f' % (mod_a_ph), transform=axo2.transAxes, fontweight='bold', color='red') axo2.text(0.01, 0.15, 'Obs Ave = %8.2f ppb' % (obs_ave[ind]), transform=axo2.transAxes, fontweight='bold') axo2.text(0.01, 0.12, 'Model Ave = %8.2f ppb' % (model_ave[ind]), transform=axo2.transAxes, fontweight='bold', color='red') axo2.axvline(1., ymin=0, ymax=1, color='blue', linestyle='--') axo2.axvline(182.625, ymin=0, ymax=1, color='blue', linestyle='--') axo2.axvline(365.25, ymin=0, ymax=1, color='blue', linestyle='--') axo2.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) axo2.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) plt.gca().xaxis.set_major_formatter(FuncFormatter(xformatter)) plt.gca().yaxis.set_major_formatter(FuncFormatter(yformatter)) axo.set_title( 'Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s, Grid Index = %s,%s' % (ref, country, continent, group, lat, lon, alt, complete, a_class, r_class, lat_n, lon_n)) plt.legend(loc='lower right') plt.tight_layout() axo.grid() axo2.grid() plt.show()
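# ----------------------------------------------------------------------------
# Hedged sketch: the seasonal waveforms plotted in onpick are rebuilt from
# amplitude/phase pairs. The phases appear to be stored in months (0-6 for the
# half-annual component, 0-12 for the annual one) and are converted to radians
# before evaluating a cosine; this standalone version assumes that convention.
import numpy as np

def seasonal_waveform(times, annual_amp, annual_phase_months,
                      ha_amp, ha_phase_months, offset):
    pi2 = 2. * np.pi
    annual_ph = annual_phase_months * (pi2 / 12.)   # months -> radians
    ha_ph = ha_phase_months * (pi2 / 6.)            # months -> radians
    annual = annual_amp * np.cos(pi2 * times / 365.25 - annual_ph)
    half_annual = ha_amp * np.cos(pi2 * times / (365.25 / 2.) - ha_ph)
    return annual + half_annual + offset

# times are days since the start year, as produced by modules.date_process, e.g.
# wave = seasonal_waveform(np.arange(0, 365.25, 1. / 24), 10., 6., 3., 2., 30.)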
def site_iter_process(valid_refs, c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] #process data for each site at a time site_ref = valid_refs[c] data_valid = True print 'ref = ', site_ref, c #get all files for ref all_files = glob.glob( '/work/home/db876/observations/surface/O3/SEARCH/%s*' % (site_ref)) file_years = [i[-8:-4] for i in all_files] #sort files all_files = [x for (y, x) in sorted(zip(file_years, all_files))] dates = [] times = [] site_vals = [] print all_files for f in all_files: print f if f[-3:] == 'xls': spec_str = species flag_str = '%s FL' % (species) date_str = 'DATE/TIME' all_data = get_data(f) all_data = all_data.values() headers = all_data[0][2] date_ind = headers.index(date_str) spec_ind = headers.index(spec_str) flag_ind = headers.index(flag_str) data_cut = all_data[0][3:] for i in range(len(data_cut)): row_cut = data_cut[i] if len(row_cut) < 30: diff = 30 - len(row_cut) for x in range(diff): row_cut.append('') dates.append(row_cut[date_ind].strftime("%Y%m%d")) times.append(row_cut[date_ind].strftime("%H%M")) try: val = np.float64(row_cut[spec_ind]) except: val = -99999 if (row_cut[flag_ind] == 'I') or (row_cut[flag_ind] == 'C') or (val < 0): site_vals.append(-99999) else: site_vals.append(val) elif f[-3:] == 'csv': date_str = 'Date/Time[LST]' spec_str = 'Average %s[ppb]' % (species) flag_str = 'Flags[%s]' % (species) mycsv = csv.reader(open(f), delimiter=',') start_read = 999999 row_count = 0 for row in mycsv: try: if row[0] == date_str: date_ind = 0 spec_ind = row.index(spec_str) flag_ind = row.index(flag_str) start_read = row_count + 1 except: pass if row_count >= start_read: dates.append( parser.parse(row[date_ind]).strftime("%Y%m%d")) times.append(parser.parse(row[date_ind]).strftime("%H%M")) #dates.append(row[date_ind][6:10]+row[date_ind][0:2]+row[date_ind][3:5]) #times.append(row[date_ind][11:13]+row[date_ind][14:]) if ('I' in row[flag_ind]) or ('C' in row[flag_ind]) or ( row[flag_ind] == 'Null') or (np.float64(row[spec_ind]) < 0): site_vals.append(-99999) else: site_vals.append(np.float64(row[spec_ind])) row_count += 1 site_vals = np.array(site_vals) #adjust dates and times if tz is not equal to 0 data_tz = tz_dict[site_ref] if data_tz != 0: for i in range(len(dates)): #create datetime dt = datetime.datetime(int(dates[i][:4]), int(dates[i][4:6]), int(dates[i][6:]), int(times[i][:2]), int(times[i][2:])) if data_tz > 0: dt = dt - datetime.timedelta(hours=int(data_tz)) elif data_tz < 0: dt = dt + datetime.timedelta(hours=np.abs(int(data_tz))) dates[i] = dt.strftime("%Y%m%d") times[i] = dt.strftime("%H%M") #add val to total obs count n_all += len(site_vals) n_after_nometa += len(site_vals) #put vals into full grid date_con = np.array(dates).astype(int) time_con = np.array(times).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] site_vals = site_vals[test_inds] #set st_big as 'continuous' st_big = ['continuous'] * len(site_vals) #set mm_big if species == 'O3': mm_big = ['ultraviolet photometry'] * len(site_vals) elif species == 'NO': mm_big = ['chemiluminescence'] * len(site_vals) elif species == 'NO2': 
mm_big = ['chemiluminescence (conversion-photolysis)'] * len(site_vals) elif species == 'CO': mm_big = ['non-dispersive infrared spectroscopy'] * len(site_vals) #get obs valid after flagsandlod test = site_vals >= 0 n_obs_valid = len(site_vals[test]) n_after_flagsandlod += n_obs_valid #create max possible grid full_data = np.empty(n_hours) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con, time_con, start_year) converted_time = np.round(converted_time, decimals=5) syn_grid_time = np.arange(0, n_days, 1. / 24) syn_grid_time = np.round(syn_grid_time, decimals=5) raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') site_vals = np.array(site_vals) full_data_after_flagsandlod[raw_indices] = site_vals raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) #test and remove duplicate and overlap points converted_time, site_vals, mm_big, st_big, na = modules.remove_duplicate_points( site_ref, converted_time, site_vals, mm_big, st_big, 'blank', output_res) test = site_vals >= 0 n_obs_valid = int(len(site_vals[test])) print 'n obs valid = ', n_obs_valid n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = site_vals #get site meta lat = lat_dict[site_ref] lon = lon_dict[site_ref] alt = alt_dict[site_ref] unit = 'ppb' raw_class_name = raw_class_dict[site_ref] site_name = sitename_dict[site_ref] country = 'United States' contact = '*****@*****.**' all_tz = [data_tz] key_meta = [lat, lon, alt] #set site file resolution as hourly file_res = 'H' #get sampling/instrument grids raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup( site_ref, process_group, species, raw_st, raw_mm, full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list, unknown_mm_refs_list, no2_type) #do quality checks data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control( site_ref, species, file_res, no2_type, grid_dates, full_data, big_n_dup_array, 0, raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ lat, lon, alt, 'na', 
'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) #set processed unit p_unit = 'pbbv' #get local timezone try: local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24 - int(datetime_offset.seconds / 60 / 60)) else: local_tz = int(datetime_offset.seconds / 60 / 60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref) unknown_local_tz_list.append(site_ref) #pack meta meta = [ lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz, local_tz, site_name, country, contact ] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
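# ----------------------------------------------------------------------------
# Hedged sketch: every reader in this collection places observations onto a
# fixed hourly grid with np.searchsorted. Both time axes are rounded to five
# decimals first so floating-point noise cannot shift a value into the wrong
# hour; missing hours keep the -99999 sentinel. Helper name is hypothetical.
import numpy as np

def place_on_hourly_grid(obs_times, obs_vals, n_days, fill=-99999.):
    grid_time = np.round(np.arange(0, n_days, 1. / 24), decimals=5)
    obs_times = np.round(np.asarray(obs_times, dtype=np.float64), decimals=5)
    full = np.full(len(grid_time), fill)
    idx = np.searchsorted(grid_time, obs_times, side='left')
    full[idx] = obs_vals
    return full

# e.g. two hourly values on day 0 of a 2-day grid:
# place_on_hourly_grid([0.0, 1. / 24], [31.2, 29.8], n_days=2)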
def site_iter_process(valid_refs,c): #for site_ref in valid_refs: site_ref = valid_refs[c] data_valid = True print 'ref = ',site_ref site_test = all_refs == site_ref site_yyyymmdd = yyyymmdd[site_test] site_hhmm = hhmm[site_test] site_vals = vals[site_test] site_vals = np.float64(site_vals) #convert all invalids to -99999 test_inv = site_vals < 0 site_vals[test_inv] = -99999 #put vals into full grid date_con = np.array(site_yyyymmdd).astype(int) time_con = np.array(site_hhmm).astype(int) #create max possible o3 grid full_data = np.empty(n_hours) full_data[:] = -99999 #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) syn_grid_time = np.arange(0,n_days,1./24) syn_grid_time = np.round(syn_grid_time,decimals=5) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') site_vals = np.array(site_vals) full_data[indices] = site_vals meta_index = meta_refs.index(site_ref) tz = float(meta_tz[meta_index]) lat = np.float64(meta_lats[meta_index]) lon = np.float64(meta_lons[meta_index]) alt = np.float64(meta_alts[meta_index]) raw_class_name = meta_class[meta_index] anthrome_class_name = class_name[meta_index] #check tz is whole number else skip site if (tz % 1) != 0: data_valid = False print 'Timezone is not a whole number. Skipping.' #correct timezone to UTC tz = int(tz) if tz < 0: #get rid of values at start and append -99999's at end cut = full_data[:tz] for num in range(np.abs(tz)): cut = np.insert(cut,0, -99999) full_data = cut elif tz > 0: #put -99999's at start and get rid of values at end cut = full_data[tz:] for num in range(tz): cut = np.append(cut, -99999) full_data = cut #if species is CO then convert units from ppmv to ppbv if species == 'CO': valid_inds = full_data != -99999 full_data[valid_inds] = full_data[valid_inds]*1e3 #do data quality checks full_data,data_valid = modules.quality_check(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year) #set sampling as average if (species == 'O3') or (species == 'CO') or(species == 'NO') or (species == 'NO2'): st = 'average' elif (species == 'ISOP'): st = 'flask' #set site file resolution if (species == 'O3') or (species == 'CO') or(species == 'NO') or (species == 'NO2'): file_res = 'H' elif (species == 'ISOP'): file_res = 'D' #check file res is ok for output res if (output_res == 'H'): if (file_res == 'D') or (file_res == 'M'): print 'File resolution has to be Minimum Hourly. Skipping' data_valid = False return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na' elif (output_res == 'D'): if (file_res == 'M'): print 'File resolution has to be Minimum Daily. Skipping' data_valid = False return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na' #set mm if species == 'O3': mm = 'ultraviolet photometry' elif (species == 'NO') or (species == 'NO2'): mm = 'chemiluminescence' elif species == 'CO': mm = 'non-dispersive infrared spectrometry' elif species == 'ISOP': mm = 'gas chromatography flame ionisation detection' return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
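# ----------------------------------------------------------------------------
# Hedged sketch: the timezone correction in the reader above shifts the whole
# hourly array rather than individual timestamps; values pushed off one end are
# dropped and the other end is padded with -99999 (np.roll is deliberately not
# used because data must not wrap around). Function name is hypothetical.
import numpy as np

def shift_to_utc(full_data, tz, fill=-99999.):
    full_data = np.asarray(full_data, dtype=np.float64)
    if tz < 0:
        # local time behind UTC: pad at the start, drop the tail
        return np.concatenate((np.full(abs(tz), fill), full_data[:tz]))
    elif tz > 0:
        # local time ahead of UTC: drop the head, pad at the end
        return np.concatenate((full_data[tz:], np.full(tz, fill)))
    return full_data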
% (first_year, last_year + 1, ' '.join(i for i in valid_refs))) #read in specific site data site_group = root_grp.groups[site_ref] #read in variables for site obs_var = site_group.variables['o3'][:] full_obs_var = obs_var[:] full_obs_var_mask = np.ma.masked_where(full_obs_var <= 0, full_obs_var) obs_date = site_group.variables['date'][:] obs_time = site_group.variables['time'][:] obs_lat = site_group.latitude obs_lon = site_group.longitude obs_alt = site_group.altitude obs_times = modules.date_process(obs_date, obs_time) obs_times = np.array(obs_times) obs_times_full = obs_times[:] ##cut out invalid obs data obs_var_mask = np.ma.masked_where(obs_var <= 0, obs_var) valids = obs_var > 0 obs_var = obs_var[valids] obs_times = obs_times[valids] obs_ave = np.average(obs_var) year_val = [] month_val = [] day_val = [] hour_val = []
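# ----------------------------------------------------------------------------
# Hedged sketch: the processed observation files are netCDF with one group per
# site; time series are read as group variables and site metadata as group
# attributes, as in the fragment above. The file path and function name here
# are placeholders, the group/variable/attribute names follow the code above.
from netCDF4 import Dataset

def read_site(path, site_ref, species='o3'):
    root_grp = Dataset(path)                     # path is hypothetical
    grp = root_grp.groups[site_ref]              # one group per site reference
    var = grp.variables[species][:]
    date = grp.variables['date'][:]
    time = grp.variables['time'][:]
    lat, lon, alt = grp.latitude, grp.longitude, grp.altitude
    return var, date, time, (lat, lon, alt)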
#---------------------------------------- #find model data gridbox to compare with obs. #get model gridbox for obs site lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, obs_lat, obs_lon) model_var = model_var[:, lat_n, lon_n] model_var = model_var * 1e9 model_var_mask = np.ma.masked_where(model_var <= 0, model_var) model_ave = np.ma.average(model_var_mask) #-------------------------------------------- #take half daily average of obs and model obs_time = modules.date_process(obs_date, obs_time, start_year) model_time = modules.date_process(model_date, model_time, start_year) divisor = 6 #take half daily average of obs total_len = len(obs_var_mask) / divisor start = 0 end = divisor ave_obs_var = [] ave_obs_time = [] for i in range(total_len): ave = np.ma.average(obs_var_mask[start:end]) ave_obs_time = np.append(ave_obs_time, obs_time[start]) ave_obs_var = np.append(ave_obs_var, ave) start += divisor
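# ----------------------------------------------------------------------------
# Hedged sketch: the block averaging above reduces the series to
# non-overlapping windows of `divisor` points, keeping the timestamp of the
# first point in each window and using a masked-aware mean so -99999/invalid
# points do not bias the result. A generic version of that loop:
import numpy as np

def block_average(times, values, divisor):
    n_blocks = len(values) // divisor
    ave_times, ave_vals = [], []
    for i in range(n_blocks):
        s, e = i * divisor, (i + 1) * divisor
        ave_vals.append(np.ma.average(values[s:e]))   # masked-aware average
        ave_times.append(times[s])                    # window start time
    return np.array(ave_times), np.array(ave_vals)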
def onpick(event): global pl global ind global fig2 ind = event.ind print 'ind = ',ind ind = ind[0] #x_data = event.xdata #y_data = event.ydata #find ind of closest lat/lon #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data) try: for i in range(len(pl)): pl.pop(0).remove() first_run = False except: first_run = True pass pl = m.plot([linear_lons[ind]], [linear_lats[ind]], 's', ms=20, alpha=0.6, color='yellow',zorder=20) #get model timeseries for site clicked lat_n,lon_n = modules.obs_model_gridbox(lat_e,lon_e,linear_lats[ind],linear_lons[ind]) model_var_pick = model_var[:,lat_n,lon_n] model_var_pick = model_var_pick*1e9 model_var_mask = np.ma.masked_where(model_var_pick<=0,model_var_pick) model_time_pd = pd.date_range(start = model_datetimes[0],end = model_datetimes[-1], freq = 'H') model_var_pd = pd.Series(model_var_mask, index=model_time_pd) #create sine wave from amp/phase model_date_l = model_date.astype(int) model_time_l = model_time.astype(int) model_times = modules.date_process(model_date_l,model_time_l,start_year) model_times = np.array(model_times) pi2 = np.pi*2 ratio = 100./annual_amp[lat_n,lon_n] ha_percent = ratio*annual_amp[lat_n,lon_n] #convert phases to radians calc = pi2/24. calc = pi2/6. ha_ph_r = ha_ph[lat_n,lon_n] * calc calc = pi2/12. annual_ph_r = annual_ph[lat_n,lon_n] * calc ha_model_wave = ha_amp[lat_n,lon_n]*(np.cos((pi2*model_times/(365.25/2.))-(ha_ph_r))) annual_model_wave = annual_amp[lat_n,lon_n]*(np.cos((pi2*model_times/(365.25))-(annual_ph_r))) ha_primary = p_ha_ph[lat_n,lon_n] ha_secondary = s_ha_ph[lat_n,lon_n] ha_model_wave = ha_model_wave+ave[lat_n,lon_n] annual_model_wave = annual_model_wave+ave[lat_n,lon_n] model_ha_wave_pd = pd.Series(ha_model_wave, index=model_time_pd) model_annual_wave_pd = pd.Series(annual_model_wave, index=model_time_pd) fig.canvas.draw() if first_run == False: plt.close(fig2) fig2, (axo) = plt.subplots(1,figsize=(24,12)) fig2.patch.set_facecolor('white') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='black', markersize = 3, label = 'Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_ha_wave_pd, color='green', markersize = 3, label = 'Ha Waveform',markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_annual_wave_pd, color='red', markersize = 3, label = 'Annual Waveform',markeredgecolor='None') #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual Amp Percent = %s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent)) plt.legend(loc = 'lower right') plt.tight_layout() axo.grid() plt.show() else: fig2, (axo) = plt.subplots(1,figsize=(24,12)) fig2.patch.set_facecolor('white') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='black', markersize = 3, label = 'Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_ha_wave_pd, color='green', markersize = 3, label = 'Ha Waveform',markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_annual_wave_pd, color='red', markersize = 3, label = 'Annual Waveform',markeredgecolor='None') #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual Amp Percent = %s' 
%(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent)) plt.legend(loc = 'lower right') plt.tight_layout() axo.grid() plt.show()
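# ----------------------------------------------------------------------------
# Hedged sketch: both onpick handlers follow the same pattern - remove the
# previous highlight marker (if any), mark the picked point, then rebuild the
# detail figure. A stripped-down, self-contained version of that pattern,
# assuming a pickable scatter plot (data and names here are illustrative only):
import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
x, y = np.random.rand(20), np.random.rand(20)
ax.scatter(x, y, picker=True)
highlight = []

def on_pick(event):
    ind = event.ind[0]                     # first picked point
    while highlight:
        highlight.pop().remove()           # clear previous highlight marker
    highlight.extend(ax.plot([x[ind]], [y[ind]], 'o', ms=12,
                             color='yellow', alpha=0.6, zorder=20))
    fig.canvas.draw()

fig.canvas.mpl_connect('pick_event', on_pick)
# plt.show()  # interactive backends only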
def site_iter_process(valid_refs, c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] data_valid = True site_data = data[c] site_meta = site_data[0] file_res = resolutions[c] #get data and metadata try: lat = np.float32(site_meta['LATITUDE']) except: lat = 'na' try: lon = np.float32(site_meta['LONGITUDE']) except: lon = 'na' try: alt = np.float32(site_meta['ALTITUDE']) except: alt = 'na' land_use_class = site_meta['LAND_USE'] if pd.isnull(land_use_class) == True: land_use_class = 'na' station_class = site_meta['STATION CATEGORY'] if pd.isnull(station_class) == True: station_class = 'na' raw_class_name = land_use_class + ' ' + station_class mm = site_meta['MEASUREMENT METHOD'] if pd.isnull(mm) == True: mm = '' country = site_meta['COUNTRY/TERRITORY'] if pd.isnull(country) == True: country = 'na' site_name = site_meta['STATION NAME'] if pd.isnull(site_name) == True: site_name = 'na' continuous_check = site_meta['MEASUREMENT AUTOMATIC'] if pd.isnull(continuous_check) == True: continuous_check = 'na' unit = site_meta['MEASUREMENT UNIT'] #integration_time = site_meta['TIME INTERVAL'] tz = site_meta['TIME ZONE'] contact = '*****@*****.**' #convert timezone from str to int tzd = {'UTC': 0, 'CET': 1, 'EET': 2} data_tz = tzd[tz] all_tz = [data_tz] if (file_res == 'hr') or (file_res == 'da'): var = np.array(site_data[1].values.tolist()) elif file_res == 'mo': all_var = np.array(site_data[1].values.tolist()) var = np.array(all_var[:, 1]).astype('float64') end_times = all_var[:, 0] end_date_con = [d[:4] + d[5:7] + d[8:10] for d in end_times] end_time_con = [d[11:13] + d[14:] for d in end_times] times = site_data[1].index date_con = [d.strftime('%Y%m%d') for d in times] time_con = [d.strftime('%H%M') for d in times] #get ref site_ref = valid_refs[c] site_group = group_codes[c] print 'ref == %s, %s' % (site_ref, c) print 'res = ', file_res #add var to total obs count n_all += len(var) n_after_nometa += len(var) #if file resolution is daily or monthly then replicate times after point, to fill hourly data array. 
count = 0 if file_res == 'hr': n_dup_array = np.zeros(len(var)) elif file_res == 'da': n_dup_array = [] file_hours = len(date_con) for i in range(file_hours): current_hh = int(time_con[count][:2]) current_mm = int(time_con[count][2:]) s = datetime.datetime(year=start_year, month=1, day=1, hour=current_hh, minute=current_mm) e = datetime.datetime(year=start_year, month=1, day=2, hour=current_hh, minute=current_mm) day_hours = [ d.strftime('%H%M') for d in pd.date_range(s, e, freq='H') ][1:-1] date_con = np.insert(date_con, count + 1, [date_con[count]] * 23) time_con = np.insert(time_con, count + 1, day_hours) var = np.insert(var, count + 1, [var[count]] * 23) #append to n duplicated array n_dup_array = np.append(n_dup_array, 0) n_dup_array = np.append(n_dup_array, [1] * 23) count += 24 elif file_res == 'mo': n_dup_array = [] file_hours = len(date_con) for i in range(file_hours): current_year = int(date_con[count][:4]) current_month = int(date_con[count][4:6]) current_day = int(date_con[count][6:]) current_hour = int(time_con[count][:2]) current_min = int(time_con[count][2:]) next_year = int(end_date_con[i][:4]) next_month = int(end_date_con[i][4:6]) next_day = int(end_date_con[i][6:]) next_hour = int(end_time_con[i][:2]) next_min = int(end_time_con[i][2:]) s = datetime.datetime(year=current_year, month=current_month, day=current_day, hour=current_hour, minute=current_min) e = datetime.datetime(year=next_year, month=next_month, day=next_day, hour=next_hour, minute=next_min) day_date = [ d.strftime('%Y%m%d') for d in pd.date_range(s, e, freq='H') ][1:-1] day_hour = [ d.strftime('%H%M') for d in pd.date_range(s, e, freq='H') ][1:-1] date_con = np.insert(date_con, count + 1, day_date) time_con = np.insert(time_con, count + 1, day_hour) var = np.insert(var, count + 1, [var[count]] * len(day_date)) #append to n duplicated array n_dup_array = np.append(n_dup_array, 0) n_dup_array = np.append(n_dup_array, [1] * len(day_date)) count += (len(day_date) + 1) date_con = np.array(date_con).astype(int) time_con = np.array(time_con).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] var = var[test_inds] n_dup_array = n_dup_array[test_inds] #convert nans to -99999's nan_inds = np.isnan(var) var[nan_inds] = -99999 if continuous_check == 'yes': st_big = ['continuous'] * len(var) else: st_big = ['filter'] * len(var) mm_big = [mm] * len(var) #get obs valid test = var >= 0 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = int(len(var[test]) - valid_hours_dup) n_after_flagsandlod += n_obs_valid #create max possible grid full_data = np.empty(len(grid_dates)) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con, time_con, start_year) converted_time = np.round(converted_time, decimals=5) raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') var = np.array(var) full_data_after_flagsandlod[raw_indices] = var raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) #test and remove duplicate and overlap points converted_time, var, mm_big, st_big, n_dup_array = modules.remove_duplicate_points( site_ref, converted_time, var, mm_big, st_big, n_dup_array, output_res) test = var >= 0 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = 
int(len(var[test]) - valid_hours_dup) n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = var big_n_dup_array[indices] = n_dup_array key_meta = [lat, lon, alt] #convert file res to standard format if file_res == 'hr': file_res = 'H' elif file_res == 'da': file_res = 'D' elif file_res == 'mo': file_res = 'M' #get sampling/instrument grids raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup( site_ref, process_group, species, raw_st, raw_mm, full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list, unknown_mm_refs_list, no2_type) #do quality checks data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control( site_ref, species, file_res, no2_type, grid_dates, full_data, big_n_dup_array, valid_hours_dup, raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) #set processed unit p_unit = 'pbbv' #get local timezone try: local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24 - int(datetime_offset.seconds / 60 / 60)) else: local_tz = int(datetime_offset.seconds / 60 / 60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref) unknown_local_tz_list.append(site_ref) #pack meta meta = [ lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz, local_tz, site_name, country, contact ] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [ 
unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
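# ----------------------------------------------------------------------------
# Hedged sketch: when a file is daily or monthly the reader above repeats each
# value over every hour it covers and records which hours are replicas
# (n_dup_array), so later counts of valid observations are not inflated.
# A simplified daily -> hourly expansion following that logic:
import numpy as np
import pandas as pd

def expand_daily_to_hourly(yyyymmdd, value):
    start = pd.to_datetime(yyyymmdd, format='%Y%m%d')
    hours = pd.date_range(start, start + pd.Timedelta(days=1), freq='H')[:-1]
    vals = np.repeat(value, 24)
    n_dup = np.concatenate(([0], np.ones(23)))   # first hour real, rest copies
    return hours, vals, n_dup

# hours, vals, n_dup = expand_daily_to_hourly('20060101', 31.5)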
def site_iter_process(valid_refs, c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] #for ref_i in range(len(valid_refs)): data_valid = True site_ref = valid_refs[c] print 'Current Ref is = ', site_ref, c s_files = glob.glob( '/work/home/db876/observations/surface/%s/CAPMON/ozon_smpls_%s*' % (species, site_ref)) site_files = [] for y in year_array: for f in s_files: if str(y) in f: site_files.append(f) site_files = modules.natsorted(site_files) yymmdd = [] hhmm = [] vals = [] for file_i in range(len(site_files)): count = 0 meta_start = -99999 start_read_1 = False start_read_2 = False with open(site_files[file_i], 'rb') as f: reader = csv.reader(f, delimiter=',') print site_files[file_i] for row in reader: #print count #break out of loop at bottom of file if (start_read_2 == True) & (row[0] == '*TABLE ENDS'): break #get metadata try: if (row[0] == '*TABLE NAME') & (row[1] == 'Site information'): meta_start = count + 2 except: pass if count == meta_start: siteid_i = row.index('Site ID: standard') sitename_i = row.index('Description') lat_i = row.index('Latitude: decimal degrees') lon_i = row.index('Longitude: decimal degrees') try: alt_i = row.index( 'Ground elevation: above mean sea level') except: alt_i = row.index('Ground altitude') class_i = row.index('Site land use') if count == (meta_start + 6): latitude = row[lat_i] longitude = row[lon_i] altitude = row[alt_i] raw_class_name = row[class_i] site_name = row[sitename_i] #get data if start_read_2 == True: #read dates, times, and vals date = row[8] time = row[9] yymmdd.append(date[:4] + date[5:7] + date[8:]) hhmm.append(time[:2] + time[3:]) quality_code = row[13] #if flag not equal to V0 then make -99999 if quality_code == 'V0': vals = np.append(vals, np.float64(row[12])) else: vals = np.append(vals, -99999) try: if (row[0] == '*TABLE NAME') & (row[1] == 'OZONE_HOURLY'): start_read_1 = True except: pass if (start_read_1 == True) & (row[0] == '*TABLE COLUMN UNITS'): unit = row[12] if (start_read_1 == True) & (row[0] == '*TABLE BEGINS'): start_read_2 = True count += 1 #add to n_obs_all n_all += len(vals) n_after_nometa += len(vals) #convert data less < 0 to -99999 test_inv = vals < 0 vals[test_inv] = -99999 #create max possible grid full_data = np.empty(n_hours) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #put vals into full grid date_con = np.array(yymmdd).astype(int) time_con = np.array(hhmm).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] vals = vals[test_inds] #set st_big and mm_big st_big = ['continuous'] * len(vals) mm_big = ['ultraviolet photometry'] * len(vals) #get obs valid test = vals != -99999 n_obs_valid = len(vals[test]) n_after_flagsandlod += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con, time_con, start_year) converted_time = np.round(converted_time, decimals=5) syn_grid_time = np.arange(0, n_days, 1. 
/ 24) syn_grid_time = np.round(syn_grid_time, decimals=5) raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') vals = np.array(vals) full_data_after_flagsandlod[raw_indices] = vals raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) # test and remove duplicate and overlap points converted_time, vals, mm_big, st_big, na = modules.remove_duplicate_points( site_ref, converted_time, vals, mm_big, st_big, 'blank', output_res) test = vals >= 0 n_obs_valid = int(len(vals[test])) n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = vals #get metadata try: lat = np.float32(latitude) except: lat = 'na' try: lon = np.float32(longitude) except: lon = 'na' try: alt = np.float32(altitude) except: alt = 'na' unit = str(unit) raw_class_name = str(raw_class_name) site_name = str(site_name) country = 'Canada' contact = 'Dave MacTavish, 4905 Dufferin St., Toronto ON, CANADA, M3H 5T4, [email protected]' #set data tz - all CAPMON times are UTC data_tz = 0 all_tz = [data_tz] key_meta = [lat, lon, alt] #set site file resolution file_res = 'H' #get sampling/instrument grids raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup( site_ref, process_group, species, raw_st, raw_mm, full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list, unknown_mm_refs_list, no2_type) #do quality checks data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control( site_ref, species, file_res, no2_type, grid_dates, full_data, big_n_dup_array, 0, raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) #set processed unit p_unit = 'pbbv' #get local timezone try: local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24 - int(datetime_offset.seconds / 60 / 60)) else: local_tz = int(datetime_offset.seconds / 60 / 60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref) unknown_local_tz_list.append(site_ref) #pack meta meta = [ 
lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz, local_tz, site_name, country, contact ] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
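# ----------------------------------------------------------------------------
# Hedged sketch: each reader derives a whole-hour "local_tz" offset from the
# site coordinates. tz_root is assumed to be a tzwhere.tzwhere() instance; the
# offset is evaluated at a fixed reference date (1 Jan 2000), so it is a
# standard-time offset rather than a DST-aware one.
import datetime
import pytz
from tzwhere import tzwhere

tz_root = tzwhere.tzwhere(forceTZ=True)

def local_utc_offset(lat, lon):
    name = tz_root.tzNameAt(lat, lon, forceTZ=True)
    offset = pytz.timezone(name).utcoffset(datetime.datetime(2000, 1, 1))
    if offset < datetime.timedelta(0):
        # negative offsets come back wrapped via .seconds, so unwrap them
        return -(24 - int(offset.seconds / 3600))
    return int(offset.seconds / 3600)

# local_utc_offset(43.78, -79.47)  # Toronto -> -5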
def site_iter_process(valid_refs, c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] data_valid = True site_ref = valid_refs[c] print 'ref = ', site_ref, c site_test = all_refs == site_ref site_yyyymmdd = yyyymmdd[site_test] site_hhmm = hhmm[site_test] site_vals = vals[site_test] site_vals = np.array(site_vals) #add val to total obs count n_all += len(site_vals) #test if site_ref in meta_refs, if not then exit if site_ref not in meta_refs: inv_nometa += 1 print 'Site Invalid. No Metadata for ref' if no2_type == 'MOLYBDENUM': n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_obs_after_anyvaliddata, inv_nokeymeta, n_obs_after_nokeymeta, inv_resolution, n_obs_after_resolution, inv_badmeasurementmethod, n_obs_after_badmeasurementmethod = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] exit_r = 'nometa' return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) n_after_nometa += len(site_vals) #convert blank values to -99999 test_inv = site_vals == '' site_vals[test_inv] = -99999 #convert number invalids to -99999 test_inv = site_vals < 0 site_vals[test_inv] = -99999 #create max possible grid full_data = np.empty(n_hours) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #get meta meta_index = meta_refs.index(site_ref) data_tz = np.float32(meta_tz[meta_index]) all_tz = [data_tz] try: lat = np.float32(meta_lats[meta_index]) except: lat = 'na' try: lon = np.float32(meta_lons[meta_index]) except: lon = 'na' try: alt = np.float32(meta_alts[meta_index]) except: alt = 'na' unit = 'na' raw_class_name = meta_class[meta_index] site_name = meta_sitename[meta_index] country = 'United States' contact = '*****@*****.**' #adjust dates and times if tz is not equal to 0 tz = int(data_tz) if tz != 0: for i in range(len(site_yyyymmdd)): #create datetime dt = datetime.datetime(int(site_yyyymmdd[i][:4]), int(site_yyyymmdd[i][4:6]), int(site_yyyymmdd[i][6:]), int(site_hhmm[i][:2]), int(site_hhmm[i][2:])) if tz > 0: dt = dt - datetime.timedelta(hours=int(tz)) elif tz < 0: dt = dt + datetime.timedelta(hours=np.abs(int(tz))) site_yyyymmdd[i] = dt.strftime("%Y%m%d") site_hhmm[i] = dt.strftime("%H%M") #put vals into full grid date_con = np.array(site_yyyymmdd).astype(int) time_con = np.array(site_hhmm).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] site_vals = site_vals[test_inds] #set st_big and mm_big st_big = ['continuous'] * len(site_vals) if species == 'O3': mm_big = ['ultraviolet photometry'] * len(site_vals) elif (species == 'NO'): 
mm_big = ['chemiluminescence'] * len(site_vals) elif (species == 'CO'): mm_big = ['non-dispersive infrared spectroscopy'] * len(site_vals) #get obs valid test = site_vals >= 0 n_obs_valid = len(site_vals[test]) n_after_flagsandlod += n_obs_valid print site_vals, n_after_flagsandlod #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con, time_con, start_year) converted_time = np.round(converted_time, decimals=5) syn_grid_time = np.arange(0, n_days, 1. / 24) syn_grid_time = np.round(syn_grid_time, decimals=5) raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') site_vals = np.array(site_vals) full_data_after_flagsandlod[raw_indices] = site_vals raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) # test and remove duplicate and overlap points converted_time, site_vals, mm_big, st_big, na = modules.remove_duplicate_points( site_ref, converted_time, site_vals, mm_big, st_big, 'blank', output_res) test = site_vals >= 0 n_obs_valid = int(len(site_vals[test])) n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = site_vals key_meta = [lat, lon, alt] #set site file resolution file_res = 'H' #get sampling/instrument grids raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup( site_ref, process_group, species, raw_st, raw_mm, full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list, unknown_mm_refs_list, no2_type) #do quality checks data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control( site_ref, species, file_res, no2_type, grid_dates, full_data, big_n_dup_array, 0, raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) #make tz int after checks data_tz = np.float32(data_tz) #set processed unit p_unit = 'pbbv' #get local timezone try: local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24 - 
int(datetime_offset.seconds / 60 / 60)) else: local_tz = int(datetime_offset.seconds / 60 / 60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref) unknown_local_tz_list.append(site_ref) #pack meta meta = [ lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz, local_tz, site_name, country, contact ] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
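#Hedged illustrative sketch (not part of the original processing): how the
#local timezone offset above is derived from a pytz utcoffset(). Negative
#offsets are stored by Python as (-1 day + seconds), hence the -(24 - hours)
#correction. Zone names below are examples only.
import datetime
import pytz

def utc_offset_hours(tz_name):
    offset = pytz.timezone(tz_name).utcoffset(datetime.datetime(2000, 1, 1))
    if offset < datetime.timedelta(0):
        return -(24 - int(offset.seconds / 60 / 60))
    return int(offset.seconds / 60 / 60)

#utc_offset_hours('America/New_York') -> -5
#utc_offset_hours('Europe/Berlin')    ->  1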
def run_LSP(mod_data, x): lat_i = lat_indices[x] lon_i = lon_indices[x] print lat_i, lon_i current_lat = lat_c[lat_i] current_lon = lon_c[lon_i] waveform = mod_data waveform_ave = np.average(waveform) model_date_val = np.copy(model_date) model_time_val = np.copy(model_time) time = modules.date_process(model_date_val, model_time_val, start_year) if (species.lower() != 'gmao_temp') and (species.lower() != 'gmao_psfc') and ( species.lower() != 'wind_speed') and (species.lower() != 'wind_direction'): waveform = waveform * 1e9 #check model vals are valid #valid = vals >= 0 #vals = vals[valid] #model_time_val = model_time[valid] #model_date_val = model_date[valid] #take 8 hour average divisor = 8 total_len = len(waveform) / divisor start = 0 end = divisor ave_waveform = [] ave_time = [] for i in range(total_len): ave = np.ma.average(waveform[start:end]) ave_time = np.append(ave_time, time[start]) ave_waveform = np.append(ave_waveform, ave) start += divisor end += divisor time = np.copy(ave_time) waveform = np.copy(ave_waveform) #take lsp unwindowed of waveform ua_periods, ua_mag, ua_ph, ua_fr, ua_fi = modules.take_lomb_unwindowed( time, waveform, ofac, 1. / 24) #take out known periodic components 1,182.625, and 365.25 a priori for more accurate red noise fit. closest_daily_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i] - 1.)) closest_ha_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i] - 182.625)) closest_annual_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i] - 365.25)) rm_indices = [closest_daily_index, closest_ha_index, closest_annual_index] ua_mag_c, ua_fr, ua_fi = redfit.sidelobe_percent_remove( np.copy(ua_mag), ua_fr, ua_fi, rm_indices, 5., ua_periods) #------------------------------------------------------------------------------- #Do IFFT of altered spectra - with significant periods removed and gaps left in real and imag components linearly interpolated. 
#altered spectra provides red noise estimation baseline ##use ifft to get time series back from adjusted spectra #complex Fourier spectrum which corresponds to the Lomb-Scargle periodogram: F = [0] * ((len(ua_fr) * 2) + 1) #set first real value to average F[0] = complex(waveform_ave * len(waveform), 0) #Get reverse real and imaginary values rev_ua_fr = np.copy(ua_fr[::-1]) rev_ua_fi = np.copy(ua_fi[::-1]) rev_ua_fr[0] = 0 rev_ua_fi[0] = 0 f_index = 1 #Fill Fourier Spectrum real and imaginary values for i in range(len(ua_fr)): F[f_index] = complex(ua_fr[i], ua_fi[i]) f_index += 1 for i in range(len(ua_fr)): F[f_index] = complex(rev_ua_fr[i], -rev_ua_fi[i]) f_index += 1 F = np.array(F) #Take ifft and just take real values ifft_ua_ts = numpy.fft.ifft(F) ifft_ua_ts = ifft_ua_ts.astype('float64') ifft_ua_ts_len = (len(ifft_ua_ts) / ofac) + np.mod(len(ifft_ua_ts), ofac) ifft_time = time[-ifft_ua_ts_len:] ifft_ua_ts = ifft_ua_ts[-len(waveform):] ifft_time = ifft_time - ifft_time[0] a_periods, a_mag, corr_a_mag, a_fr, a_fi, a_red_periods, a_red_mag, a_gredth, a_fac95, a_fac99, a_fac99_9, a_faccrit, a_fac_grid, a_sig_levels, a_tau, a_corr = redfit.red_background( nsim, mctest, ifft_time, ifft_ua_ts, ofac) #apply lsp correction from altered spectrum to unaltered spectrum corr_ua_mag = ua_mag / a_corr #check confidence of each point on spectrum sigs = np.zeros(len(corr_ua_mag)) last_ind = len(a_sig_levels) - 1 for i in range(len(a_sig_levels) - 1): conf_low = a_gredth * a_fac_grid[i] conf_up = a_gredth * a_fac_grid[i + 1] current_last_ind = i + 1 for j in range(len(corr_ua_mag)): if sigs[j] == 0: if (corr_ua_mag[j] >= conf_low[j]) and (corr_ua_mag[j] < conf_up[j]): sigs[j] = a_sig_levels[i] elif current_last_ind == last_ind: if corr_ua_mag[j] > conf_up[j]: sigs[j] = a_sig_levels[i + 1] #get critical significance for all points on spectrum crit_sig = a_gredth * a_faccrit #get 95,99 and 99.9 % chi squared significance bands for all points on spectrum sig_95 = a_gredth * a_fac95 sig_99 = a_gredth * a_fac99 sig_99_9 = a_gredth * a_fac99_9 return (x, sigs, sig_95, sig_99, sig_99_9, crit_sig, a_gredth, corr_ua_mag, ua_periods, a_tau)
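#Hedged stand-alone illustration (simplified; the function above assembles the
#spectrum from the Lomb-Scargle real/imaginary outputs rather than np.fft.fft):
#mirroring the positive-frequency components as conjugates is what makes
#numpy.fft.ifft return a (numerically) real series again.
import numpy as np

signal = np.cos(2 * np.pi * np.arange(48) / 24.)    #toy daily cycle, hourly steps
spec = np.fft.fft(signal)
n_pos = len(signal) // 2

F = np.zeros(len(signal), dtype=complex)
F[0] = spec[0]                                      #mean (DC) term
F[1:n_pos + 1] = spec[1:n_pos + 1]                  #positive frequencies
F[n_pos + 1:] = np.conj(spec[1:n_pos][::-1])        #mirrored conjugates

recovered = np.fft.ifft(F)
assert np.allclose(recovered.imag, 0, atol=1e-10)   #imaginary parts ~0, so taking .real is safe
assert np.allclose(recovered.real, signal)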
def onpick(event): global pl global ind global fig2 ind = event.ind print 'ind = ', ind ind = ind[0] #x_data = event.xdata #y_data = event.ydata #find ind of closest lat/lon #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data) try: for i in range(len(pl)): pl.pop(0).remove() first_run = False except: first_run = True pass pl = m.plot([linear_lons[ind]], [linear_lats[ind]], 's', ms=20, alpha=0.6, color='yellow', zorder=20) #get model timeseries for site clicked lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, linear_lats[ind], linear_lons[ind]) model_var_pick = model_var[:, lat_n, lon_n] model_var_pick = model_var_pick * 1e9 model_var_mask = np.ma.masked_where(model_var_pick <= 0, model_var_pick) model_time_pd = pd.date_range(start=model_datetimes[0], end=model_datetimes[-1], freq='H') model_var_pd = pd.Series(model_var_mask, index=model_time_pd) #create sine wave from amp/phase model_date_l = model_date.astype(int) model_time_l = model_time.astype(int) model_times = modules.date_process(model_date_l, model_time_l, start_year) model_times = np.array(model_times) pi2 = np.pi * 2 ratio = 100. / annual_amp[lat_n, lon_n] ha_percent = ratio * annual_amp[lat_n, lon_n] #convert phases to radians calc = pi2 / 24. calc = pi2 / 6. ha_ph_r = ha_ph[lat_n, lon_n] * calc calc = pi2 / 12. annual_ph_r = annual_ph[lat_n, lon_n] * calc ha_model_wave = ha_amp[lat_n, lon_n] * (np.cos((pi2 * model_times / (365.25 / 2.)) - (ha_ph_r))) annual_model_wave = annual_amp[lat_n, lon_n] * (np.cos((pi2 * model_times / (365.25)) - (annual_ph_r))) ha_primary = p_ha_ph[lat_n, lon_n] ha_secondary = s_ha_ph[lat_n, lon_n] ha_model_wave = ha_model_wave + ave[lat_n, lon_n] annual_model_wave = annual_model_wave + ave[lat_n, lon_n] model_ha_wave_pd = pd.Series(ha_model_wave, index=model_time_pd) model_annual_wave_pd = pd.Series(annual_model_wave, index=model_time_pd) fig.canvas.draw() if first_run == False: plt.close(fig2) fig2, (axo) = plt.subplots(1, figsize=(24, 12)) fig2.patch.set_facecolor('white') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='black', markersize=3, label='Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_ha_wave_pd, color='green', markersize=3, label='Ha Waveform', markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_annual_wave_pd, color='red', markersize=3, label='Annual Waveform', markeredgecolor='None') #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual Amp Percent = %s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent)) plt.legend(loc='lower right') plt.tight_layout() axo.grid() plt.show() else: fig2, (axo) = plt.subplots(1, figsize=(24, 12)) fig2.patch.set_facecolor('white') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='black', markersize=3, label='Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_ha_wave_pd, color='green', markersize=3, label='Ha Waveform', markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_annual_wave_pd, color='red', markersize=3, label='Annual Waveform', markeredgecolor='None') #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual 
Amp Percent = %s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent)) plt.legend(loc='lower right') plt.tight_layout() axo.grid() plt.show()
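#Hedged sketch (example numbers only): how the seasonal waveforms above are
#rebuilt from an amplitude/phase pair - the annual phase in months is turned
#into radians with 2*pi/12 and used in a cosine about the site mean.
import numpy as np

t_days = np.arange(0, 365.25, 1. / 24)                   #one year of hourly time, in days
annual_amp_ex, annual_phase_months, mean_ex = 10., 7., 35.
phase_rad = annual_phase_months * (2 * np.pi / 12.)
annual_wave_ex = annual_amp_ex * np.cos((2 * np.pi * t_days / 365.25) - phase_rad) + mean_ex
#annual_wave_ex peaks where 2*pi*t/365.25 == phase_rad, i.e. around month 7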
site_ref = raw_input('Choose site from list. Sites with full set of yearly files between %i & %i are:\n%s\n' % (first_year, last_year + 1, ' '.join(i for i in valid_refs)))

#read in specific site data
site_group = root_grp.groups[site_ref]

#read in variables for site
obs_var = site_group.variables['o3'][:]
full_obs_var = obs_var[:]
full_obs_var_mask = np.ma.masked_where(full_obs_var <= 0, full_obs_var)
obs_date = site_group.variables['date'][:]
obs_time = site_group.variables['time'][:]
obs_lat = site_group.latitude
obs_lon = site_group.longitude
obs_alt = site_group.altitude

obs_times = modules.date_process(obs_date, obs_time)
obs_times = np.array(obs_times)
obs_times_full = obs_times[:]

##cut out invalid obs data
obs_var_mask = np.ma.masked_where(obs_var <= 0, obs_var)
valids = obs_var > 0
obs_var = obs_var[valids]
obs_times = obs_times[valids]
obs_ave = np.average(obs_var)

year_val = []
month_val = []
day_val = []
hour_val = []
def site_iter_process(valid_refs,c): #for ref_i in range(len(valid_refs)): data_valid = True site_ref = valid_refs[c] print 'Current Ref is = ', site_ref s_files = glob.glob('/work/home/db876/observations/surface/%s/CAPMON/ozon_smpls_%s*'%(species,site_ref)) site_files = [] for y in year_array: for f in s_files: if str(y) in f: site_files.append(f) site_files = modules.natsorted(site_files) yymmdd = [] hhmm = [] vals = [] #create max possible o3 grid full_data = np.empty(n_hours) full_data[:] = -99999 for file_i in range(len(site_files)): count = 0 meta_start = -99999 start_read_1 = False start_read_2 = False with open(site_files[file_i], 'rb') as f: reader = csv.reader(f,delimiter=',') print site_files[file_i] for row in reader: #print count #break out of loop at bottom of file if (start_read_2 == True) & (row[0] == '*TABLE ENDS'): break #get metadata try: if (row[0] =='*TABLE NAME') & (row[1] == 'Site information'): meta_start = count+2 except: pass if count == meta_start: lat_i = row.index('Latitude: decimal degrees') lon_i = row.index('Longitude: decimal degrees') try: alt_i = row.index('Ground elevation: above mean sea level') except: alt_i = row.index('Ground altitude') class_i = row.index('Site land use') if count == (meta_start+6): latitude = row[lat_i] longitude = row[lon_i] altitude = row[alt_i] raw_class_name = row[class_i] #get data if start_read_2 == True: #read dates, times, and vals date = row[8] time = row[9] yymmdd.append(date[:4]+date[5:7] + date[8:]) hhmm.append(time[:2]+time[3:]) quality_code = row[13] if quality_code == 'V0': vals = np.append(vals,np.float64(row[12])) else: vals = np.append(vals,-99999) try: if (row[0] == '*TABLE NAME') & (row[1] == 'OZONE_HOURLY'): start_read_1 = True except: pass if (start_read_1 == True) & (row[0] == '*TABLE COLUMN UNITS'): unit = row[12] if (start_read_1 == True) & (row[0] == '*TABLE BEGINS'): start_read_2 = True count+=1 #convert all invalids to -99999 test_inv = vals < 0 vals[test_inv] = -99999 #put o3 vals into full grid date_con = np.array(yymmdd).astype(int) time_con = np.array(hhmm).astype(int) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) syn_grid_time = np.arange(0,n_days,1./24) syn_grid_time = np.round(syn_grid_time,decimals=5) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') vals = np.array(vals) #make sure no data is past end year index_test = indices < len(full_data) indices = indices[index_test] vals = vals[index_test] full_data[indices] = vals #get metadata lat = np.float64(latitude) lon = np.float64(longitude) alt = np.float64(altitude) #do data quality checks full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,np.float64(altitude),grid_dates,start_year,end_year) #set measurement method mm = 'ultraviolet photometry' #set site file resolution file_res = 'H' #set sampling as average st = 'average' anthrome_class_name = 'na' return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
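#Hedged toy sketch of the grid-matching idiom used above: observation times
#(fractional days since the start year) are rounded and matched onto a
#synthetic hourly grid with np.searchsorted, and the values are dropped onto
#a -99999-filled array at those indices.
import numpy as np

syn_grid_ex = np.round(np.arange(0, 2, 1. / 24), decimals=5)        #two-day hourly grid
obs_times_ex = np.round(np.array([0.0, 0.25, 1.5]), decimals=5)     #00:00 and 06:00 day 0, 12:00 day 1
obs_vals_ex = np.array([30., 42., 55.])

grid_vals_ex = np.empty(len(syn_grid_ex))
grid_vals_ex[:] = -99999
idx_ex = np.searchsorted(syn_grid_ex, obs_times_ex, side='left')
grid_vals_ex[idx_ex] = obs_vals_ex                                  #indices 0, 6 and 36 now hold the obs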
average_years = raw_input('\nTake average cycles of all years? y or n.\n')

#read in specific site data
site_group = root_grp.groups[site_ref]

#read in variables for site
obs_var = site_group.variables['o3'][:]
full_obs_var = obs_var[:]
full_obs_var_mask = np.ma.masked_where(full_obs_var <= 0, full_obs_var)
obs_date = site_group.variables['date'][:]
obs_time = site_group.variables['time'][:]
obs_lat = site_group.latitude
obs_lon = site_group.longitude
obs_alt = site_group.altitude

obs_times = modules.date_process(obs_date, obs_time, first_year)
obs_times = np.array(obs_times)
obs_times_full = obs_times[:]

##cut out invalid obs data
obs_var_mask = np.ma.masked_where(obs_var <= 0, obs_var)
valids = obs_var > 0
obs_var = obs_var[valids]
obs_times = obs_times[valids]
obs_ave = np.average(obs_var)

year_val = []
month_val = []
day_val = []
hour_val = []
def site_iter_process(valid_refs,c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] #read files site at a time #for ref_i in range(len(valid_refs)): site_ref = valid_refs[c] all_latitudes = [] all_longitudes = [] all_altitudes = [] all_unit = [] all_site_name = [] all_country = [] all_contact = [] mm_big = [] meta_valid_list = [] data_valid = True print 'Current Ref is = ', site_ref,c #find if sites have full valid range from start year and finishing in end year s_files = glob.glob('/work/home/db876/observations/surface/%s/EMEP/%s*'%(fname_species,site_ref)) year_files = [file.replace("/work/home/db876/observations/surface/%s/EMEP/"%(fname_species), "") for file in s_files] cut_year_files = [file[8:12] for file in year_files] site_files = [] for y in year_array: for i in range(len(s_files)): if str(y) in cut_year_files[i]: site_files.append(s_files[i]) site_files = modules.natsorted(site_files) #test for duplicate file years, if duplicates break processing file_years = [] for file in site_files: last_file_split = file.split('/')[-1] file_years=np.append(file_years,last_file_split[8:12]) for y in year_array: test = file_years == str(y) if len(file_years[test]) > 1: print 'Site has duplicate files for %s. Breaking processing'%(y) 1+'a' if site_files == []: print 'No valid files for site\n' return #remove daily/monthly files if necessary if output_res == 'H': del_i = [] for i in range(len(site_files)): if '.1d.' in site_files[i]: del_i.append(i) elif '.1mo.' in site_files[i]: del_i.append(i) site_files=np.delete(site_files,del_i) elif output_res == 'HD': del_i = [] for i in range(len(site_files)): if '.1mo.' 
in site_files[i]: del_i.append(i) site_files=np.delete(site_files,del_i) for y in year_array: bad_meta = False got_year = False for file in site_files: last_file_split = file.split('/')[-1] if str(y) in last_file_split[8:12]: got_year = True break if got_year == False: #fill in data for missing year timedelta_diff = datetime.date(y+1, 1, 1) - datetime.date(y, 1, 1) ndays_missing = timedelta_diff.days continue count = 0 with open(file, 'rb') as f: reader = csv.reader(f,delimiter=' ') print file for row in reader: try: row = filter(lambda a: a != '', row) except: pass try: row = filter(lambda a: a != ',', row) except: pass #get start date of file if row[0] == 'Startdate:': data = row[1] s_yyyy = data[:4] s_mm = data[4:6] s_dd = data[6:8] s_hh = data[8:10] s_min = data[10:12] start_datetime = datetime.datetime(int(s_yyyy),1,1,0,0) #get unit if row[0] == 'Unit:': try: if len(row) == 3: unit_part1 = row[1] unit_part2 = row[2] unit = unit_part1+'_'+unit_part2 elif len(row) == 2: unit = row[1] all_unit.append(unit) except: bad_meta = True #get resolution if row[0] == 'Resolution': if row[1] == 'code:': file_res = row[2] print 'Resolution = %s'%file_res #get latitude if row[0] == 'Station': if row[1] == 'latitude:': latitude = row[2] all_latitudes.append(latitude) #get longitude if row[0] == 'Station': if row[1] == 'longitude:': longitude = row[2] all_longitudes.append(longitude) #get altitude if row[0] == 'Station': if row[1] == 'altitude:': altitude = row[2][:-1] all_altitudes.append(altitude) #get site name if row[0] == 'Station': if row[1] == 'name:': site_name = row[2] all_site_name.append(site_name) #get period if row[0] == 'Period': period_code = row[2] #get stats method if row[0] == 'Statistics:': try: st = row[1] + row[2] if st != 'arithmeticmean': print 'Not Arithmetic Mean!' print row[1] print 1+'a' except: print 'Not Arithmetic Mean!' 
print row[1] print 1+'a' #get instrument method and name if row[0] == 'Instrument': if row[1] == 'type:': mm_list = row[2:] if len(mm_list) > 1: site_mm = '' for x in range(len(mm_list)): site_mm = site_mm+mm_list[x]+' ' site_mm = site_mm.strip() else: site_mm = mm_list[0] if row[1] == 'name:': mn_list = row[2:] if len(mn_list) > 1: site_mn = '' for x in range(len(mn_list)): site_mn = site_mn+mn_list[x]+' ' site_mn = site_mn.strip() else: site_mn = mn_list[0] #get method ref if row[0] == 'Method': if row[1] == 'ref:': try: mf_list = row[2:] if len(mf_list) > 1: site_mf = '' for x in range(len(mf_list)): site_mf = site_mf+mf_list[x]+' ' site_mf = site_mf.strip() else: site_mf = mf_list[0] except: site_mf = '' #put together intrument type+instrument_name+method_ref mm = site_mm+site_mn+site_mf #get contact if row[0] == 'Originator:': try: contact_list = row[1:] if len(contact_list) > 1: site_contact = '' for x in range(len(mf_list)): site_contact = site_contact+contact_list[x]+' ' site_contact = site_contact.strip() else: site_contact = site_contact[0] except: site_contact = '' all_contact.append(site_contact) #get country site_country = EMEP_COUNTRIES(file.split('/')[-1][:2]) all_country.append(site_country) if row[0] == 'starttime': skip_n = count+1 if species == 'ISOP': spec_ind = row.index('C5H8') try: flag_ind = row.index('flag_C5H8') except: flag_ind = row.index('flag') else: spec_ind = row.index(species) try: flag_ind = row.index('flag_'+species) except: flag_ind = row.index('flag') count+=1 read = np.loadtxt(file,dtype="f8,f8,f8,f8",skiprows=skip_n,usecols=(0,1,spec_ind,flag_ind),unpack=True) read = np.array(read) times_since_start = read[0,:] endtimes_since_start = read[1,:] conc = read[2,:] conc = np.array(conc).astype('float64') flags = read[3,:] dates = [] times = [] enddates = [] endtimes = [] times_since_start = np.float64(times_since_start) endtimes_since_start = np.float64(endtimes_since_start) for x in range(len(times_since_start)): days_since_start = math.trunc(times_since_start[x]) enddays_since_start = math.trunc(endtimes_since_start[x]) remainder = times_since_start[x] - days_since_start remainder_end = endtimes_since_start[x] - enddays_since_start unrounded_hour = remainder*24 unrounded_hour_end = remainder_end*24 hour = np.round(unrounded_hour) hour_end = np.round(unrounded_hour_end) time_delta = datetime.timedelta(days = days_since_start,hours = hour) time_delta_end = datetime.timedelta(days = enddays_since_start,hours = hour_end) calc_datetime = start_datetime + time_delta calc_datetime_end = start_datetime + time_delta_end calc_yyyymmdd = calc_datetime.strftime("%Y%m%d") calc_hhmm = calc_datetime.strftime("%H%M") end_calc_yyyymmdd = calc_datetime_end.strftime("%Y%m%d") end_calc_hhmm = calc_datetime_end.strftime("%H%M") dates.append(calc_yyyymmdd) times.append(calc_hhmm) enddates.append(end_calc_yyyymmdd) endtimes.append(end_calc_hhmm) conc = np.float64(conc) flags = np.float64(flags) #add to n_obs_all n_all += len(conc) #IF bad_meta == True then set all file vals as nans if bad_meta == True: conc[:] = np.NaN meta_valid_list.append(bad_meta) #DO INLINE INVALID AND FLAG CONVERT to NaN test = conc < 0 conc[test] = np.NaN test = flags != 0 conc[test] = np.NaN #convert units by line (only if value is >= than 0 try: if (unit.lower() != 'ppb') & (unit.lower() != 'ppbv'): if unit == 'ug/m3': #calculate conversion factor from mg/m3 assuming 293K and 1013 hPa - in EU LAW #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144/mol_mass*(293.)/(1013./10.) 
conc = conv_fact*conc elif unit == 'ug_N/m3': conv_fact = 8.3144/14.00674*(293.)/(1013./10.) conc = conv_fact*conc elif (unit == 'ppm') or (unit == 'ppmv'): conc = conc*1e3 #print 'Converting Units from ppmv to ppbv' elif (unit == 'ppt') or (unit == 'pptv'): conc = conc/1e3 #print 'Converting Units from pptv to ppbv' else: print 'Unknown Unit' 1+'a' except: pass #remove 9.999 from ISOP dataset if species == 'ISOP': test = conc == 9.999 conc[test] = np.NaN #if file resolution is daily or monthly then replicate times after point, to fill hourly data array. count=0 if file_res == '1h': n_dups = np.zeros(len(conc)) elif file_res == '1d': n_dups = [] #if measurement method is flask, then put leave flask measurement in as hourly measurement, the first hour of month file_hours = len(dates) for i in range(file_hours): current_year = int(dates[count][:4]) current_month = int(dates[count][4:6]) current_day = int(dates[count][6:]) current_hh = int(times[count][:2]) current_mm = int(times[count][2:]) next_year = int(enddates[i][:4]) next_month = int(enddates[i][4:6]) next_day = int(enddates[i][6:]) next_hh = int(endtimes[i][:2]) next_mm = int(endtimes[i][2:]) s = datetime.datetime(year = current_year, month = current_month, day = current_day, hour = current_hh, minute = current_mm) e = datetime.datetime(year = next_year, month = next_month, day = next_day, hour = next_hh, minute = next_mm) day_dates = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][1:-1] day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1] dates = np.insert(dates,count+1,day_dates) times = np.insert(times,count+1,day_hours) conc = np.insert(conc,count+1,[conc[count]]*len(day_dates)) #append to n duplicated array n_dups=np.append(n_dups,0) n_dups=np.append(n_dups,[1]*len(day_dates)) count +=(len(day_dates)+1) elif file_res == '1mo': n_dups = [] #if measurement method is flask, then put leave flask measurement in as hourly measurement, the first hour of month file_hours = len(dates) for i in range(file_hours): current_year = int(dates[count][:4]) current_month = int(dates[count][4:6]) current_day = int(dates[count][6:]) current_hh = int(times[count][:2]) current_mm = int(times[count][2:]) next_year = int(enddates[i][:4]) next_month = int(enddates[i][4:6]) next_day = int(enddates[i][6:]) next_hh = int(endtimes[i][:2]) next_mm = int(endtimes[i][2:]) s = datetime.datetime(year = current_year, month = current_month, day = current_day, hour = current_hh, minute = current_mm) e = datetime.datetime(year = next_year, month = next_month, day = next_day, hour = next_hh, minute = next_mm) day_dates = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][1:-1] day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1] dates = np.insert(dates,count+1,day_dates) times = np.insert(times,count+1,day_hours) conc = np.insert(conc,count+1,[conc[count]]*len(day_dates)) #append to n duplicated array n_dups=np.append(n_dups,0) n_dups=np.append(n_dups,[1]*len(day_dates)) count += (len(day_dates)+1) data = [dates,times,conc,n_dups] #put measurnement methods and into big list len of times mm_big=np.append(mm_big,[mm]*len(dates)) try: big_list = np.hstack((big_list,data)) except: big_list = np.array(data) if (y == year_array[-1]): #get dates and times date_con = big_list[0,:] time_con = big_list[1,:] #get vals vals = np.array(big_list[2,:]).astype('float64') #get n dup array n_dup_array = np.array(big_list[3,:]).astype(float).astype(int) #if all files have missing key meta then exit if all(i == True for i 
in meta_valid_list) == True: inv_nometa += 1 print 'Site Invalid. No Metadata for ref' if no2_type == 'MOLYBDENUM': n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_obs_after_anyvaliddata,inv_nokeymeta,n_obs_after_nokeymeta,inv_resolution,n_obs_after_resolution,inv_badmeasurementmethod,n_obs_after_badmeasurementmethod = 0,0,0,0,0,0,0,0,0,0,0,0,0 exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod]) n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod]) unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list] meta = ['na','na','na','na','na','na','na','na','na','na','na','na'] exit_r = 'nometa' return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1) valid_hours_dup = np.sum(n_dup_array) n_after_nometa += (len(vals)-valid_hours_dup) #delete big list del big_list date_con = np.array(date_con).astype(int) time_con = np.array(time_con).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] vals = vals[test_inds] mm_big = mm_big[test_inds] n_dup_array = n_dup_array[test_inds] #set st_big as 'continuous' st_big = ['continuous']*len(vals) #convert all Nans back to -99999 test = np.isnan(vals) vals[test] = -99999 #get obs valid test = vals >= 0 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = int(len(vals[test]) - valid_hours_dup) n_after_flagsandlod += n_obs_valid #create max possible species grid, measurement method and sampling type grids full_data = np.empty(n_hours) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) syn_grid_time = np.arange(0,n_days,1./24) syn_grid_time = np.round(syn_grid_time,decimals=5) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') vals = np.array(vals) full_data_after_flagsandlod[raw_indices] = vals raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) # test and remove duplicate and overlap points converted_time,vals,mm_big,st_big,n_dup_array = modules.remove_duplicate_points(site_ref,converted_time,vals,mm_big,st_big,n_dup_array,output_res) test = vals >= 0 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = int(len(vals[test]) - valid_hours_dup) n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = vals big_n_dup_array[indices] = n_dup_array #get mode of metadata try: lat = np.float32(stats.mode(all_latitudes)[0][0]) except: lat = 'na' try: lon = np.float32(stats.mode(all_longitudes)[0][0]) except: lon = 'na' try: alt = np.float32(stats.mode(all_altitudes)[0][0]) except: alt = 'na' unit = stats.mode(all_unit)[0][0] #remove empty strings from extra meta before mode test try: site_name = stats.mode(filter(None, all_site_name))[0][0] except: 
site_name = 'na' try: country = stats.mode(filter(None, all_country))[0][0] except: country = 'na' try: contact = stats.mode(filter(None, all_contact))[0][0] except: contact = 'na' #set data tz - all EMEP times are UTC data_tz = 0 all_tz = [data_tz] key_meta = [lat,lon,alt] #convert file res to standard format if file_res == '1h': file_res = 'H' elif file_res == '1d': file_res = 'D' elif file_res == '1mo': file_res = 'M' #get sampling/instrument grids raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,unknown_mm_list,unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(site_ref,process_group,species,raw_st,raw_mm,full_data_after_flagsandlod,full_data,raw_indices,unknown_mm_list,unknown_mm_refs_list,no2_type) #do quality checks data_valid,full_data,valid_hours_dup,p_st_grid,p_mm_grid,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod,exit_r = modules.primary_quality_control(site_ref,species,file_res,no2_type,grid_dates,full_data,big_n_dup_array,valid_hours_dup,raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,data_resolution,n_obs_valid,key_meta,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod]) n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod]) unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list] meta = [lat,lon,alt,'na','na','na','na','na','na','na','na','na'] return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1) #set metadata not available as na raw_class_name = 'na' #set processed unit p_unit = 'ppbv' #get local timezone try: local_tz_name = tz_root.tzNameAt(lat,lon,forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000,1,1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24-int(datetime_offset.seconds/60/60)) else: local_tz = int(datetime_offset.seconds/60/60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s'%(site_ref) unknown_local_tz_list.append(site_ref) #pack meta meta = [lat,lon,alt,raw_class_name,file_res,unit,p_unit,data_tz,local_tz,site_name,country,contact] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod]) n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list] return c,full_data,p_st_grid,p_mm_grid,data_valid,meta,exit_c_list,n_c_list,unknown_list,'na',big_n_dup_array
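#Hedged sketch of the ug/m3 -> ppbv conversion used above, assuming the same
#EU reference conditions (293 K, 1013 hPa): the factor is the molar volume
#R*T/P in litres per mole divided by the molar mass. Ozone used as an example.
R = 8.3144            #J mol-1 K-1
temp_k = 293.
press_hpa = 1013.
mol_mass_o3 = 48.     #g/mol

conv_fact_ex = R / mol_mass_o3 * temp_k / (press_hpa / 10.)   #~24.0/48 = ~0.50
conc_ppbv_ex = conv_fact_ex * 100.                            #100 ug/m3 of O3 -> ~50 ppbv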
def onpick(event): global pl global ind global fig2 ind = event.ind ind = ind[0] #x_data = event.xdata #y_data = event.ydata #find ind of closest lat/lon #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data) try: for i in range(len(pl)): pl.pop(0).remove() first_run = False except: first_run = True pass pl = m.plot([X[ind]], [Y[ind]], 'o', ms=12, alpha=0.6, color='yellow',zorder=20) #get model timeseries for site clicked lat_n,lon_n = modules.obs_model_gridbox(lat_e,lon_e,obs_lats[ind],obs_lons[ind]) model_var_pick = model_var[:,lat_n,lon_n] model_var_pick = model_var_pick*1e9 model_var_mask = np.ma.masked_where(model_var_pick<=0,model_var_pick) if model_name == 'MACC': model_time_pd = pd.date_range(start = model_datetimes[0],end = model_datetimes[-1], freq = 'H') count = 0 valids = [] for i in range(len(model_time_pd)): if count == 0: valids.append(i) count+=1 elif count == 2: count = 0 else: count+=1 model_time_pd = model_time_pd[valids] model_var_pd = pd.Series(model_var_mask, index=model_time_pd) else: model_time_pd = pd.date_range(start = model_datetimes[0],end = model_datetimes[-1], freq = 'H') model_var_pd = pd.Series(model_var_mask, index=model_time_pd) #get obs timeseries for site clicked ref = obs_refs[ind] obs_ts_group = obs_root_grp.groups[ref] obs_var = obs_ts_group.variables[species.lower()][:] group = obs_ts_group.process_group lat = obs_ts_group.latitude lon = obs_ts_group.longitude lon = obs_ts_group.longitude alt = obs_ts_group.altitude complete = obs_ts_group.completeness a_class = obs_ts_group.anthrome_class r_class = obs_ts_group.raw_class continent = loc_dict[tags[ind]] country = obs_ts_group.country obs_var_mask = np.ma.masked_where(obs_var<=0,obs_var) obs_time_pd = pd.date_range(start = obs_datetimes[0],end = obs_datetimes[-1], freq = 'H') obs_var_pd = pd.Series(obs_var_mask, index=obs_time_pd) #create sine wave from amp/phase obs_date_l = obs_date.astype(int) obs_time_l = obs_time.astype(int) obs_times = modules.date_process(obs_date_l,obs_time_l,start_year) obs_times = np.array(obs_times) pi2 = np.pi*2 #convert phases to radians calc = pi2/6. obs_ha_phase_r = obs_ha_phase[ind] * calc calc = pi2/12. obs_annual_phase_r = obs_annual_phase[ind] * calc ha_obs_wave = obs_ha_mag[ind]*(np.cos((pi2*obs_times/(365.25/2.))-(obs_ha_phase_r))) annual_obs_wave = obs_annual_mag[ind]*(np.cos((pi2*obs_times/(365.25))-(obs_annual_phase_r))) seasonal_obs_wave = (ha_obs_wave+annual_obs_wave)+obs_ave[ind] obs_seasonal_wave_pd = pd.Series(seasonal_obs_wave, index=obs_time_pd) #create sine wave from amp/phase mod_date_l = model_date.astype(int) mod_time_l = model_time.astype(int) mod_times = modules.date_process(mod_date_l,mod_time_l,start_year) mod_times = np.array(mod_times) pi2 = np.pi*2 #convert phases to radians calc = pi2/6. model_ha_phase_r = model_ha_phase[ind] * calc calc = pi2/12. 
model_annual_phase_r = model_annual_phase[ind] * calc ha_model_wave = model_ha_mag[ind]*(np.cos((pi2*mod_times/(365.25/2.))-(model_ha_phase_r))) annual_model_wave = model_annual_mag[ind]*(np.cos((pi2*mod_times/(365.25))-(model_annual_phase_r))) seasonal_model_wave = (ha_model_wave+annual_model_wave)+model_ave[ind] model_seasonal_wave_pd = pd.Series(seasonal_model_wave, index=model_time_pd) #get spectra data site_group_obs = root_grp_obs_spec.groups[ref] site_group_mod = root_grp_mod_spec.groups[ref] obs_period = site_group_obs.variables['period'][:] mod_period = site_group_mod.variables['period'][:] obs_amp = site_group_obs.variables['amplitude'][:] mod_amp = site_group_mod.variables['amplitude'][:] fig.canvas.draw() if first_run == False: plt.close(fig2) fig2, (axo,axo2) = plt.subplots(2,figsize=(24,12)) fig2.patch.set_facecolor('white') #fig2 = plt.figure() axo.plot_date(obs_time_pd.to_pydatetime(), obs_var_pd, color='black', markersize = 3, label = 'Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='red',alpha=0.5, markersize = 3, label = '%s %s %s %s'%(model_name,version,grid_size,met),markeredgecolor='None') axo.plot_date(obs_time_pd.to_pydatetime(), obs_seasonal_wave_pd, color='yellow', markersize = 3, label = 'Obs Seasonal Waveform',markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_seasonal_wave_pd, color='green', markersize = 3, label = 'Model Seasonal Waveform',markeredgecolor='None') axo2.loglog(obs_period,obs_amp,color='black',label='Obs') axo2.loglog(mod_period,mod_amp,color='red',label = '%s %s %s %s'%(model_name,version,grid_size,met)) axo2.text(0.01, 0.95, 'Obs D Amp = %8.2f ppb'%(obs_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.92, 'Model D Amp = %8.2f ppb'%(model_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.85, 'Obs HA Amp = %8.2f ppb'%(obs_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.82, 'Model HA Amp = %8.2f ppb'%(model_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.75, 'Obs A Amp = %8.2f ppb'%(obs_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.72, 'Model A Amp = %8.2f ppb'%(model_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.55, 'Obs D Phase = %8.2f'%(obs_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.52, 'Model D Phase = %8.2f'%(model_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.45, 'Obs HA Phase = %8.2f'%(obs_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.42, 'Model HA Phase = %8.2f'%(model_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red') obs_a_ph = obs_annual_phase[ind] mod_a_ph = model_annual_phase[ind] if obs_a_ph > 12: obs_a_ph = obs_a_ph-12. if mod_a_ph > 12: mod_a_ph = mod_a_ph-12. 
axo2.text(0.01, 0.35, 'Obs A Phase = %8.2f'%(obs_a_ph),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.32, 'Model A Phase = %8.2f'%(mod_a_ph),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.15, 'Obs Ave = %8.2f ppb'%(obs_ave[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.12, 'Model Ave = %8.2f ppb'%(model_ave[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.axvline(1.,ymin=0,ymax=1,color='blue',linestyle='--') axo2.axvline(182.625,ymin=0,ymax=1,color='blue',linestyle='--') axo2.axvline(365.25,ymin=0,ymax=1,color='blue',linestyle='--') axo2.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) axo2.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) plt.gca().xaxis.set_major_formatter(FuncFormatter(xformatter)) plt.gca().yaxis.set_major_formatter(FuncFormatter(yformatter)) axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s, Grid Index = %s,%s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,lat_n,lon_n)) plt.legend(loc = 'lower right') plt.tight_layout() axo.grid() axo2.grid() plt.show() else: #fig2 = plt.figure() fig2, (axo,axo2) = plt.subplots(2,figsize=(24,12)) fig2.patch.set_facecolor('white') axo.plot_date(obs_time_pd.to_pydatetime(), obs_var_pd, color='black', markersize = 3, label = 'Observations') axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='red', markersize = 3,alpha=0.5, label = '%s %s %s %s'%(model_name,version,grid_size,met),markeredgecolor='None') axo.plot_date(obs_time_pd.to_pydatetime(), obs_seasonal_wave_pd, color='yellow', markersize = 3, label = 'Obs Seasonal Waveform',markeredgecolor='None') axo.plot_date(model_time_pd.to_pydatetime(), model_seasonal_wave_pd, color='green', markersize = 3, label = 'Model Seasonal Waveform',markeredgecolor='None') axo2.loglog(obs_period,obs_amp,color='black',label='Obs') axo2.loglog(mod_period,mod_amp,color='red', label = '%s %s %s %s'%(model_name,version,grid_size,met)) axo2.text(0.01, 0.95, 'Obs D Amp = %8.2f ppb'%(obs_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.92, 'Model D Amp = %8.2f ppb'%(model_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.85, 'Obs HA Amp = %8.2f ppb'%(obs_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.82, 'Model HA Amp = %8.2f ppb'%(model_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.75, 'Obs A Amp = %8.2f ppb'%(obs_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.72, 'Model A Amp = %8.2f ppb'%(model_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.55, 'Obs D Phase = %8.2f'%(obs_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.52, 'Model D Phase = %8.2f'%(model_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.45, 'Obs HA Phase = %8.2f'%(obs_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.42, 'Model HA Phase = %8.2f'%(model_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red') obs_a_ph = obs_annual_phase[ind] mod_a_ph = model_annual_phase[ind] if obs_a_ph > 12: obs_a_ph = obs_a_ph-12. if mod_a_ph > 12: mod_a_ph = mod_a_ph-12. 
axo2.text(0.01, 0.35, 'Obs A Phase = %8.2f'%(obs_a_ph),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.32, 'Model A Phase = %8.2f'%(mod_a_ph),transform=axo2.transAxes,fontweight='bold',color='red') axo2.text(0.01, 0.15, 'Obs Ave = %8.2f ppb'%(obs_ave[ind]),transform=axo2.transAxes,fontweight='bold') axo2.text(0.01, 0.12, 'Model Ave = %8.2f ppb'%(model_ave[ind]),transform=axo2.transAxes,fontweight='bold',color='red') axo2.axvline(1.,ymin=0,ymax=1,color='blue',linestyle='--') axo2.axvline(182.625,ymin=0,ymax=1,color='blue',linestyle='--') axo2.axvline(365.25,ymin=0,ymax=1,color='blue',linestyle='--') axo2.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) axo2.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter()) plt.gca().xaxis.set_major_formatter(FuncFormatter(xformatter)) plt.gca().yaxis.set_major_formatter(FuncFormatter(yformatter)) axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s, Grid Index = %s,%s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,lat_n,lon_n)) plt.legend(loc = 'lower right') plt.tight_layout() axo.grid() axo2.grid() plt.show()
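#Hedged generic illustration (not the modules.obs_model_gridbox implementation):
#one way to find the model grid cell containing a site from cell-edge arrays,
#ignoring wrap-around and exact-edge cases. Grid spacing and site are examples.
import numpy as np

lat_e_ex = np.arange(-90., 90. + 2., 2.)        #2 degree latitude cell edges
lon_e_ex = np.arange(-180., 180. + 2.5, 2.5)    #2.5 degree longitude cell edges
obs_lat_ex, obs_lon_ex = 51.5, -0.1

lat_n_ex = np.searchsorted(lat_e_ex, obs_lat_ex) - 1   #index of containing latitude band
lon_n_ex = np.searchsorted(lon_e_ex, obs_lon_ex) - 1   #index of containing longitude band
#model_var[:, lat_n_ex, lon_n_ex] would then be the co-located model time series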
end_year = 2008

d0 = datetime.date(start_year, 1, 1)
d1 = datetime.date(end_year + 1, 1, 1)
delta = d1 - d0
n_days = delta.days
all_hours = np.arange(0, n_days, 1. / 24.)

group = Dataset('GAW_SURFACE_O3_1971_2009.nc')
site_group = group.groups['hpb']
vals = site_group.variables['o3'][:]
date = site_group.variables['date'][:]
time = site_group.variables['time'][:]

current_time = modules.date_process(date, time, start_year)
print current_time
print vals

valid = vals > 0
vals = vals[valid]
current_time = current_time[valid]
print current_time

all_hours = np.arange(np.min(current_time), np.max(current_time) + 1. / 48., 1. / 24.)
f = interpolate.interp1d(current_time, vals)
vals = f(all_hours)
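#Hedged toy sketch of the gap-filling step above: scipy's interp1d linearly
#interpolates the valid (irregular) observation times onto a continuous grid.
#Times are in whole hours here for clarity; the script above works in
#fractional days.
import numpy as np
from scipy import interpolate

valid_time_ex = np.array([0., 1., 4., 5.])       #hours with valid data
valid_vals_ex = np.array([30., 32., 40., 38.])
hourly_grid_ex = np.arange(valid_time_ex.min(), valid_time_ex.max() + 0.5, 1.)

f_ex = interpolate.interp1d(valid_time_ex, valid_vals_ex)   #linear by default
filled_ex = f_ex(hourly_grid_ex)                            #hours 2 and 3 are interpolated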
def run_LSP(model_data, x): print obs_refs[x] vals = model_data #check obs vals are valid valid = vals >= 0 vals = vals[valid] model_time_val = model_time[valid] model_date_val = model_date[valid] full_times = modules.date_process(model_date, model_time, start_year) if timeres == 'M': full_times_year = full_times[:12] else: full_times_year = full_times[:8766] full_times_day = full_times[:24] valid_times = modules.date_process(model_date_val, model_time_val, start_year) site_lon = obs_lons[x] #convert site_lon to 0 to 360 degs if site_lon < 0: site_lon = 360 - np.abs(site_lon) #transform from UTC time to solar time sun_time = lon_step_time * site_lon time_diff = sun_time - 0 if time_diff > 12: time_diff = time_diff - 24 #make time start from 0 valid_times_from0 = modules.phase_start_correct(valid_times) periodic_periods = [ 1. / 4., 1. / 3., 1. / 2., 1., 365.25 / 4., 365.25 / 3., 365.25 / 2., 365.25 ] periods, mag, ph, fr, fi = modules.take_lomb_spec( valid_times_from0, vals, w=True, key_periods=periodic_periods) #get mean of values mean_array = np.average(vals) #correct all phases for start point (not actually being from 0 - just corrected to be) ph = modules.phase_start_point_correct_all(periodic_periods, ph, valid_times) key_diurnal_periods = [1. / 4., 1. / 3., 1. / 2., 1.] key_seasonal_periods = [365.25 / 4., 365.25 / 3., 365.25 / 2., 365.25] diurnal_mags = mag[:4] seasonal_mags = mag[4:] seasonal_phs = ph[4:] #get individual mags and phases daily_h3_mag = mag[0] daily_h2_mag = mag[1] daily_h1_mag = mag[2] orig_daily_mag = mag[3] daily_h3_ph = ph[0] daily_h2_ph = ph[1] daily_h1_ph = ph[2] orig_daily_ph = ph[3] seasonal_h3_mag = mag[4] seasonal_h2_mag = mag[5] seasonal_h1_mag = mag[6] annual_mag = mag[7] seasonal_h3_ph = ph[4] seasonal_h2_ph = ph[5] seasonal_h1_ph = ph[6] annual_ph = ph[7] #convert sub diurnal phases from UTC to solar time daily_h3_ph = modules.solar_time_phase_corrector(daily_h3_ph, 6, time_diff) daily_h2_ph = modules.solar_time_phase_corrector(daily_h2_ph, 24. / 3., time_diff) daily_h1_ph = modules.solar_time_phase_corrector(daily_h1_ph, 12, time_diff) orig_daily_ph = modules.solar_time_phase_corrector(orig_daily_ph, 24, time_diff) diurnal_phs = [daily_h3_ph, daily_h2_ph, daily_h1_ph, orig_daily_ph] #convolve annual cycle and harmonics to seasonal waveform for 1 year seasonal_mag, seasonal_min_ph, seasonal_max_ph, seasonal_waveform, seasonal_ff = modules.period_convolution( key_seasonal_periods, full_times_year, seasonal_mags, seasonal_phs, mean_array) #convolve diurnal cycle and harmonics to diurnal waveform for 1 day diurnal_mag, diurnal_min_ph, diurnal_max_ph, diurnal_waveform, diurnal_ff = modules.period_convolution( key_diurnal_periods, full_times_day, diurnal_mags, diurnal_phs, mean_array) #convolve all full_mag, full_min_ph, full_max_ph, full_waveform, full_ff = modules.period_convolution( periodic_periods, full_times, mag, ph, mean_array) #convert phase to time daily_h3_ph = modules.convert_phase_units_actual_single(daily_h3_ph, 6.) daily_h2_ph = modules.convert_phase_units_actual_single( daily_h2_ph, 24. / 3.) daily_h1_ph = modules.convert_phase_units_actual_single(daily_h1_ph, 12.) orig_daily_ph = modules.convert_phase_units_actual_single( orig_daily_ph, 24.) diurnal_min_ph = modules.convert_phase_units_actual_single( diurnal_min_ph, 24.) diurnal_max_ph = modules.convert_phase_units_actual_single( diurnal_max_ph, 24.) seasonal_h3_ph = modules.convert_phase_units_actual_single( seasonal_h3_ph, 3.) 
seasonal_h2_ph = modules.convert_phase_units_actual_single( seasonal_h2_ph, 4.) seasonal_h1_ph = modules.convert_phase_units_actual_single( seasonal_h1_ph, 6.) annual_ph = modules.convert_phase_units_actual_single(annual_ph, 12.) seasonal_min_ph = modules.convert_phase_units_actual_single( seasonal_min_ph, 12.) seasonal_max_ph = modules.convert_phase_units_actual_single( seasonal_max_ph, 12.) return (x, daily_h3_mag, daily_h3_ph, daily_h2_mag, daily_h2_ph, daily_h1_mag, daily_h1_ph, orig_daily_mag, orig_daily_ph, diurnal_mag, diurnal_min_ph, diurnal_max_ph, seasonal_h3_mag, seasonal_h3_ph, seasonal_h2_mag, seasonal_h2_ph, seasonal_h1_mag, seasonal_h1_ph, annual_mag, annual_ph, seasonal_mag, seasonal_min_ph, seasonal_max_ph, mean_array, diurnal_waveform, seasonal_waveform, full_waveform)
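#Hedged sketch (one common convention; the exact sign/offset handling lives in
#modules.convert_phase_units_actual_single): a phase in radians maps onto the
#period's natural time unit as phase / (2*pi) * n_units, e.g. 24 for the
#diurnal cycle (hours) or 12 for the annual cycle (months).
import numpy as np

def phase_to_units(phase_rad, n_units):
    return (phase_rad % (2 * np.pi)) / (2 * np.pi) * n_units

#phase_to_units(np.pi, 24.) -> 12.0 (diurnal maximum at local noon)
#phase_to_units(np.pi, 12.) ->  6.0 (annual maximum mid-year)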
#----------------------------------------
#find model data gridbox to compare with obs.
#get model gridbox for obs site
lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, obs_lat, obs_lon)

model_var = model_var[:, lat_n, lon_n]
model_var = model_var * 1e9
model_var_mask = np.ma.masked_where(model_var <= 0, model_var)
model_ave = np.ma.average(model_var_mask)

#--------------------------------------------
#take averages of obs and model over blocks of 6 points (6-hourly for hourly data)
obs_time = modules.date_process(obs_date, obs_time, start_year)
model_time = modules.date_process(model_date, model_time, start_year)

divisor = 6

#take block average of obs
total_len = len(obs_var_mask) / divisor
start = 0
end = divisor
ave_obs_var = []
ave_obs_time = []
for i in range(total_len):
    ave = np.ma.average(obs_var_mask[start:end])
    ave_obs_time = np.append(ave_obs_time, obs_time[start])
    ave_obs_var = np.append(ave_obs_var, ave)
    start += divisor
    end += divisor
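#Hedged sketch of the block averaging above: with hourly data, every `divisor`
#consecutive points are averaged and the first timestamp of each block is
#kept. A reshape-based equivalent of the explicit loop (plain mean here; the
#loop above uses np.ma.average so masked gaps are handled).
import numpy as np

divisor_ex = 6
hourly_vals_ex = np.arange(48, dtype=float)            #toy two-day hourly series
hourly_time_ex = np.arange(0, 2, 1. / 24)

n_blocks = len(hourly_vals_ex) // divisor_ex
block_vals_ex = hourly_vals_ex[:n_blocks * divisor_ex].reshape(n_blocks, divisor_ex).mean(axis=1)
block_time_ex = hourly_time_ex[:n_blocks * divisor_ex:divisor_ex]   #first time of each block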
def site_iter_process(valid_refs,c): #process data for each site at a time #for site_ref in valid_refs: site_ref = valid_refs[c] data_valid = True print 'ref = ',site_ref site_test = all_refs == site_ref site_yyyymmdd = yyyymmdd[site_test] site_hhmm = hhmm[site_test] site_vals = vals[site_test] site_mm = all_mm[site_test] site_units = units[site_test] if species == 'ISOP': site_sample_len = sample_len[site_test] #check for data below limit of detection (only for ISOP) as other species have LOD check by line in file. If it is change to -99999 #LOD for ISOP if 0.01 ppbv if species == 'ISOP': lod_test = site_vals < 0.01 #convert from ppm to ppb if (species == 'O3') or (species == 'NO') or (species == 'NO2'): for i in range(len(site_vals)): if site_units[i] == 'Parts per million': site_vals[i] = site_vals[i]*1.e3 elif site_units[i] == 'Parts per billion': pass else: print site_units[i] 1+'a' # convert from ppbC to ppb if species == 'ISOP': for i in range(len(site_vals)): #078 is Parts per billion Carbon, Isoprene has 5 Carbons if site_units[i] == '078': site_vals[i] = site_vals[i]/5. #008 is Parts per billion if site_units[i] == '008': pass #101 is Parts per million Carbon if site_units[i] == '101': site_vals[i] = (site_vals[i]/5.)*1.e3 site_vals[lod_test] = -99999 #put vals into full grid date_con = np.array(site_yyyymmdd).astype(int) time_con = np.array(site_hhmm).astype(int) #create max possible o3 grid full_data = np.empty(n_hours) full_data[:] = -99999 #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) syn_grid_time = np.arange(0,n_days,1./24) syn_grid_time = np.round(syn_grid_time,decimals=5) #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') site_vals = np.array(site_vals) #if date goes past where it should finish, omit it. 
inv_i = indices < len(full_data) indices = indices[inv_i] site_vals = site_vals[inv_i] full_data[indices] = site_vals #get site meta meta_index = meta_refs.index(site_ref) lat = np.float64(meta_lats[meta_index]) lon = np.float64(meta_lons[meta_index]) alt = np.float64(meta_alts[meta_index]) raw_class_name = meta_class[meta_index] #get measurement method, take mode of big methods array site_mm = stats.mode(site_mm)[0][0] if (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLETABSORPTION') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLET2BMODEL202') or (site_mm.upper() == 'INSTRUMENTAL-UVPHOTOMETRIC') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLETRADIATIONABSORBTN') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLET') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLETPHOTOMETRY') or (site_mm.upper() == 'INSTRUMENTAL-UVABSORPTIONPHOTOMETRY/UV2BMODEL202AND205') or (site_mm.upper() == 'INSTRUMENTAL-ECOTECHSERINUS10'): mm = 'ultraviolet photometry' elif (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-GASPHASECHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCEAPIMODEL265EANDT265') or (site_mm.upper() == 'LOWLEVELNOXINSTRUMENTAL-TECO42SCHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-GAS-PHASECHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPIT200UPPHOTOLYTIC') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPI200EU/501') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9841T') or (site_mm.upper() == 'TELEDYNE-APIMODEL200EUPORT200UP-PHOTOLYTIC-CHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-TL,42I-TL') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCERHODAMINEBDYE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-Y,42I-Y') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9843'): mm = 'chemiluminescence' elif (site_mm.upper() == 'INSTRUMENTAL-OPENPATHO3ANALYZER') or (site_mm.upper() == 'INSTRUMENTAL-OPENPATHNOANALYZER'): mm = 'differential optical absorption spectrosocopy' elif (site_mm.upper() == 'TELEDYNEMODELT500U-CAVITYATTENUATEDPHASESHIFTSPECTROSCOPY'): mm = 'cavity attenuated phase shift spectroscopy' elif (site_mm.upper() == 'INSTRUMENTAL-COLORIMETRIC-GRIESS-SALTZMAN') or (site_mm.upper() == 'INSTRUMENTAL-COLORIMETRIC'): mm = 'griess saltzman colorimetric' elif (site_mm.upper() == 'INSTRUMENTAL-COLORIMETRIC-LYSHKOW(MOD)'): mm = 'lyshkow colorimetric ' elif (site_mm.upper() == 'INSTRUMENTAL-COULOMETRIC'): mm = 'coulometry' else: print site_mm.upper() 1+'a' #do data quality checks full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year) #set site file resolution file_res = 'H' #set sampling as average st = 'average' anthrome_class_name = 'na' return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
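#Hedged sketch of the AQS isoprene unit handling above: carbon-based units are
#divided by the 5 carbon atoms per isoprene molecule, and ppm-based values are
#additionally scaled to ppb. Values are examples only.
val_ppbc_ex = 2.5                                #parts per billion carbon (unit code 078)
val_ppmc_ex = 0.004                              #parts per million carbon (unit code 101)

isop_ppb_from_ppbc = val_ppbc_ex / 5.            #-> 0.5 ppb isoprene
isop_ppb_from_ppmc = (val_ppmc_ex / 5.) * 1.e3   #-> 0.8 ppb isoprene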
#find model data gridbox to compare with obs.
#get model gridbox for obs site
lat_n,lon_n = modules.obs_model_gridbox(lat_e,lon_e,obs_lat,obs_lon)

model_var = model_var[:,lat_n,lon_n]
model_var = model_var*1e9
model_var_mask = np.ma.masked_where(model_var<=0,model_var)
model_ave = np.ma.average(model_var_mask)
model_var = model_var[~np.isnan(model_var_mask)]

#--------------------------------------------
#get valid data and process time
obs_time = np.array(modules.date_process(obs_date,obs_time,start_year))
model_time = np.array(modules.date_process(model_date,model_time,start_year))

model_test = model_var >= 0
model_var = model_var[model_test]
model_time = model_time[model_test]

obs_test = obs_var >= 0
obs_var = obs_var[obs_test]
obs_time = obs_time[obs_test]

#--------------------------------------------
#take LSP's
#windowing?
wind_set = raw_input('Windowing? Y or N?\n')
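#--------------------------------------------
#modules.obs_model_gridbox is defined elsewhere and is not shown in this file.
#As a rough sketch only (assuming regularly spaced grid-centre arrays; the real
#helper takes grid edges and may handle longitude wrapping differently), a
#nearest-gridbox lookup can be written as:
import numpy as np

def nearest_gridbox(lat_centres, lon_centres, site_lat, site_lon):
    #index of the grid cell whose centre is closest to the site
    lat_n = int(np.argmin(np.abs(np.asarray(lat_centres) - site_lat)))
    lon_n = int(np.argmin(np.abs(np.asarray(lon_centres) - site_lon)))
    return lat_n, lon_n

#example on a 4 x 5 degree grid
example_lat_c = np.arange(-88., 90., 4.)
example_lon_c = np.arange(-180., 180., 5.)
print(nearest_gridbox(example_lat_c, example_lon_c, 51.5, -0.1))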
def site_iter_process(valid_refs, c): # for each valid location process # limit obs data due for each site in valid_obs_site_names # for c in range(len(valid_refs)): all_lat = [] all_lon = [] all_alt = [] all_st = [] all_mm = [] site_ref = valid_refs[c] file_valid = True data_valid = True print site_ref file_res = data_resolutions[c] print file_res # read files for each valid site s_files = sorted( glob.glob("/work/home/db876/observations/surface/%s/GAW/%s**.%s**.dat" % (species, site_ref.lower(), file_res)) ) print s_files if file_res == "hr": site_files = sorted(s_files, key=lambda x: x.split(".hr")[1]) else: site_files = sorted(s_files) delete_inds = [] if file_res == "hr": # limit site files before and after year limit for i in range(len(site_files)): f = site_files[i] year = f.split(".hr")[1][:4] if int(year) < int(start_year): delete_inds.append(i) if int(year) > int(end_year): delete_inds.append(i) site_files = np.delete(site_files, delete_inds) print site_files site_file_len = len(site_files) s_count = 0 start_ind = 0 end_ind = 0 for f in site_files: print f read = np.loadtxt(f, dtype="S10,S5,f8", comments="C", usecols=(0, 1, 4), unpack=True) read = np.array(read) dates = read[0, :] times = read[1, :] conc = read[2, :] conc = np.array(conc) conc = conc.astype(float) # change all vals < 0 to np.NaN inv_test = conc < 0 conc[inv_test] = np.NaN start_ind = end_ind end_ind += len(conc) s_count += 1 units = [] mycsv = csv.reader(open(f)) row_count = 0 for row in mycsv: if row_count == 11: val = " ".join(row) lat = val.replace(" ", "") lat = lat[12:] lat = float(lat) all_lat.append(lat) # get lon if row_count == 12: val = " ".join(row) lon = val.replace(" ", "") lon = lon[13:] lon = float(lon) all_lon.append(lon) # get altitude if row_count == 13: val = " ".join(row) alt = val.replace(" ", "") alt = alt[12:] alt = float(alt) all_alt.append(alt) # get units if row_count == 20: val = " ".join(row) unit = val.replace(" ", "") unit = unit[19:] # get measurement method if row_count == 21: val = " ".join(row) mm = val.replace(" ", "") mm = mm[21:] all_mm.append(mm) # get sampling type if row_count == 22: val = " ".join(row) st = val.replace(" ", "") st = st[16:] all_st.append(st) if row_count == 23: val = " ".join(row) tz = val.replace(" ", "") tz = tz[12:] row_count += 1 # test if units are in ppb for each file - if not convert if (unit != "ppb") & (unit != "ppbv"): if (unit == "ug/m3") or (unit == "ugN/m3"): print "converting units, temp = 20degC" # calculate conversion factor from mg/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144 / mol_mass * (273.15 + 20) / (1013.25 / 10) conc = conv_fact * conc elif (unit == "ug/m3-20C") or (unit == "ugN/m3-20C"): print "converting units, temp = 20degC" # calculate conversion factor from mg/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144 / mol_mass * (273.15 + 20) / (1013.25 / 10) conc = conv_fact * conc elif (unit == "ug/m3-25C") or (unit == "ugN/m3-25C") or (unit == "ug/m3at25C"): print "converting units, temp = 25degC" # calculate conversion factor from mg/m3 assuming 25 degC and 1 atm # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144 / mol_mass * (273.15 + 25) / (1013.25 / 10) conc = conv_fact * conc elif (unit == "mg/m3-20C") or (unit == "mgN/m3-20C"): print "converting units, temp = 25degC" # calculate conversion factor from mg/m3 assuming 25 degC and 1 atm # 
R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144 / mol_mass * (273.15 + 20) / (1013.25 / 10) conc = (conv_fact * conc) * 1e3 elif (unit == "mg/m3-25C") or (unit == "mgN/m3-25C"): print "converting units, temp = 25degC" # calculate conversion factor from mg/m3 assuming 25 degC and 1 atm # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144 / mol_mass * (273.15 + 25) / (1013.25 / 10) conc = (conv_fact * conc) * 1e3 elif (unit == "ppm") or (unit == "ppmv"): conc = conc * 1.0e3 elif (unit == "ppt") or (unit == "pptv"): conc = conc / 1.0e3 else: print "Unknown Unit" print unit 1 + "a" break if tz != "UTC": if tz == "": if site_ref.lower() in ["plm"]: tz = -5 if site_ref.lower() in ["kos", "edm", "vdl", "nwr"]: tz = 0 if site_ref.lower() in [ "jfj", "kps", "rig", "pay", "glh", "cmn", "zep", "dig", "hhe", "ktb", "stp", "ivn", "jcz", "kam", "lzp", "snz", "zbl", "kmw", "don", "mhn", "nia", "roq", "spm", ]: tz = 1 if site_ref.lower() in ["rcv", "aht", "oul", "uto", "vir", "fdt", "sem", "stn"]: tz = 2 if site_ref.lower() in ["dak"]: tz = 3 if site_ref.lower() in ["shp"]: tz = 4 if site_ref.lower() in ["isk"]: tz = 5 if site_ref.lower() in ["hkg"]: tz = 8 if site_ref.lower() in ["cgo"]: tz = 10 else: tz = tz.replace("LocaltimeUTC", "") tz = tz.replace("OtherUTC", "") tz = tz.replace("Localtime", "") tz = tz.replace(":", ".") try: before, sep, after = tz.rpartition(".") after = int(after) conv = int((100.0 / 60) * after) tz = before + sep + str(conv) except: 1 + 1 tz = float(tz) else: tz = 0 # check tz is whole number else skip site if (tz % 1) != 0: print "File Invalid, timezone is not a whole number." conc[:] = -99999 # process dates from date, time to days since start year dates = [s.replace("-", "") for s in dates] times = [s.replace(":", "") for s in times] if file_res == "hr": # some times go from 0100 to 2400, assume this is when sites report ave for hour previous.
Thus all times should have hour minused for i in range(len(times)): if times[i] == "2400": current_date = dates[i] test = np.array(dates) == current_date indices = [i for i, x in enumerate(test) if x] for x in indices: current_time = times[x] if current_time == "2400": current_time = "0000" date_datetime = datetime.datetime( int(current_date[0:4]), int(current_date[4:6]), int(current_date[6:]), int(current_time[:2]), int(current_time[2:]), ) date_datetime = date_datetime - datetime.timedelta(hours=1) times[x] = date_datetime.strftime("%H%M") # adjust dates and times if tz is not equal to 0 if tz != 0: for i in range(len(dates)): # create datetime dt = datetime.datetime( int(dates[i][:4]), int(dates[i][4:6]), int(dates[i][6:]), int(times[i][:2]), int(times[i][2:]) ) if tz > 0: # print 'Old dt', dt dt = dt - datetime.timedelta(hours=int(tz)) # print 'New dt', dt elif tz < 0: # print 'Old dt', dt dt = dt + datetime.timedelta(hours=np.abs(int(tz))) # print 'New dt', dt dates[i] = dt.strftime("%Y%m%d") times[i] = dt.strftime("%H%M") data = [dates, times, conc] try: big_list = np.hstack((big_list, data)) except: big_list = np.array(data) if s_count == site_file_len: # make sure big list exists try: big_list except: data_valid = False if data_valid == True: # get dates and times date_con = big_list[0, :] time_con = big_list[1, :] # get vals vals = np.array(big_list[2, :]).astype(float) # delete big list del big_list # if dates outside what asked for exclude first_date_val = int("%s0101" % (start_year)) last_date_val = int("%s1231" % (end_year)) test_valid = (np.array(date_con).astype(int) >= first_date_val) & ( np.array(date_con).astype(int) <= last_date_val ) date_con = date_con[test_valid] time_con = time_con[test_valid] vals = vals[test_valid] # Check if any times are duplicate, if so delete all but first del_list = [] for d in range(len(date_con) - 1): if (date_con[d] == date_con[d + 1]) & (time_con[d] == time_con[d + 1]): del_list.append(d + 1) if len(del_list) > 0: print "Deleting duplicate timepoints" print date_con[del_list], time_con[del_list] date_con = np.delete(date_con, del_list) time_con = np.delete(time_con, del_list) vals = np.delete(vals, del_list) # if file resolution is daily or monthly then replicate times after point, to fill hourly data array. 
count = 0 if file_res == "da": file_hours = len(date_con) for i in range(file_hours): current_hh = int(time_con[count][:2]) current_mm = int(time_con[count][2:]) s = datetime.datetime(year=start_year, month=1, day=1, hour=current_hh, minute=current_mm) e = datetime.datetime(year=start_year, month=1, day=2, hour=current_hh, minute=current_mm) day_hours = [d.strftime("%H%M") for d in pd.date_range(s, e, freq="H")][1:-1] date_con = np.insert(date_con, count + 1, [date_con[count]] * 23) time_con = np.insert(time_con, count + 1, day_hours) vals = np.insert(vals, count + 1, [vals[count]] * 23) count += 24 if file_res == "mo": file_hours = len(date_con) for i in range(file_hours): current_year = int(date_con[count][:4]) current_month = int(date_con[count][4:6]) next_month = current_month + 1 if next_month > 12: next_month = 1 next_year = current_year + 1 else: next_year = current_year s = datetime.datetime(year=current_year, month=current_month, day=1, hour=1, minute=0) e = datetime.datetime(year=next_year, month=next_month, day=1, hour=0, minute=0) day_date = [d.strftime("%Y%m%d") for d in pd.date_range(s, e, freq="H")][:-1] day_hour = [d.strftime("%H%M") for d in pd.date_range(s, e, freq="H")][:-1] date_con = np.insert(date_con, count + 1, day_date) time_con = np.insert(time_con, count + 1, day_hour) vals = np.insert(vals, count + 1, [vals[count]] * len(day_date)) count += len(day_date) + 1 date_con = np.array(date_con).astype(int) time_con = np.array(time_con).astype(int) # create max possible o3 grid o3_data = np.empty(n_hours) o3_data[:] = -99999 # delete dates,times and var outside date range val_test = (date_con >= int(output_res_dates_strings[0])) & ( date_con <= int(output_res_dates_strings[-1]) ) date_con = date_con[val_test] time_con = time_con[val_test] vals = vals[val_test] print date_con # find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con, time_con, start_year) converted_time = np.round(converted_time, decimals=5) syn_grid_time = np.arange(0, n_days, 1.0 / 24) syn_grid_time = np.round(syn_grid_time, decimals=5) # find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side="left") o3_data[indices] = vals # convert all Nans back to -99999 test = np.isnan(o3_data) o3_data[test] = -99999 # get mode of metadata lat = np.float64(stats.mode(all_lat)[0][0]) lon = np.float64(stats.mode(all_lon)[0][0]) alt = np.float64(stats.mode(all_alt)[0][0]) st = stats.mode(all_st)[0][0] mm = stats.mode(all_mm)[0][0] # check site is not urban using anthrome map from 2000 anthfile = "/work/home/db876/plotting_tools/core_tools/anthro2_a2000.nc" anthload = Dataset(anthfile) class_valid, anthrome_class_name = modules.anthrome_classify(anthload, [lat], [lon]) if class_valid == "invalid": data_valid = False print "Site Invalid, site classed as urban by anthrome map." 
# get measurement type and sampling type (take mode from collected list) if (st == "continuous") or ( st == "continuous(carbondioxide),remotespectroscopicmethod(methaneandsurfaceozone)" ): st = "average" elif st == "flask": st = "flask" elif st == "filter": st = "filter" else: print st 1 + "a" if mm == "Lightabsorptionanalysis(UV)": mm = "ultraviolet photometry" elif mm == "CavityRingdownSpectroscopy": mm = "cavity ringdown spectroscopy" elif mm == "NDIR": mm = "non-dispersive infrared spectroscopy" elif mm == "GasChromatography(FID)": mm = "gas chromatography flame ionisation detection" elif mm == "Gas Chromatography (RGD)": mm = "gas chromatography reduction gas detection" elif mm == "Chemiluminescence": mm = "chemiluminescence" elif (mm == "Spectrophotometry") or ( mm == "spectrophotometry,naphthyl-ethylenediaminedihydrochloridemethod" ): mm = "spectrophotometry" elif mm == "": if species == "O3": mm = "ultraviolet photometry" if species == "CO": mm = "non-dispersive infrared spectroscopy" if species == "NO2": mm = "chemiluminescence" if species == "NO": mm = "chemiluminescence" if species == "ISOP": mm = "gas chromatography flame ionisation detection" # do data quality checks full_data, data_valid = modules.quality_check( o3_data, data_valid, data_resolution, alt, grid_dates, start_year, end_year ) # convert file res to standard format if file_res == "hr": file_res = "H" elif file_res == "da": file_res = "D" elif file_res == "mo": file_res = "M" # no raw class so set as na raw_class_name = "na" return c, full_data, data_valid, lat, lon, alt, raw_class_name, anthrome_class_name, mm, st, file_res
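#--------------------------------------------
#The ug/m3 to ppb conversion used repeatedly above can be isolated into a small
#helper. This is only a sketch of the same expression, R/MW * T(K) / (P(hPa)/10),
#at 1 atm; mol_mass is the molar mass in g/mol and temp_c is whatever temperature
#the reporting convention of the file assumes.
def ugm3_to_ppb(conc_ugm3, mol_mass, temp_c=20.):
    conv_fact = 8.3144 / mol_mass * (273.15 + temp_c) / (1013.25 / 10)
    return conv_fact * conc_ugm3

#example: 100 ug/m3 of ozone (48 g/mol) at 20 degC is roughly 50 ppb
print(ugm3_to_ppb(100., 48.))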
def site_iter_process(valid_refs, c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] site_resolutions = [] site_ref = valid_refs[c] data_valid = True print 'ref = ', site_ref, c if species != 'ISOP': site_test = all_refs == site_ref site_yyyymmdd = yyyymmdd[site_test] site_hhmm = hhmm[site_test] site_vals = vals[site_test] n_dup_array = np.array([0] * len(site_vals)) else: if site_ref[0] == '0': site_ref = site_ref[1:] files = [] site_yyyymmdd = [] site_hhmm = [] site_vals = [] n_dup_array = [] for y in all_years: try: files.append( glob.glob('../CANADANAPS/VOC%s/S%s*' % (y, site_ref))) except: pass files = [item for sublist in files for item in sublist] for f in files: print f all_data = get_data(f) all_data = all_data.values() test_header_range = range(0, 10) for x in test_header_range: headers = all_data[0][x] if 'Isoprene' in headers: header_x = x break data_cut = all_data[0][header_x + 1:] var_i = headers.index('Isoprene') #date_i = headers.index('Sample Date') date_i = headers.index('Compounds') time_i = headers.index('START TIME') duration_i = headers.index('DURATION') for i in range(len(data_cut)): row_cut = data_cut[i] try: dur = float(row_cut[duration_i]) if dur.is_integer() == False: dur = round(dur, 0) except: #round to nearest hour if necessary if float(row_cut[duration_i].strftime("%M")) != 0: if dur >= 30: dur = float(row_cut[duration_i].strftime("%H")) + 1 else: dur = float(row_cut[duration_i].strftime("%H")) else: dur = float(row_cut[duration_i].strftime("%H")) if dur.is_integer() == False: print 'duration is float' 1 + 'a' try: val = np.float64(row_cut[var_i]) except: val = -99999 if dur == 1: site_resolutions.append('H') #if (val >= 0.01): # site_vals.append([val]) #else: # site_vals.append([-99999]) site_vals.append([val]) n_dup_array.append([0]) site_yyyymmdd.append([row_cut[date_i].strftime("%Y%m%d")]) try: site_hhmm.append( [row_cut[time_i][:2] + row_cut[time_i][3:5]]) except: #round to nearest hour if necessary ti = row_cut[time_i].strftime("%H%M") if float(row_cut[time_i].strftime("%M")) != 0: print 'non whole time = ', row_cut[time_i] if float(row_cut[time_i].strftime("%M")) >= 30: site_hhmm.append([ datetime.time(hour=int(ti[:2]) + 1, minute=0).strftime("%H%M") ]) else: site_hhmm.append([ datetime.time(hour=int(ti[:2]), minute=0).strftime("%H%M") ]) else: site_hhmm.append( [row_cut[time_i].strftime("%H%M")]) #deal with sample lens > 1 hour else: if output_res == 'H': continue else: site_resolutions.append('D') #if (val >= 0.01): # site_vals.append([val]) #else: # site_vals.append([-99999]) site_vals.append([val]) n_dup_array.append([0]) try: site_yyyymmdd.append( [row_cut[date_i].strftime("%Y%m%d")]) except: print row_cut[date_i] 1 + 'a' try: site_hhmm.append( [row_cut[time_i][:2] + row_cut[time_i][3:5]]) except: #round to nearest hour if necessary ti = row_cut[time_i].strftime("%H%M") if float(row_cut[time_i].strftime("%M")) != 0: print 'non whole time = ', row_cut[time_i] if float(row_cut[time_i].strftime("%M")) >= 30: site_hhmm.append([ datetime.time( hour=int(ti[:2]) + 1, minute=0).strftime("%H%M") ]) else: site_hhmm.append([ datetime.time( hour=int(ti[:2]), minute=0).strftime("%H%M") ]) else: site_hhmm.append( 
[row_cut[time_i].strftime("%H%M")]) current_year = int(site_yyyymmdd[-1][0][:4]) current_month = int(site_yyyymmdd[-1][0][4:6]) current_day = int(site_yyyymmdd[-1][0][6:]) current_hh = int(site_hhmm[-1][0][:2]) current_mm = int(site_hhmm[-1][0][2:]) s = datetime.datetime(year=current_year, month=current_month, day=current_day, hour=current_hh, minute=current_mm) e = s + datetime.timedelta(hours=dur) day_dates = [ d.strftime('%Y%m%d') for d in pd.date_range(s, e, freq='H') ][1:-1] day_hours = [ d.strftime('%H%M') for d in pd.date_range(s, e, freq='H') ][1:-1] site_yyyymmdd.append(day_dates) site_hhmm.append(day_hours) site_vals.append([site_vals[-1][0]] * len(day_dates)) #append to n duplicated array n_dup_array.append([0]) n_dup_array.append([1] * len(day_dates)) if species == 'ISOP': site_yyyymmdd = [item for sublist in site_yyyymmdd for item in sublist] site_hhmm = [item for sublist in site_hhmm for item in sublist] site_vals = [item for sublist in site_vals for item in sublist] n_dup_array = np.array( [item for sublist in n_dup_array for item in sublist]) if len(site_ref) == 5: site_ref = '0' + site_ref site_vals = np.float64(site_vals) #add val to total obs count n_all += len(site_vals) #test if site_ref in meta_refs, if not then exit if site_ref not in meta_refs: print site_ref inv_nometa += 1 print 'Site Invalid. No Metadata for ref' if no2_type == 'MOLYBDENUM': n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_obs_after_anyvaliddata, inv_nokeymeta, n_obs_after_nokeymeta, inv_resolution, n_obs_after_resolution, inv_badmeasurementmethod, n_obs_after_badmeasurementmethod = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] exit_r = 'nometa' return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) n_after_nometa += len(site_vals) #convert all invalids to -99999 test_inv = site_vals < 0 site_vals[test_inv] = -99999 #create max possible grid full_data = np.empty(n_hours) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #get meta meta_index = meta_refs.index(site_ref) data_tz = np.float32(meta_tz[meta_index]) all_tz = [data_tz] try: lat = np.float32(meta_lats[meta_index]) except: lat = 'na' try: lon = np.float32(meta_lons[meta_index]) except: lon = 'na' try: alt = np.float32(meta_alts[meta_index]) except: alt = 'na' raw_class_name = meta_class[meta_index] site_name = meta_sitenames[meta_index] unit = 'na' contact = meta_contacts[meta_index] country = meta_countries[meta_index] #adjust dates and times if tz is not equal to 0 tz = int(data_tz) if tz != 0: for i in range(len(site_yyyymmdd)): #create datetime dt = datetime.datetime(int(site_yyyymmdd[i][:4]), int(site_yyyymmdd[i][4:6]), int(site_yyyymmdd[i][6:]), int(site_hhmm[i][:2]), int(site_hhmm[i][2:])) if tz > 0: dt = dt - datetime.timedelta(hours=int(tz)) elif tz < 0: dt = dt + datetime.timedelta(hours=np.abs(int(tz))) site_yyyymmdd[i] = dt.strftime("%Y%m%d") site_hhmm[i] = dt.strftime("%H%M") #put vals into full 
grid date_con = np.array(site_yyyymmdd).astype(int) time_con = np.array(site_hhmm).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] site_vals = site_vals[test_inds] n_dup_array = n_dup_array[test_inds] #set st_big and mm_big st_big = ['continuous'] * len(site_vals) if species == 'O3': mm_big = ['ultraviolet photometry'] * len(site_vals) elif species == 'NO': mm_big = ['chemiluminescence'] * len(site_vals) elif species == 'NO2': mm_big = ['chemiluminescence (conversion-molybdenum)'] * len(site_vals) elif species == 'CO': mm_big = ['non-dispersive infrared spectrometry'] * len(site_vals) elif species == 'ISOP': mm_big = ['gas chromatography mass selective detection'] * len(site_vals) #get obs valid after flagsandlod test = site_vals != -99999 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = len(site_vals[test]) - valid_hours_dup n_after_flagsandlod += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con, time_con, start_year) converted_time = np.round(converted_time, decimals=5) syn_grid_time = np.arange(0, n_days, 1. / 24) syn_grid_time = np.round(syn_grid_time, decimals=5) raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') site_vals = np.array(site_vals) full_data_after_flagsandlod[raw_indices] = site_vals raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) # test and remove duplicate and overlap points converted_time, site_vals, mm_big, st_big, n_dup_array = modules.remove_duplicate_points( site_ref, converted_time, site_vals, mm_big, st_big, n_dup_array, output_res) test = site_vals != -99999 valid_hours_dup = np.sum(n_dup_array[test]) n_obs_valid = int(len(site_vals[test]) - valid_hours_dup) print 'n obs valid = ', n_obs_valid n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = site_vals big_n_dup_array[indices] = n_dup_array #if species is CO then convert units from ppmv to ppbv if species == 'CO': valid_inds = full_data != -99999 full_data[valid_inds] = full_data[valid_inds] * 1e3 #if species is ISOP then convert units from mg/m3 to ppbv if species == 'ISOP': #calculate conversion factor from mg/m3 assuming 25 degC and 1 atm #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10 conv_fact = 8.3144 / mol_mass * (273.15 + 25) / (1013.25 / 10) valid_inds = full_data != -99999 full_data[valid_inds] = full_data[valid_inds] * conv_fact key_meta = [lat, lon, alt] #set site file resolution if (species == 'O3') or (species == 'CO') or (species == 'NO') or (species == 'NO2'): file_res = 'H' else: # if no valid data then site res does not matter if len(site_resolutions) == 0: file_res = 'na' else: #if all site resolutions are same continue then take first file_res all_same = all(x == site_resolutions[0] for x in site_resolutions) if all_same == True: file_res = site_resolutions[0] else: #otherwise take highest frequency res as file_res if 'M' in site_resolutions: file_res = 'M' elif 'D' in site_resolutions: file_res = 'D' else: file_res = 'H' #get sampling/instrument grids raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list =
modules.sampling_and_instruments_by_processgroup( site_ref, process_group, species, raw_st, raw_mm, full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list, unknown_mm_refs_list, no2_type) #do quality checks data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control( site_ref, species, file_res, no2_type, grid_dates, full_data, big_n_dup_array, valid_hours_dup, raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] meta = [ lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na' ] return c, ['na'], ['na'], [ 'na' ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros( 1) #make tz int after checks data_tz = np.float32(data_tz) #set processed unit p_unit = 'pbbv' #get local timezone try: local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24 - int(datetime_offset.seconds / 60 / 60)) else: local_tz = int(datetime_offset.seconds / 60 / 60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref) unknown_local_tz_list.append(site_ref) #pack meta meta = [ lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz, local_tz, site_name, country, contact ] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([ inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution, inv_badmeasurementmethod ]) n_c_list = np.array([ n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate, n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution, n_after_badmeasurementmethod ]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [ unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list ] return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
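#--------------------------------------------
#Both processing functions in this file shift YYYYMMDD/HHMM strings from local
#time to UTC by a whole-hour offset. The sketch below isolates that step with
#illustrative names; subtracting the offset covers both signs (subtracting a
#negative offset adds hours, matching the two branches used above).
import datetime

def shift_to_utc(yyyymmdd, hhmm, tz_hours):
    dt = datetime.datetime(int(yyyymmdd[:4]), int(yyyymmdd[4:6]), int(yyyymmdd[6:]),
                           int(hhmm[:2]), int(hhmm[2:]))
    dt = dt - datetime.timedelta(hours=tz_hours)
    return dt.strftime("%Y%m%d"), dt.strftime("%H%M")

#example: 01:00 local at UTC+1 is 00:00 UTC on the same day
print(shift_to_utc('20100101', '0100', 1))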
#find model data gridbox to compare with obs.
#get model gridbox for obs site
lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, obs_lat, obs_lon)

model_var = model_var[:, lat_n, lon_n]
model_var = model_var * 1e9
model_var_mask = np.ma.masked_where(model_var <= 0, model_var)
model_ave = np.ma.average(model_var_mask)
model_var = model_var[~np.isnan(model_var_mask)]

#--------------------------------------------
#get valid data and process time
obs_time = np.array(modules.date_process(obs_date, obs_time, start_year))
model_time = np.array(modules.date_process(model_date, model_time, start_year))

model_test = model_var >= 0
model_var = model_var[model_test]
model_time = model_time[model_test]

obs_test = obs_var >= 0
obs_var = obs_var[obs_test]
obs_time = obs_time[obs_test]

#--------------------------------------------
#take LSP's
#windowing?
wind_set = raw_input('Windowing? Y or N?\n')
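#--------------------------------------------
#The masking of non-positive model values before averaging, as done above, in
#isolation (illustrative values only):
import numpy as np

example_vals = np.array([41., -99999., 38., 0., 45.])
#mask anything <= 0 so missing/invalid points do not bias the mean
example_mask = np.ma.masked_where(example_vals <= 0, example_vals)
print(np.ma.average(example_mask))   #only the three valid points -> ~41.3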
def site_iter_process(valid_refs,c): #set local counts inv_nometa = 0 inv_anyvaliddata = 0 inv_nokeymeta = 0 inv_resolution = 0 inv_badmeasurementmethod = 0 n_all = 0 n_after_nometa = 0 n_after_flagsandlod = 0 n_after_duplicate = 0 n_after_anyvaliddata = 0 n_after_nokeymeta = 0 n_after_resolution = 0 n_after_badmeasurementmethod = 0 #set local unknown lists unknown_mm_list = [] unknown_mm_refs_list = [] unknown_local_tz_list = [] data_valid = True site_ref = valid_refs[c] print 'ref = ',site_ref,c #read in site data from chunk site_yyyymmdd = a_site_yyyymmdd[c] site_hhmm = a_site_hhmm[c] site_vals = a_site_vals[c] mm_big = a_mm_big[c] site_units = a_site_units[c] site_res = a_site_res[c] n_dup_arr = a_n_dup_arr[c] lat = a_lat[c] lon = a_lon[c] alt = a_alt[c] unit = a_unit[c] raw_class_name = a_raw_class_name[c] site_name = a_site_name[c] no_meta = a_no_meta[c] country = 'United States' contact = '*****@*****.**' print '1' try: lat = np.float32(lat) except: pass try: lon = np.float32(lon) except: pass try: alt = np.float32(alt) except: pass #process data for each site at a time #for site_ref in valid_refs: #site_ref = valid_refs[c] #site_test = all_refs == site_ref #site_yyyymmdd = yyyymmdd[site_test] #site_hhmm = hhmm[site_test] #site_vals = vals[site_test] #mm_big = all_mm[site_test] #site_units = all_units[site_test] #if species == 'ISOP': # n_dup_arr = n_dup_array[site_test] # site_res = site_resolutions[site_test] #else: # n_dup_arr = np.zeros(len(site_vals)) #convert to ppb if (species == 'O3') or (species == 'NO') or (species == 'NO2') or (species == 'CO'): for i in range(len(site_vals)): if site_units[i] == 'Parts per million': site_vals[i] = site_vals[i]*1.e3 elif site_units[i] == 'Parts per billion': pass else: print site_units[i] 1+'a' # convert to ppb if species == 'ISOP': for i in range(len(site_vals)): #078 is Parts per billion Carbon, Isoprene has 5 Carbons if site_units[i] == 'Parts per billion Carbon': site_vals[i] = site_vals[i]/5. #008 is Parts per billion elif site_units[i] == 'Parts per billion': pass #101 is Parts per million Carbon elif site_units[i] == 'Parts per million Carbon': site_vals[i] = (site_vals[i]/5.)*1.e3 else: print site_units[i] 1+'a' #add val to total obs count valid_hours_dup = np.sum(n_dup_arr) n_all += len(site_vals) - valid_hours_dup #get site meta #try: # meta_index = meta_refs.index(site_ref) # try: # lat = np.float32(meta_lats[meta_index]) # except: # lat = 'na' # try: # lon = np.float32(meta_lons[meta_index]) # except: # lon = 'na' # try: # alt = np.float32(meta_alts[meta_index]) # except: # alt = 'na' #except: # pass #get local timezone try: local_tz_name = tz_root.tzNameAt(lat,lon,forceTZ=True) pytz_obj = pytz.timezone(local_tz_name) datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000,1,1)) if datetime_offset < datetime.timedelta(0): local_tz = -(24-int(datetime_offset.seconds/60/60)) else: local_tz = int(datetime_offset.seconds/60/60) except: local_tz = 'na' print 'TIMEZONE NOT KNOWN, SITE IS %s'%(site_ref) unknown_local_tz_list.append(site_ref) #if species is ISOP set data_tz as local_tz if species == 'ISOP': data_tz = int(local_tz) else: data_tz = 0 #test if site_ref in meta_refs, if not then exit #also test for ISOP if have local_tz if (no_meta == 'Yes') or (data_tz == 'na'): inv_nometa+=1 print 'Site Invalid. 
No Metadata for ref' if no2_type == 'MOLYBDENUM': n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_obs_after_anyvaliddata,inv_nokeymeta,n_obs_after_nokeymeta,inv_resolution,n_obs_after_resolution,inv_badmeasurementmethod,n_obs_after_badmeasurementmethod = 0,0,0,0,0,0,0,0,0,0,0,0,0 exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod]) n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod]) unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list] meta = ['na','na','na','na','na','na','na','na','na','na','na','na'] exit_r = 'nometa' return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1) valid_hours_dup = np.sum(n_dup_arr) n_after_nometa += len(site_vals) - valid_hours_dup #adjust dates and times if tz is not equal to 0 (only for ISOP) #use local tz calc to adjust times to UTC if species == 'ISOP': tz = int(data_tz) if tz != 0: for i in range(len(site_yyyymmdd)): #create datetime dt = datetime.datetime(int(site_yyyymmdd[i][:4]),int(site_yyyymmdd[i][4:6]),int(site_yyyymmdd[i][6:]),int(site_hhmm[i][:2]),int(site_hhmm[i][2:])) if tz > 0: dt = dt - datetime.timedelta(hours = int(tz)) elif tz < 0: dt = dt + datetime.timedelta(hours = np.abs(int(tz))) site_yyyymmdd[i] = dt.strftime("%Y%m%d") site_hhmm[i] = dt.strftime("%H%M") #put vals into full grid date_con = np.array(site_yyyymmdd).astype(int) time_con = np.array(site_hhmm).astype(int) #remove data < 1970 and >= 2015 test_inds = (date_con >= 19700101) & (date_con < 20150101) date_con = date_con[test_inds] time_con = time_con[test_inds] site_vals = site_vals[test_inds] mm_big = mm_big[test_inds] n_dup_arr = n_dup_arr[test_inds] #set st_big as 'continuous' st_big = ['continuous']*len(site_vals) #get obs valid test = site_vals >= 0 valid_hours_dup = np.sum(n_dup_arr[test]) n_obs_valid = len(site_vals[test]) - valid_hours_dup n_after_flagsandlod += n_obs_valid #create max possible grid full_data = np.empty(n_hours) full_data_after_flagsandlod = np.empty(n_hours) big_n_dup_array = np.zeros(n_hours) full_data[:] = -99999 full_data_after_flagsandlod[:] = -99999 #find matching times between actual times and grid of times, return big array of indices of matched indices in grid converted_time = modules.date_process(date_con,time_con,start_year) converted_time = np.round(converted_time,decimals=5) syn_grid_time = np.arange(0,n_days,1./24) syn_grid_time = np.round(syn_grid_time,decimals=5) raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left') site_vals = np.array(site_vals) full_data_after_flagsandlod[raw_indices] = site_vals raw_st = np.copy(st_big) raw_mm = np.copy(mm_big) #test and remove duplicate and overlap points converted_time,site_vals,mm_big,st_big,n_dup_arr = modules.remove_duplicate_points(site_ref,converted_time,site_vals,mm_big,st_big,n_dup_arr,output_res) test = site_vals >= 0 valid_hours_dup = np.sum(n_dup_arr[test]) n_obs_valid = int(len(site_vals[test]) - valid_hours_dup) n_after_duplicate += n_obs_valid #find matching times between actual times and grid of times, return big array of indices of matched indices in grid indices = np.searchsorted(syn_grid_time, converted_time, side='left') full_data[indices] = site_vals big_n_dup_array[indices] = n_dup_arr #unit = stats.mode(site_units)[0][0] #raw_class_name = meta_class[meta_index] #site_name = 
meta_sitenames[meta_index] #country = 'United States' #contact = '*****@*****.**' all_tz = [data_tz] key_meta = [lat,lon,alt] #set site file resolution if species != 'ISOP': file_res = 'H' else: #if all site resolutions are same continue then take first file_res all_same = all(x == site_res[0] for x in site_res) if all_same == True: file_res = site_res[0] else: #otherwise take highest frequency res as file_res if 'M' in site_res: file_res = 'M' elif 'D' in site_res: file_res = 'D' else: file_res = 'H' #get sampling/instrument grids raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,unknown_mm_list,unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(site_ref,process_group,species,raw_st,raw_mm,full_data_after_flagsandlod,full_data,raw_indices,unknown_mm_list,unknown_mm_refs_list,no2_type) print set(p_mm_grid) #do quality checks data_valid,full_data,valid_hours_dup,p_st_grid,p_mm_grid,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod,exit_r = modules.primary_quality_control(site_ref,species,file_res,no2_type,grid_dates,full_data,big_n_dup_array,valid_hours_dup,raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,data_resolution,n_obs_valid,key_meta,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod) if data_valid == False: exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod]) n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod]) unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list] meta = [lat,lon,alt,'na','na','na','na','na','na','na','na','na'] return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1) #set processed unit p_unit = 'pbbv' #pack meta meta = [lat,lon,alt,raw_class_name,file_res,unit,p_unit,data_tz,local_tz,site_name,country,contact] #if blank strings in meta then convert to 'na' for i in range(len(meta)): try: if meta[i].strip() == '': meta[i] = 'na' except: pass print set(raw_st_grid) print set(raw_mm_grid) print set(p_st_grid) print set(p_mm_grid) print meta exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod]) n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod]) print 'exit counts = ', exit_c_list print 'n obs counts = ', n_c_list unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list] return c,full_data,p_st_grid,p_mm_grid,data_valid,meta,exit_c_list,n_c_list,unknown_list,'na',big_n_dup_array
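#--------------------------------------------
#Both site_iter_process variants above derive an integer UTC offset for a site
#from a pytz timezone (tz_root appears to be a tzwhere-style lat/lon lookup
#object, which is not shown in this file). The sketch below reproduces just the
#offset-to-hours conversion for a named timezone.
import datetime
import pytz

def utc_offset_hours(tz_name):
    offset = pytz.timezone(tz_name).utcoffset(datetime.datetime(2000, 1, 1))
    if offset < datetime.timedelta(0):
        #negative offsets wrap: .seconds counts up from the previous day
        return -(24 - int(offset.seconds / 60 / 60))
    return int(offset.seconds / 60 / 60)

print(utc_offset_hours('America/New_York'))   #-5
print(utc_offset_hours('Europe/Berlin'))      #1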