Example #1
read = np.load(f)
read = read[1:, :]

labels = read[:, 0]
labels = labels.astype(int)

valid = labels == 1
dates = read[valid, 1]
dates = dates.astype(int)
hours = read[valid, 2]
hours = hours.astype(int)

all_vals = read[:, 3]
all_vals = all_vals.astype(float)

big_times = modules.date_process(dates, hours)
big_times = np.array(big_times)

#iterate through sites and take LOMB
daily_mag_array = []
daily_phase_array = []

full_time = np.arange(0, 2191, 1. / 24)

#array containing length of months from 2006 in days

month_lengths = [
    31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31,
    31, 30, 31, 30, 31, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28,
    31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30,
    31, 30, 31, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
]
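
# Added check (a sketch, assuming the list covers the six years 2006-2011,
# i.e. 72 months / 2191 days including leap-year 2008): the hard-coded values
# match what the standard calendar module computes.
import calendar
assert month_lengths == [calendar.monthrange(y, m)[1]
                         for y in range(2006, 2012) for m in range(1, 13)]
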
def run_LSP(mod_data,x):

    lat_i = lat_indices[x]
    lon_i = lon_indices[x]

    print lat_i,lon_i

    current_lat = lat_c[lat_i]
    current_lon = lon_c[lon_i]

    waveform = mod_data
    
    waveform_ave = np.average(waveform)
    
    model_date_val = np.copy(model_date)
    model_time_val = np.copy(model_time)
    
    time = modules.date_process(model_date_val,model_time_val,start_year)
    
    if species.lower() not in ('gmao_temp', 'gmao_psfc', 'wind_speed', 'wind_direction'):
        waveform = waveform*1e9
 
    #check model vals are valid
    #valid = vals >= 0
    #vals = vals[valid]
    #model_time_val = model_time[valid]
    #model_date_val = model_date[valid]

    #take 8 hour average
    divisor = 8

    total_len = len(waveform)/divisor
    start = 0
    end = divisor
    ave_waveform = []
    ave_time = []
    for i in range(total_len):
        ave = np.ma.average(waveform[start:end])
        ave_time=np.append(ave_time,time[start])
        ave_waveform=np.append(ave_waveform,ave)
        start+=divisor
        end+=divisor
 
    time=np.copy(ave_time)
    waveform=np.copy(ave_waveform)

    #take lsp unwindowed of waveform

    ua_periods,ua_mag,ua_ph,ua_fr,ua_fi = modules.take_lomb_unwindowed(time,waveform,ofac,1./24)

    #take out known periodic components at 1, 182.625 and 365.25 days a priori for a more accurate red noise fit.
    closest_daily_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i]-1.))
    closest_ha_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i]-182.625))
    closest_annual_index = min(range(len(ua_periods)), key=lambda i: abs(ua_periods[i]-365.25))

    rm_indices = [closest_daily_index,closest_ha_index,closest_annual_index]

    ua_mag_c,ua_fr,ua_fi = redfit.sidelobe_percent_remove(np.copy(ua_mag),ua_fr,ua_fi,rm_indices,5.,ua_periods)
    
    #-------------------------------------------------------------------------------
    #Do IFFT of altered spectra - with significant periods removed and gaps left in real and imag components linearly interpolated.
    #altered spectra provides red noise estimation baseline

    ##use ifft to get time series back from adjusted spectra
    #complex Fourier spectrum which corresponds to the Lomb-Scargle periodogram:
    F = [0]*((len(ua_fr)*2)+1)

    #set first real value to average 
    F[0] = complex(waveform_ave*len(waveform),0)

    #Get reverse real and imaginary values
    rev_ua_fr=np.copy(ua_fr[::-1])
    rev_ua_fi=np.copy(ua_fi[::-1])

    rev_ua_fr[0] = 0
    rev_ua_fi[0] = 0

    f_index = 1

    #Fill Fourier Spectrum real and imaginary values
    for i in range(len(ua_fr)):
        F[f_index] = complex(ua_fr[i],ua_fi[i])
        f_index+=1

    for i in range(len(ua_fr)):
        F[f_index] = complex(rev_ua_fr[i],-rev_ua_fi[i])
        f_index+=1

    F = np.array(F)    
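    #added note: F is laid out as [F_0, F_1..F_N, conj(F_N)..conj(F_1)], i.e.
    #(up to the zeroed first reverse term above) Hermitian-symmetric, so the
    #inverse FFT below returns an essentially real time series.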

    #Take ifft and just take real values
    ifft_ua_ts = np.fft.ifft(F)
    ifft_ua_ts = ifft_ua_ts.astype('float64')

    ifft_ua_ts_len = (len(ifft_ua_ts)/ofac) + np.mod(len(ifft_ua_ts),ofac)

    ifft_time = time[-ifft_ua_ts_len:]
    ifft_ua_ts = ifft_ua_ts[-len(waveform):]

    ifft_time = ifft_time-ifft_time[0]

    a_periods,a_mag,corr_a_mag,a_fr,a_fi,a_red_periods,a_red_mag,a_gredth,a_fac95,a_fac99,a_fac99_9,a_faccrit,a_fac_grid,a_sig_levels,a_tau,a_corr = redfit.red_background(nsim,mctest,ifft_time,ifft_ua_ts,ofac)

    #apply lsp correction from altered spectrum to unaltered spectrum
    corr_ua_mag = ua_mag/a_corr

    #check confidence of each point on spectrum

    sigs = np.zeros(len(corr_ua_mag))

    last_ind = len(a_sig_levels)-1

    for i in range(len(a_sig_levels)-1):
        conf_low = a_gredth*a_fac_grid[i]
        conf_up = a_gredth*a_fac_grid[i+1]
    
        current_last_ind = i+1
    
        for j in range(len(corr_ua_mag)):
            if sigs[j] == 0:
                if (corr_ua_mag[j] >= conf_low[j]) and (corr_ua_mag[j] < conf_up[j]):
                    sigs[j] = a_sig_levels[i]
                elif current_last_ind == last_ind:
                    if corr_ua_mag[j] > conf_up[j]:
                       sigs[j] = a_sig_levels[i+1]
    
    #get critical significance for all points on spectrum
    crit_sig = a_gredth*a_faccrit
    
    #get 95,99 and 99.9 % chi squared significance bands for all points on spectrum
    sig_95 = a_gredth*a_fac95
    sig_99 = a_gredth*a_fac99
    sig_99_9 = a_gredth*a_fac99_9
    
    return (x,sigs,sig_95,sig_99,sig_99_9,crit_sig,a_gredth,corr_ua_mag,ua_periods,a_tau)
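
# Usage sketch (hypothetical driver, not part of the original snippet): run_LSP
# returns x as its first element so results can be gathered from an
# asynchronous worker pool and re-ordered, e.g.
#
#   import multiprocessing
#   pool = multiprocessing.Pool(processes=8)
#   jobs = [pool.apply_async(run_LSP, (mod_data_all[:, i], i))
#           for i in range(len(lat_indices))]          #mod_data_all is assumed
#   pool.close(); pool.join()
#   results = sorted([j.get() for j in jobs])          #sorted by x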
Example #3
model_var_mask = np.ma.masked_where(model_var < 0, model_var)

gridbox_count = len(lat_c) * len(lon_c)

lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, 1., 114.)

y = model_var[:, lat_n, lon_n]

y = y * 1e9

#set up plot
fig = plt.figure(figsize=(23, 12.3))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)

x = modules.date_process(model_date, model_time, 2005)

ofac = 4

model_periods, model_mag, model_ph, model_fr, model_fi, amp_corr = modules.take_lomb(
    x, y, ofac, 1. / 24)


def form2(x, pos):
    """ This function returns a string with 3 decimal places, given the input x"""
    return '%.2f' % x


def form5(x, pos):
    """ This function returns a string with 3 decimal places, given the input x"""
    return '%.6f' % x
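
# Attach the formatters to the axes (an assumed usage, consistent with their
# (x, pos) signatures): matplotlib's FuncFormatter accepts such callbacks.
from matplotlib.ticker import FuncFormatter
ax.xaxis.set_major_formatter(FuncFormatter(form2))
ax.yaxis.set_major_formatter(FuncFormatter(form5))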
Example #4
        GAW_switch = 'y'

        # Read in the model output
        if GAW_switch == 'y':
            model, names = modules.readfile_GAW(
                "binary_logs/GEOS_v90103_2x2.5_GAW_O3_logs.npy",
                model_index)  #model index represents gaw location
        else:
            model, names = modules.readfile(
                "binary_logs/GEOS_v90103_4x5_CV_logs.npy",
                "001")  #001 represents single location

        # Processes the model date
        date = model[:, 0]
        time = model[:, 1]
        model_time = modules.date_process(date, time)

        #Define sampling intervals
        samp_spacing = 1. / 24.

        #Convert model time array into numpy array
        model_time = np.array(model_time)

        counter = 0

        for species in species_list:
            units, obs_data_name, unit_cut, species_type, actual_species_name, obs_switch, model_cut_switch, ofac = modules.obs_variable_finder(
                species)

        #set plotting area & background to white
        fig = plt.figure(figsize=(20, 12))
def site_iter_process(valid_refs,c):
#for site_ref in valid_refs:
    data_valid = True
    site_ref = valid_refs[c]
    print 'ref = ',site_ref
    site_test = all_refs == site_ref
    
    site_yyyymmdd = yyyymmdd[site_test]
    site_hhmm = hhmm[site_test]
    site_vals = vals[site_test]
 
    #convert blank invalids to -99999
    test_inv = site_vals == ''
    site_vals[test_inv] = -99999
    
    site_vals = np.float64(site_vals)
    
    #convert number invalids to -99999
    test_inv = site_vals < 0
    site_vals[test_inv] = -99999
    
    #put vals into full grid
    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)
    
    #create max possible o3 grid
    full_data = np.empty(n_hours)
    full_data[:] = -99999
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    
    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)
    full_data[indices] = site_vals
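    #added note: because both arrays are rounded to 5 d.p., every observation
    #time is an exact grid value and searchsorted(..., side='left') returns its
    #exact slot, e.g. np.searchsorted(np.round(np.arange(0,1,1./24),5), 0.04167) -> 1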
    
    meta_index = meta_refs.index(site_ref)
    tz = meta_tz[meta_index]
    lat = np.float64(meta_lats[meta_index])
    lon = np.float64(meta_lons[meta_index])
    alt = np.float64(meta_alts[meta_index])
    raw_class_name = meta_class[meta_index]
    anthrome_class_name = class_name[meta_index]
    
    #correct timezone to UTC
    if tz < 0:
        #get rid of values at start and append -99999's at end
        cut = full_data[:tz]
        for num in range(np.abs(tz)):
            cut = np.insert(cut,0, -99999)
        full_data = cut
    elif tz > 0:
        #put -99999's at start and get rid of values at end
        cut = full_data[tz:]
        for num in range(tz):
            cut = np.append(cut, -99999)
        full_data = cut
            
    #do data quality checks        
    full_data,data_valid = modules.quality_check(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #set mm
    if species == 'O3':
        mm = 'ultraviolet photometry'
    elif (species == 'NO') or (species == 'NO2') or (species == 'CO'):
        mm = 'non-dispersive infrared absorption'
    
    #set sampling as average
    st = 'average'     
    
    #set site file resolution
    file_res = 'H'
    
    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
def run_LSP(model_data, x):

    print obs_refs[x]
    
    vals = model_data
    
    #check obs vals are valid
    valid = vals >= 0
    vals = vals[valid]
    model_time_val = model_time[valid]
    model_date_val = model_date[valid]

    full_times = modules.date_process(model_date,model_time,start_year)
    if timeres == 'M':
        full_times_year = full_times[:12]
    else:
        full_times_year = full_times[:8766]
    full_times_day = full_times[:24]

    valid_times = modules.date_process(model_date_val,model_time_val,start_year)  
      
    site_lon = obs_lons[x]

    #convert site_lon to 0 to 360 degs
    if site_lon < 0:
        site_lon = 360-np.abs(site_lon)
    
    #transform from UTC time to solar time 
    sun_time = lon_step_time*site_lon
    time_diff = sun_time - 0
    if time_diff > 12:
        time_diff = time_diff-24

    #make time start from 0    
    valid_times_from0 = modules.phase_start_correct(valid_times)

    periodic_periods = [1./4.,1./3.,1./2.,1.,365.25/4.,365.25/3.,365.25/2.,365.25]
    periods,mag,ph,fr,fi = modules.take_lomb_spec(valid_times_from0,vals,w=True,key_periods=periodic_periods)
    
    #get mean of values
    mean_array = np.average(vals)
    
    #correct all phases for start point (not actually being from 0 - just corrected to be)
    ph = modules.phase_start_point_correct_all(periodic_periods,ph,valid_times)

    key_diurnal_periods = [1./4.,1./3.,1./2.,1.]
    key_seasonal_periods = [365.25/4.,365.25/3.,365.25/2.,365.25]

    diurnal_mags = mag[:4]
    seasonal_mags = mag[4:]
    seasonal_phs = ph[4:]

    #get individual mags and phases
    daily_h3_mag = mag[0]
    daily_h2_mag = mag[1]
    daily_h1_mag = mag[2]
    orig_daily_mag = mag[3]
    daily_h3_ph = ph[0]
    daily_h2_ph = ph[1]
    daily_h1_ph = ph[2]
    orig_daily_ph = ph[3]
    
    seasonal_h3_mag = mag[4]
    seasonal_h2_mag = mag[5]
    seasonal_h1_mag = mag[6]
    annual_mag = mag[7]
    seasonal_h3_ph = ph[4]
    seasonal_h2_ph = ph[5]
    seasonal_h1_ph = ph[6]
    annual_ph = ph[7]

    #convert sub diurnal phases from UTC to solar time
    daily_h3_ph = modules.solar_time_phase_corrector(daily_h3_ph,6,time_diff)
    daily_h2_ph = modules.solar_time_phase_corrector(daily_h2_ph,24./3.,time_diff)
    daily_h1_ph = modules.solar_time_phase_corrector(daily_h1_ph,12,time_diff)
    orig_daily_ph = modules.solar_time_phase_corrector(orig_daily_ph,24,time_diff)
    diurnal_phs = [daily_h3_ph,daily_h2_ph,daily_h1_ph,orig_daily_ph]

    #convolve annual cycle and harmonics to seasonal waveform for 1 year
    seasonal_mag,seasonal_min_ph,seasonal_max_ph,seasonal_waveform,seasonal_ff = modules.period_convolution(key_seasonal_periods,full_times_year,seasonal_mags,seasonal_phs,mean_array)

    #convolve diurnal cycle and harmonics to diurnal waveform for 1 day
    diurnal_mag,diurnal_min_ph,diurnal_max_ph,diurnal_waveform,diurnal_ff = modules.period_convolution(key_diurnal_periods,full_times_day,diurnal_mags,diurnal_phs,mean_array)
    
    #convolve all 
    full_mag,full_min_ph,full_max_ph,full_waveform,full_ff = modules.period_convolution(periodic_periods,full_times,mag,ph,mean_array)

    #convert phase to time
    daily_h3_ph = modules.convert_phase_units_actual_single(daily_h3_ph,6.)
    daily_h2_ph = modules.convert_phase_units_actual_single(daily_h2_ph,24./3.)
    daily_h1_ph = modules.convert_phase_units_actual_single(daily_h1_ph,12.)
    orig_daily_ph = modules.convert_phase_units_actual_single(orig_daily_ph,24.)
    diurnal_min_ph = modules.convert_phase_units_actual_single(diurnal_min_ph,24.)
    diurnal_max_ph = modules.convert_phase_units_actual_single(diurnal_max_ph,24.)
    seasonal_h3_ph = modules.convert_phase_units_actual_single(seasonal_h3_ph,3.)
    seasonal_h2_ph = modules.convert_phase_units_actual_single(seasonal_h2_ph,4.)
    seasonal_h1_ph = modules.convert_phase_units_actual_single(seasonal_h1_ph,6.)
    annual_ph = modules.convert_phase_units_actual_single(annual_ph,12.)
    seasonal_min_ph = modules.convert_phase_units_actual_single(seasonal_min_ph,12.)
    seasonal_max_ph = modules.convert_phase_units_actual_single(seasonal_max_ph,12.)

    return (x,daily_h3_mag,daily_h3_ph,daily_h2_mag,daily_h2_ph,daily_h1_mag,daily_h1_ph,orig_daily_mag,orig_daily_ph,diurnal_mag,diurnal_min_ph,diurnal_max_ph,seasonal_h3_mag,seasonal_h3_ph,seasonal_h2_mag,seasonal_h2_ph,seasonal_h1_mag,seasonal_h1_ph,annual_mag,annual_ph,seasonal_mag,seasonal_min_ph,seasonal_max_ph,mean_array,diurnal_waveform,seasonal_waveform,full_waveform)
Example #7
#read in specific site data
site_group = obs_root_grp.groups['cvo']

#read in variables for site
obs_var = site_group.variables['o3'][:]
obs_date = site_group.variables['date'][:]
obs_time = site_group.variables['time'][:]
obs_lat = site_group.latitude
obs_lon = site_group.longitude
obs_alt = site_group.altitude
obs_group = site_group.process_group

test = obs_var >= 0
obs_var = obs_var[test]
obs_time = modules.date_process(obs_date, obs_time, 2005)
obs_time = obs_time[test]

obs_time = obs_time[:100]
obs_var = obs_var[:100]

u = np.copy(obs_time)
y = np.copy(obs_var)

m = len(u)

# minimize    (1/2) * || yhat - y ||_2^2
# subject to  yhat[j] >= yhat[i] + g[i]' * (u[j] - u[i]), j, i = 0,...,m-1
#
# Variables  yhat (m), g (m).
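
# A minimal sketch of one way to solve the QP stated above with cvxopt (an
# assumption -- the original snippet ends before its solver code). Stack the
# variables as z = (yhat, g); the constraints become G z <= h with one row
# per ordered pair (i, j), i != j.
from cvxopt import matrix, solvers

nvar = 2 * m
P = matrix(0.0, (nvar, nvar))
for i in range(m):
    P[i, i] = 1.0                        # (1/2)*yhat'*yhat block; g has no cost
q = matrix([-float(v) for v in y] + [0.0] * m)   # -y'*yhat term

cols, hvals = [], []
for i in range(m):
    for j in range(m):
        if i == j:
            continue
        row = [0.0] * nvar
        row[i] += 1.0                    # +yhat[i]
        row[j] -= 1.0                    # -yhat[j]
        row[m + i] = float(u[j] - u[i])  # +g[i]*(u[j]-u[i])
        cols.append(row)
        hvals.append(0.0)

G = matrix(cols).T                       # each inner list is a cvxopt column
h = matrix(hvals)

sol = solvers.qp(P, q, G, h)
yhat = sol['x'][:m]                      # fitted convex function values
g = sol['x'][m:]                         # subgradients at each u[i]
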
def site_iter_process(valid_refs,c):
#process data
#for i in range(n_refs):

    data_valid = True
    
    site_data = data[c]
    site_meta = site_data[0]    
    file_res = resolutions[c]    
    
    #get data and metadata
    
    latitudes= [site_meta['LATITUDE']]
    longitudes = [site_meta['LONGITUDE']]
    altitudes = [site_meta['ALTITUDE']]
    land_use_classes = [site_meta['LAND_USE']]
    station_classes = [site_meta['STATION CATEGORY']]
    all_mm = [site_meta['MEASUREMENT METHOD']]
    
    
    if (file_res == 'hr') or (file_res == 'da'):
        var = np.array(site_data[1].values.tolist())
    elif file_res == 'mo':
        all_var = np.array(site_data[1].values.tolist())
        var = all_var[:,1]
        end_times = all_var[:,0]
        end_date_con = [d[:4]+d[5:7]+d[8:10] for d in end_times]
        end_time_con = [d[11:13]+d[14:] for d in end_times]
        
    times = site_data[1].index
    print times
    date_con = [d.strftime('%Y%m%d') for d in times]
    time_con = [d.strftime('%H%M') for d in times]
    
    #get ref
    site_ref = valid_refs[c]
    site_group = group_codes[c]
    
    print 'ref == %s'%(site_ref) 
    print 'res = ',file_res
    
    
    #if file resolution is daily or monthly then replicate times after point, to fill hourly data array.
    count=0
    if file_res == 'da':
        file_hours = len(date_con)
        for i in range(file_hours):
            current_hh = int(time_con[count][:2])
            current_mm = int(time_con[count][2:])
            s = datetime.datetime(year = start_year, month = 1, day = 1, hour = current_hh, minute = current_mm)
            e = datetime.datetime(year = start_year, month = 1, day = 2, hour = current_hh, minute = current_mm)
            day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1]

            date_con = np.insert(date_con,count+1,[date_con[count]]*23)
            time_con = np.insert(time_con,count+1,day_hours)
            var = np.insert(var,count+1,[var[count]]*23)
       
            count +=24

    
    if file_res == 'mo':
        file_hours = len(date_con)
    
        for i in range(file_hours):
            current_year = int(date_con[count][:4])
            current_month = int(date_con[count][4:6])
            current_day = int(date_con[count][6:])
            current_hour = int(time_con[count][:2])
        
            next_year = int(end_date_con[i][:4])
            next_month = int(end_date_con[i][4:6])
            next_day = int(end_date_con[i][6:])
            next_hour = int(end_time_con[i][:2])
        
            s = datetime.datetime(year = current_year, month = current_month, day = current_day, hour = current_hour, minute = 0)
            e = datetime.datetime(year = next_year, month = next_month, day = next_day, hour = next_hour, minute = 0)
        
            day_date = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][:-1]
            day_hour = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][:-1]
            date_con = np.insert(date_con,count+1,day_date)
            time_con = np.insert(time_con,count+1,day_hour)
            var = np.insert(var,count+1,[var[count]]*len(day_date))
            count += (len(day_date)+1)

    date_con = np.array(date_con).astype(int)
    time_con = np.array(time_con).astype(int)
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    
    full_data = np.empty(len(grid_dates))
    full_data[:] = -99999
 
    full_data[indices] = var
    
    #convert nans to -99999's
    nan_inds = np.isnan(full_data)
    full_data[nan_inds] = -99999
    
    #get mode of metadata
    lat =  np.float64(stats.mode(latitudes)[0][0]) 
    lon =  np.float64(stats.mode(longitudes)[0][0])  
    alt = np.float64(stats.mode(altitudes)[0][0]) 
    land_use_class = stats.mode(land_use_classes)[0][0]
    raw_class_name = stats.mode(station_classes)[0][0]
    mm = stats.mode(all_mm)[0][0]
        
    
    #get measurement method
    if (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O331M UV Ozone Analyzer') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9800') or (mm == 'Ultraviolet (UV) photometryThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryMCV 48-AUV') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400A UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 48 CO analyser') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400E UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 300 O3 analyser') \
    or (mm == 'Ultraviolet (UV) photometry342 M') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9812 O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 350E O3 analyser') or (mm == 'Ultraviolet (UV) photometryENVIRONMENT 1003 AH') or (mm == 'Ultraviolet (UV) photometryC.S.I. 3.100') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003 O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810B O3 analyser') or (mm == 'Ultraviolet (UV) photometrytoo generic') or (mm == 'Ultraviolet (UV) photometryThermo 49 CPS Ozone Primary Standard') \
    or (mm == 'Ultraviolet (UV) photometryDASIBI') or (mm == 'UV fluorescencetoo generic') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003-PC O3 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 49i O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-PC O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-RS O3 analyser') or (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O341M UV Ozone Analyzer') or (mm == 'Ultraviolet (UV) photometryISEO Argopol-SAM-XAIR')  \
    or (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O342M UV Ozone Analyze') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 370 O3 analyser') or (mm == 'spectrophotometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-AH O3 analyser') or (mm == 'UV fluorescenceThermo 49c' ) or (mm == 'Ultraviolet (UV) photometryPHILIPS K50110/00 UV Photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 8810 O3 analyser') or (mm == 'Ultraviolet (UV) photometryPHILIPS K50094 API 400') or (mm == 'Ultraviolet (UV) photometryORION') or (mm == 'Ultraviolet (UV) photometryThermo model 49w O3 analyser') \
    or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810 O3 analyser') or (mm == 'Ultraviolet (UV) photometryCOLUMBIA SCIENTIFIC IC 3100') or (mm == 'Ultraviolet (UV) photometry2008A') or (mm == 'Ultraviolet (UV) photometryThermo model 43s SO2 analyser') or (mm == 'Ultraviolet (UV) photometryMLU') or (mm == 'Ultraviolet (UV) photometryThermo model 49 O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1108 O3 analyser') or (mm == 'Ultraviolet (UV) photometryAMIBRACK') or (mm == 'Ultraviolet (UV) photometryThermo model 49c O3 analyser') or (mm == 'UV fluorescenceUNKNOWN') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400 UV photometric O3 analyser') \
    or (mm == 'UV fluorescenceTeledyne API 400 UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9830 CO analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 5014') or (mm == 'Ultraviolet (UV) photometryEnvironics 300/ Environics') or (mm == 'Ultraviolet (UV) photometryANALYSIS AUTOMATION Mod. 427') or (mm == 'Ultraviolet (UV) photometryANALYSIS AUTOMATION') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008 O3 analyser') or (mm == 'ultraviolet absorptionORION') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9811 O3 analyser') or (mm == 'Ultraviolet (UV) photometryENVIRONMENT 1003RS') \
    or (mm == 'UV absorption (ref)UNKNOWN') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Environnement S.A. SANOA Multigas Longpath Monitoring System') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003-RS O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 350 O3 analyser') or (mm == 'Ultraviolet (UV) photometrySFI O342M') or (mm == 'UV fluorescenceMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryDANI ENVIRONMENT 1003 AH') or (mm == 'Ultraviolet (UV) photometryS-5014') or (mm == 'Ultraviolet (UV) photometryThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryUNKNOWN') \
    or (mm == 'Ultraviolet (UV) photometryHoriba model APNA 360 NOx analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'UV fluorescenceThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryContiflo') or (mm == 'Ultraviolet (UV) photometryTeledyne API undertermined') or (mm == 'UV fluorescenceThermo model 43a SO2 analyser') or (mm == 'UV fluorescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm == 'UV fluorescenceThermo model 43c SO2 analyser') \
    or (mm =='Ultraviolet (UV) photometryTeledyne API undertermined') or (mm =='UV fluorescenceUNKNOWN') or (mm =='UV fluorescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm =='Ultraviolet (UV) photometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810 O3 analyser') \
    or (mm == 'ChemiluminescenceTeledyne API undertermined') or (mm == 'ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360 NOx analyser') or (mm == 'ChemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC31M NO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 14B chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841 NOx analyser') or (mm == 'ChemiluminescenceENVIRONMENT ZC 32M') or (mm == 'ChemiluminescenceHoriba model APNA 300 NOx analyser') or (mm == 'chemiluminescenceENVIRONNEMENT AC 30M') \
    or (mm == 'ChemiluminescenceThermo model 42i NO/Nox analyser') or (mm == 'ChemiluminescenceTeledyne API 400 UV photometric O3 analyser') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION') or (mm == 'ChemiluminescenceMonitor Labs model 8941A NOx analyser') or (mm == 'ChemiluminescenceTeledyne API undertermined') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC32M NO2 Analyzer') or (mm == 'ChemiluminescenceTeledyne API 200E chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceHoriba model APHA 360E hydrocarbons analyser') or (mm == 'ChemiluminescenceMELOY S1600') or (mm == 'ChemiluminescenceECO PHYSICS CLD 700') or (mm == 'ChemiluminescenceORION') \
    or (mm == 'ChemiluminescenceTECAN CLD 502') or (mm == 'ChemiluminescenceMonitor Labs model 9850 SO2 analyser') or (mm == 'ChemiluminescenceECO PHYSICS CLD 700 AL') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC30M NO2 Analyzer') or (mm == 'ChemiluminescenceMCV 30-QL') or (mm == 'ChemiluminescenceAMBIRACK') or (mm == 'ChemiluminescenceTeledyne API 100A UV Fluorescent SO2 Analyser') or (mm == 'ChemiluminescenceS-5012') or (mm == 'ChemiluminescenceAirpointer') or (mm == 'ChemiluminescenceThermo model 42c NO/Nox analyser') or (mm == 'ChemiluminescenceThermo model 42i-TL (Trace level Nox)') or (mm == 'ChemiluminescenceMonitor Labs model 9841T NOx analyser') \
    or (mm == 'ChemiluminescenceThermo model 42 NO/Nox analyser') or (mm == 'ChemiluminescenceMonitor Labs model 8841 NOx analyser') or (mm == 'ChemiluminescenceColumbia Scientific Industries Models 1600') or (mm == 'chemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION Mod. 447') or (mm == 'ChemiluminescenceSFI AC32M') or (mm == 'ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm == 'Chemiluminescenceserinus 40 Nox') or (mm == 'ChemiluminescenceThermo model 42s NO/Nox analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360 NOx analyser') or (mm == 'ChemiluminescenceThermo model 42C-TL (Trace level Nox)') \
    or (mm == 'ChemiluminescenceTeledyne API 200 chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 8440 NOx analyser') or (mm == 'ChemiluminescencePHILIPS K50034 API 200A') or (mm == 'ChemiluminescenceENVIRONMENT') or (mm == 'ChemiluminescenceMonitor Labs model 8840 NOx analyser') or (mm == 'chemiluminescenceHORIBA APNA 370') or (mm == 'ChemiluminescenceMonitor Labs undetermined') or (mm == 'ChemiluminescencePHILIPS 42') or (mm == 'ChemiluminescencePHILIPS K50109/00 Gas Filter Correlation CO analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm == 'ChemiluminescenceThermo model 43 SO2 analyser') \
    or (mm == 'ChemiluminescenceHoriba model APNA 350 NOx analyser') or (mm == 'ChemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceTHERMO ELECTRON INSTRUMENTS') or (mm == 'ChemiluminescenceLAP 884') or (mm == 'ChemiluminescenceMonitor Labs model 9841A NOx analyser') or (mm == 'ChemiluminescenceHoriba model APNA 370 NOx analyser') or (mm == 'ChemiluminescenceDASIBI 2108 NOx analyser') or (mm == 'ChemiluminescenceThermo model 14B/E chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AF22M SO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 42w NO/Nox analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360E NOx analyser') \
    or (mm == 'Chemiluminescencetoo generic') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm == 'ChemiluminescencePHILIPS K50235/00 NO-NOx-NO2 analyser') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC31M NO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 14B chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841 NOx analyser') or (mm =='ChemiluminescenceENVIRONMENT ZC 32M') or (mm =='ChemiluminescenceBENDIX') or (mm =='ChemiluminescenceThermo model 42i NO/Nox analyser') \
    or (mm =='ChemiluminescenceTeledyne API 400 UV photometric O3 analyser') or (mm =='Ultraviolet (UV) photometryHoriba model APNA 360 NOx analyser') or (mm =='ChemiluminescenceThermo model 48 CO analyser') or (mm =='ChemiluminescenceMonitor Labs model 8941A NOx analyser') or (mm =='ChemiluminescenceTeledyne API undertermined') or (mm =='ChemiluminescenceEnvironnement S.A. Model AC32M NO2 Analyzer') or (mm =='ChemiluminescenceTeledyne API 200E chemiluminescent NOx analyser') or (mm =='ChemiluminescenceHoriba model APHA 360E hydrocarbons analyser') or (mm =='ChemiluminescenceECO PHYSICS CLD 700') or (mm =='ChemiluminescenceORION') \
    or (mm =='ChemiluminescenceTECAN CLD 502') or (mm =='ChemiluminescenceMonitor Labs model 9850 SO2 analyser') or (mm =='ChemiluminescenceECO PHYSICS CLD 700 AL') or (mm =='ChemiluminescenceEnvironnement S.A. Model AC30M NO2 Analyzer') or (mm =='ChemiluminescenceMCV 30-QL') or (mm =='ChemiluminescenceBendix/Combustion Engineering Model 8101-C Oxides of Nitrogen Analyze') or (mm =='ChemiluminescenceTeledyne API 100A UV Fluorescent SO2 Analyser') or (mm =='ChemiluminescenceS-5012') or (mm =='ChemiluminescenceHoriba model APNA 300E NOx analyser') or (mm =='ChemiluminescenceThermo model 42c NO/Nox analyser') \
    or (mm =='ChemiluminescenceMonitor Labs model 8440 NOx analyser') or (mm =='ChemiluminescenceThermo model 42i-TL (Trace level Nox)') or (mm =='ChemiluminescenceThermo model 42 NO/Nox analyser') or (mm =='ChemiluminescenceMonitor Labs model 8841 NOx analyser') or (mm =='ChemiluminescenceColumbia Scientific Industries Models 1600') or (mm =='chemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION Mod. 447') or (mm =='ChemiluminescenceAirpointer') or (mm =='ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm =='ChemiluminescenceThermo model 42s NO/Nox analyser') or (mm =='ChemiluminescenceHoriba model APNA 360 NOx analyser') \
    or (mm =='ChemiluminescenceTeledyne API 200 chemiluminescent NOx analyser') or (mm =='ChemiluminescencePHILIPS K50034 API 200A') or (mm =='ChemiluminescenceENVIRONMENT') or (mm =='ChemiluminescenceMonitor Labs model 8840 NOx analyser') or (mm =='Beta ray attenuationTeledyne API 200A chemiluminescent NOx analyser') or (mm =='ChemiluminescenceMonitor Labs undetermined') or (mm =='ChemiluminescencePHILIPS K50102 NO') or (mm =='Chemiluminescencetoo generic') or (mm =='ChemiluminescenceThermo model 42C-TL (Trace level Nox)') or (mm =='ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm =='ChemiluminescenceTHERMO ENVIRONMENTAL INSTRUMENTS') \
    or (mm =='ChemiluminescenceHoriba model APNA 350 NOx analyser') or (mm =='ChemiluminescenceUNKNOWN') or (mm =='ChemiluminescenceTHERMO ELECTRON INSTRUMENTS') or (mm =='ChemiluminescenceLAP 884') or (mm =='ChemiluminescenceMonitor Labs model 9841A NOx analyser') or (mm =='ChemiluminescenceHoriba model APNA 370 NOx analyser') or (mm =='ChemiluminescenceDASIBI 2108 NOx analyser') or (mm =='ChemiluminescenceThermo model 14B/E chemiluminescence NO-NO2-Nox') or (mm =='ChemiluminescenceThermo model 42w NO/Nox analyser') or (mm =='ChemiluminescenceHoriba model APNA 360E NOx analyser') or (mm =='ChemiluminescenceEC9843') or (mm =='ChemiluminescencePHILIPS K50109/00 Gas Filter Correlation CO analyser') \
    or (mm =='ChemiluminescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm =='ChemiluminescencePHILIPS K50235/00 NO-NOx-NO2 analyser') or (mm =='ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm =='ChemiluminescenceEnvironnement S.A. Model CO12M CO Analyzer') or (mm =='ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm =='ChemiluminescenceUNKNOWN') or (mm =='Chemiluminescencetoo generic') or (mm =='Beta ray attenuationMLU') or (mm =='Beta ray attenuationORION') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'UV fluorescenceHoriba model APNA 360 NOx analyser') \
    or (mm == 'UV fluorescenceUNKNOWN') or (mm == 'UV fluorescenceThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser'):
        
        if species == 'O3':
            mm = 'ultraviolet photometry'
        elif (species == 'NO') or (species == 'NO2') or (species == 'CO'):
            mm = 'chemiluminescence'
        else:
            1+'a'    #deliberately raise a TypeError to halt on an unexpected species
    
    if (mm =='Non-dispersive infrared spectroscopy (NDIR)Meloy Model SA 700 Fluorescence Sulfur Dioxide Analyze') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9830B CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 8831 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ORION') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 200A chemiluminescent NOx analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ANALYSIS AUTOMATION') or (mm =='Non-dispersive infrared spectroscopy (NDIR)THERMO ELECTRON INSTRUMENTS') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 43a SO2 analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 8830 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)CO ANALAYZER') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO12M CO Analyzer') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48i CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)too generic') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS K50093 API 300A') or (mm =='Non-dispersive infrared spectroscopy (NDIR)MLU') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 300 CO analyser') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)MLU 300') or (mm =='Non-dispersive infrared spectroscopy (NDIR)UNKNOWN') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ENVIRONMENT') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300 gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 49 O3 analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48w CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Maihak Unor 6N') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360E CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs undetermined') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300E gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 100 UV Fluorescent SO2 Analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO10M CO Analyzer') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 350 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)FUJI ZRC') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API undertermined') or (mm =='Non-dispersive infrared spectroscopy (NDIR)S-5006') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 350E CO analyser') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48c CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 42 NO/Nox analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)SFI CO12M') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360CE CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS 48') or (mm =='Non-dispersive infrared spectroscopy (NDIR)DASIBI 3008 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300A gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 370 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO11M CO Analyzer') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9841A NOx analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)AAL 407') or (mm =='Non-dispersive infrared spectroscopy (NDIR)AMBIRACK') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9830 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 300E CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS K50109/00 Gas Filter Correlation CO analyser') or (mm =='UNKNOWNTeledyne API 300 gas filter correlation CO analyser') or (mm =='UNKNOWNHoriba model APMA 350 CO analyser') \
    or (mm =='Infrared gas filter correlationTHERMO ELECTRON INSTRUMENTS 48c') or (mm =='Infrared gas filter correlationHoriba model APMA 360 CO analyser') or (mm =='infrared absorptionUNKNOWN') or (mm =='Infrared gas filter correlationUNKNOWN') or (mm =='Infrared gas filter correlationTeledyne API 300E gas filter correlation CO analyser'):
        mm = 'non-dispersive infrared spectroscopy'
    
    if (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Opsis AR500 Open path monitor') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)UNKNOWN') or (mm ==  'Ultraviolet (UV) photometryDOAS') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Environnement S.A. SANOA Multigas Longpath Monitoring System'): 
        mm = 'differential optical absorption spectroscopy'
    
    if (mm == 'flame photometryThermo model 48 CO analyser') or (mm == 'flame photometryTeledyne API 300 gas filter correlation CO analyser'):
        mm = 'flame photometry'
    
    if (mm == 'Gas Chromatography (ref)UNKNOWN') or (mm == 'chromatographyUNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GUNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GEnvironnement VOC71M') or (mm == 'chromatographyMonitor Labs model 8440 NOx analyser') or (mm == 'Gas chromatography (GC) + flame ionisation (GC-FID)UNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GAIRMOZONE') or (mm =='Gas chromatography followed by flame ionization detection (GVarian Chrompack') or (mm =='chromatographyChrompack BTX CP7001 Monitor') or (mm =='Gas chromotography (GC)UNKNOWN'):
        mm = 'gas chromatography flame ionisation detection'    
    
    if (mm == "Griess-Saltzman reactionLipinski's aspirator") or (mm == 'Griess-Saltzman reaction101') or (mm == 'Griess-Saltzman reactionUNKNOWN') or (mm == "UNKNOWNLipinski's aspirator") or (mm == 'Griess-Saltzman reactionBUBBLER 24 H') or (mm == "Griess-Saltzman reactionLipinski's aspirator AGT24") or (mm == 'Griess-Saltzman reactionfilter pack') or (mm == 'NEDA Griess-Yloswayaspirator') or (mm == 'colorimetryUNKNOWN'):
        mm = 'griess saltzman colorimetric'
    
    if (mm == 'SpectrophotometrySequential Air Sampler, Type SS2000. NaI-impregnated glass sinters') or (mm == 'SpectrophotometryGlass tubes') or (mm == 'Spectrophotometryglass_sinter')  or (mm =='Spectrophotometryfilter pack') or (mm == 'SpectrophotometryUNKNOWN') or (mm == 'Spectrophotometryphotocolorimeter') or (mm == "SpectrophotometryLipinski's aspirator") or (mm == 'SpectrophotometryBUBBLER 24 H') or (mm == 'SpectrophotometryIMPREGNATED FILTER') or (mm == 'Spectrophotometryglass filter') or (mm == 'spectrophotometryUNKNOWN'):
        mm = 'spectrophotometry'
    
    if (mm == 'SpectrometryBUBBLER 24 H') or (mm == 'Atomic absorption spectrometry (AAS)UNKNOWN'):
        mm = 'spectrometry'
    
    if (mm == 'Ion chromatographyIMPREGNATED FILTER'):
        mm = 'ion chromatography'
    
    if (mm == 'diffusive samplerUNKNOWN') or (mm == 'UNKNOWNSEQUENTIAL SAMPLER') or (mm == 'TGS-ANSAFILTER'):
        mm = 'diffusive sampler'
        
    if (mm == 'Flame ionization detection (FID)Chrompack CP9000'):
        mm = 'flame ionisation detection'  
    
    if (mm == 'coulometryUNKNOWN'):
        mm = 'coulometry'
    
    if (mm == 'Gas chromatography + mass spectrometry (GC-MS)AF 20 M') or (mm == 'GAS CHROMATOGRAPHY - MASS SPECTROMETRYUNKNOWN') or (mm == 'Gas chromatography + mass spectrometry (GC-MS)UNKNOWN') or (mm == 'Gas chromatography + mass spectrometry GC-MS after solvent oMarkes Thermal Desorber + Agilent gas Chromatograph Mass Spectrometer'):
        mm = 'gas chromatography mass spectrometry'
    
    if (mm == 'Gas chromatography with photo ionization detectorSYNTECH SPECTRAS GC 955 series undetermined') or (mm == 'Gas chromatography with photo ionization detectorUNKNOWN'):
        mm = 'gas chromatography photo ionization detection'
    
    #if measurement type is unknown then set default measurement method for species
    #np.isnan raises a TypeError on strings, so the except branch below handles string-valued mm
    try:
        if (np.isnan(mm) == True):
            if species == 'O3':
                mm = 'ultraviolet photometry'
            elif (species == 'NO') or (species == 'NO2'):
                mm = 'chemiluminescence'
            elif species == 'CO':
                mm = 'non-dispersive infrared spectroscopy'
            elif species == 'ISOP':
                mm = 'gas chromatography flame ionisation detection'
    except:
        if (mm == 'UNKNOWNUNKNOWN'):
            if species == 'O3':
                mm = 'ultraviolet photometry'
            elif (species == 'NO') or (species == 'NO2'):
                mm = 'chemiluminescence'
            elif species == 'CO':
                mm = 'non-dispersive infrared spectroscopy'
            elif species == 'ISOP':
                mm = 'gas chromatography flame ionisation detection'
            

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #convert file res to standard format
    if file_res == 'hr':
        file_res = 'H'
    elif file_res == 'da':
        file_res = 'D'
    elif file_res == 'mo':
        file_res = 'M'
        
    #set sampling as average
    st = 'average'
    
    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
    def site_iter_process(valid_refs,c):

        #for each valid location process
        #limit obs data for each site in valid_obs_site_names
        #for c in range(len(valid_refs)):
    
        all_lat = []
        all_lon = []
        all_alt = []
        all_st = []
        all_mm = []

        site_ref = valid_refs[c]

        file_valid = True
        data_valid = True

        print site_ref
        file_res = data_resolutions[c]
        print file_res

        #read files for each valid site
        s_files = sorted(glob.glob('/work/home/db876/observations/surface/%s/GAW/%s**.%s**.dat'%(species,site_ref.lower(),file_res))) 
                  
        print s_files      
        if file_res == 'hr':
            site_files = sorted(s_files, key = lambda x: x.split(".hr")[1])

        else:
            site_files = sorted(s_files)

        delete_inds = []
        if file_res == 'hr':
            #limit site files before and after year limit
        
            for i in range(len(site_files)):
                f = site_files[i]
                year = f.split(".hr")[1][:4]
                if int(year) < int(start_year):
                    delete_inds.append(i)
                if int(year) > int(end_year):
                    delete_inds.append(i)

            site_files = np.delete(site_files,delete_inds)
            print site_files
    
            if len(site_files) == 0:
                print 'No valid files in date range. Skipping.'
                data_valid = False
                return c,[],data_valid,-999,-999,-999,'na','na','na','na','na',-999

        site_file_len = len(site_files)
        s_count = 0
        start_ind = 0
        end_ind = 0
        for f in site_files:
            print f
            read = np.loadtxt(f,dtype="S10,S5,f8",comments='C',usecols=(0,1,4),unpack=True)
            read = np.array(read)
    
            dates = read[0,:]
            times = read[1,:]
            conc = read[2,:]
            conc = np.array(conc)
            conc = conc.astype(float)
    
            #change all vals < 0 to np.NaN
            inv_test = conc < 0
            conc[inv_test] = np.NaN
    
            start_ind = end_ind
            end_ind+=len(conc)
    
            s_count+=1
    
            units = [] 
            mycsv = csv.reader(open(f))
            row_count = 0
            for row in mycsv:
                if row_count == 11:
                    val = " ".join(row)
                    lat = val.replace(" ", "")
                    lat = lat[12:]
                    lat = float(lat)
                    all_lat.append(lat)
                # get lon
                if row_count == 12:
                    val = " ".join(row)
                    lon = val.replace(" ", "")
                    lon = lon[13:]
                    lon = float(lon)
                    all_lon.append(lon)
                # get altitude
                if row_count == 13:
                    val = " ".join(row)
                    alt = val.replace(" ", "")
                    alt = alt[12:] 
                    alt = float(alt) 
                    all_alt.append(alt)
                # get units
                if row_count == 20:
                    val = " ".join(row)
                    unit = val.replace(" ", "")
                    unit = unit[19:]           
                # get measurement method
                if row_count == 21:
                    val = " ".join(row)
                    mm = val.replace(" ", "")
                    mm = mm[21:]  
                    all_mm.append(mm)
                # get sampling type
                if row_count == 22:
                    val = " ".join(row)
                    st = val.replace(" ", "")
                    st = st[16:]  
                    all_st.append(st)
                if row_count == 23:
                    val = " ".join(row)
                    tz = val.replace(" ", "")
                    tz = tz[12:]  

        
                row_count+=1   
        
            # test if units are in ppb for each file - if not convert
    
            if (unit != 'ppb') & (unit != 'ppbv'):
                if (unit == 'ug/m3') or (unit == 'ugN/m3'): 
                    print 'converting units, temp = 20degC'
                    #calculate conversion factor from ug/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments
                    #R/MW*(TEMP0C(K)+TEMP(degC))/(P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                    conc = conv_fact*conc
                elif (unit == 'ug/m3-20C') or (unit == 'ugN/m3-20C'):
                    print 'converting units, temp = 20degC'
                    #calculate conversion factor from ug/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments
                    #R/MW*(TEMP0C(K)+TEMP(degC))/(P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                    conc = conv_fact*conc
                elif (unit == 'ug/m3-25C') or (unit == 'ugN/m3-25C') or (unit == 'ug/m3at25C'):
                    print 'converting units, temp = 25degC'
                    #calculate conversion factor from ug/m3 assuming 25 degC and 1 atm
                    #R/MW*(TEMP0C(K)+TEMP(degC))/(P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+25)/(1013.25/10)
                    conc = conv_fact*conc
                elif (unit == 'mg/m3-20C') or (unit == 'mgN/m3-20C'):
                    print 'converting units, temp = 20degC'
                    #calculate conversion factor from mg/m3 assuming 20 degC and 1 atm
                    #R/MW*(TEMP0C(K)+TEMP(degC))/(P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                    conc = (conv_fact*conc)*1e3
                elif (unit == 'mg/m3-25C') or (unit == 'mgN/m3-25C'):
                    print 'converting units, temp = 25degC'
                    #calculate conversion factor from mg/m3 assuming 25 degC and 1 atm
                    #R/MW*(TEMP0C(K)+TEMP(degC))/(P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+25)/(1013.25/10)
                    conc = (conv_fact*conc)*1e3
                elif (unit == 'ppm') or (unit == 'ppmv'):
                    conc = conc*1.e3
                elif (unit == 'ppt') or (unit == 'pptv'):
                    conc = conc/1.e3
        
                else:
                    print 'Unknown Unit'
                    print unit
                    1+'a'    #deliberately raise a TypeError to halt on an unknown unit
                    break
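            #worked example (added note): for O3, mol_mass ~= 48 g/mol, so at 20 degC
            #conv_fact = 8.3144/48.*(273.15+20)/(1013.25/10) ~= 0.50, i.e. 1 ug/m3 of
            #O3 ~= 0.5 ppb (equivalently ~2.0 ug/m3 per ppb at 20 degC and 1 atm)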
            
            if tz != 'UTC':
                if tz == '':
                    if site_ref.lower() in ['plm']:
                        tz = -5
        
                    if site_ref.lower() in ['kos','edm','vdl','nwr']:
                        tz = 0

                    if site_ref.lower() in ['jfj','kps','rig','pay','glh','cmn','zep','dig','hhe','ktb','stp','ivn','jcz','kam','lzp','snz','zbl','kmw','don','mhn','nia','roq','spm']: 
                        tz = 1

                    if site_ref.lower() in ['rcv','aht','oul','uto','vir','fdt','sem','stn']:
                        tz = 2
                
                    if site_ref.lower() in ['dak']:
                        tz = 3
                
                    if site_ref.lower() in ['shp']:
                        tz = 4
                    
                    if site_ref.lower() in ['isk']:
                        tz = 5
    
                    if site_ref.lower() in ['hkg']:
                        tz = 8

                    if site_ref.lower() in ['cgo']:
                        tz = 10
                else:        
                    tz = tz.replace('LocaltimeUTC', '')
                    tz = tz.replace('OtherUTC', '')
                    tz = tz.replace('Localtime', '')
                    tz = tz.replace(':', '.')
        
                    try:
                        before, sep, after = tz.rpartition('.')
                        if sep:
                            #convert minutes past the hour to a decimal fraction, e.g. '.30' -> '.50'
                            conv = int(round((100./60) * int(after)))
                            tz = before+sep+str(conv)
                    except:
                        pass
                    tz = float(tz)
        
            else: 
                tz = 0
    
            #check tz is whole number else skip site
            if (tz % 1) != 0:
                print 'File Invalid, timezone is not a whole number.'
                conc[:] = -99999
    
            #process dates from date, time to days since start year
            dates = [s.replace('-', '') for s in dates]			
            times = [s.replace(':', '') for s in times]
    
            if file_res == 'hr':
                #some times run from 0100 to 2400; assume this means sites report the average for the previous hour, so subtract one hour from every time on that day
                for i in range(len(times)):
                    if times[i] == '2400':
                        current_date = dates[i]
                        test = np.array(dates) == current_date
                        indices = [i for i, x in enumerate(test) if x]
                        for x in indices:
                            current_time = times[x]
                            if current_time == '2400':
                                current_time = '0000'
                            date_datetime = datetime.datetime(int(current_date[0:4]),int(current_date[4:6]),int(current_date[6:]),int(current_time[:2]),int(current_time[2:]))
                            date_datetime = date_datetime - datetime.timedelta(hours = 1)
                            times[x] = date_datetime.strftime("%H%M")
    
                #adjust dates and times if tz is not equal to 0
                if tz != 0:
                    for i in range(len(dates)):
                        #create datetime
                        dt = datetime.datetime(int(dates[i][:4]),int(dates[i][4:6]),int(dates[i][6:]),int(times[i][:2]),int(times[i][2:]))
                        if tz > 0:
                            #print 'Old dt', dt
                            dt  = dt - datetime.timedelta(hours = int(tz))
                            #print 'New dt', dt
                        elif tz < 0:
                            #print 'Old dt', dt
                            dt  = dt + datetime.timedelta(hours = np.abs(int(tz)))
                            #print 'New dt', dt
                        dates[i] = dt.strftime("%Y%m%d")
                        times[i] = dt.strftime("%H%M")
        
            data = [dates,times,conc]
            try:
                big_list = np.hstack((big_list,data))
            except:
                big_list = np.array(data)    
            
    
            if (s_count == site_file_len):	
          
                #make sure big list exists
                try:
                    big_list
                except:
                    data_valid = False
            
                if data_valid == True:          
  
                    #get dates and times
                    date_con = big_list[0,:]
                    time_con = big_list[1,:]
              
                    #get vals
                    vals = np.array(big_list[2,:]).astype(float) 

                    #delete big list
                    del big_list

                    #if dates outside what asked for exclude          
                    first_date_val = int('%s0101'%(start_year))
                    last_date_val = int('%s1231'%(end_year))
        
                    test_valid = (np.array(date_con).astype(int) >= first_date_val) & (np.array(date_con).astype(int) <= last_date_val)
                    date_con = date_con[test_valid]
                    time_con = time_con[test_valid]
                    vals = vals[test_valid]
            
                    #Check if any times are duplicate, if so delete all but first
                    del_list = []
                    for d in range(len(date_con)-1):
                        if (date_con[d] == date_con[d+1]) & (time_con[d] == time_con[d+1]):
                            del_list.append(d+1)
                    if len(del_list) > 0:
                        print 'Deleting duplicate timepoints'
                        print date_con[del_list],time_con[del_list]
                        date_con = np.delete(date_con,del_list)
                        time_con = np.delete(time_con,del_list)
                        vals = np.delete(vals,del_list)
            
                    #if file resolution is daily or monthly then replicate times after point, to fill hourly data array.
                    count=0
                    if file_res == 'da':
                        file_hours = len(date_con)
                        for i in range(file_hours):
                            current_hh = int(time_con[count][:2])
                            current_mm = int(time_con[count][2:])
                            s = datetime.datetime(year = start_year, month = 1, day = 1, hour = current_hh, minute = current_mm)
                            e = datetime.datetime(year = start_year, month = 1, day = 2, hour = current_hh, minute = current_mm)
                            day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1]
        
                            date_con = np.insert(date_con,count+1,[date_con[count]]*23)
                            time_con = np.insert(time_con,count+1,day_hours)
                            vals = np.insert(vals,count+1,[vals[count]]*23)
               
                            count +=24
        
            
                    if file_res == 'mo':
                        file_hours = len(date_con)
                        for i in range(file_hours):
                            current_year = int(date_con[count][:4])
                            current_month = int(date_con[count][4:6])
                
                            next_month = current_month+1
                            if next_month > 12:
                                next_month = 1
                                next_year = current_year+1
                            else:
                                next_year = current_year 
                
                            s = datetime.datetime(year = current_year, month = current_month, day = 1, hour = 1, minute = 0)
                            e = datetime.datetime(year = next_year, month = next_month, day = 1, hour = 0, minute = 0)
                
                            day_date = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][:-1]
                            day_hour = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][:-1]
                            date_con = np.insert(date_con,count+1,day_date)
                            time_con = np.insert(time_con,count+1,day_hour)
                            vals = np.insert(vals,count+1,[vals[count]]*len(day_date))
                            count += (len(day_date)+1)
        
                    date_con = np.array(date_con).astype(int)
                    time_con = np.array(time_con).astype(int)
        
                    #create max possible o3 grid
                    o3_data = np.empty(n_hours)
                    o3_data[:] = -99999
                
                    #delete dates,times and var outside date range
                    val_test = (date_con >= int(output_res_dates_strings[0])) & (date_con <= int(output_res_dates_strings[-1]))
                    date_con = date_con[val_test]
                    time_con = time_con[val_test]
                    vals = vals[val_test]
                
                    print date_con
        
                    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
                    converted_time = modules.date_process(date_con,time_con,start_year)
                    converted_time = np.round(converted_time,decimals=5)
                    syn_grid_time = np.arange(0,n_days,1./24)
                    syn_grid_time = np.round(syn_grid_time,decimals=5)
                    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
                    o3_data[indices] = vals 
        
                    #convert all NaNs back to -99999
                    test = np.isnan(o3_data)
                    o3_data[test] = -99999
        
                    #get mode of metadata
                    lat = np.float64(stats.mode(all_lat)[0][0]) 
                    lon = np.float64(stats.mode(all_lon)[0][0])  
                    alt = np.float64(stats.mode(all_alt)[0][0]) 
                    st = stats.mode(all_st)[0][0]
                    mm = stats.mode(all_mm)[0][0]

                    #check site is not urban using anthrome map from 2000
                    anthfile = '/work/home/db876/plotting_tools/core_tools/anthro2_a2000.nc'
                    anthload = Dataset(anthfile)
                    class_valid,anthrome_class_name = modules.anthrome_classify(anthload,[lat],[lon])
                    if class_valid == 'invalid':
                        data_valid = False
                        print 'Site Invalid, site classed as urban by anthrome map.'

                    #get measurement type and sampling type (take mode from collected list)
                    if (st == 'continuous') or (st == 'continuous(carbondioxide),remotespectroscopicmethod(methaneandsurfaceozone)') or (st == 'continuous(carbondioxide)remotespectroscopicmethod(methaneandsurfaceozone)'):
                        st = 'average'
                    elif st == 'flask':
                        st = 'flask'
                    elif st == 'filter':
                        st = 'filter'
                    else:
                        raise ValueError('Unknown sampling type: %s' % st)

                    if mm == 'Lightabsorptionanalysis(UV)':
                        mm = 'ultraviolet photometry'
            
                    elif  mm == 'CavityRingdownSpectroscopy':
                        mm = 'cavity ringdown spectroscopy'
            
                    elif mm == 'NDIR':
                        mm = 'non-dispersive infrared spectroscopy'

                    elif (mm == 'GasChromatography(FID)'):
                        mm = 'gas chromatography flame ionisation detection'

                    elif (mm == 'Gas Chromatography (RGD)'):
                        mm = 'gas chromatography reduction gas detection'
        
                    elif mm == 'Chemiluminescence':
                        mm = 'chemiluminescence'
            
                    elif (mm == 'Spectrophotometry') or (mm == 'spectrophotometry,naphthyl-ethylenediaminedihydrochloridemethod'):
                        mm = 'spectrophotometry'

                    elif mm == 'continuous(carbondioxide)remotespectroscopicmethod(methaneandsurfaceozone)':        
                        mm = 'near infrared spectroscopy'

                    elif mm == '':
                        if species == 'O3':
                            mm = 'ultraviolet photometry'
                        if species == 'CO':
                            mm = 'non-dispersive infrared spectroscopy'
                        if species == 'NO2':
                            mm = 'chemiluminescence'
                        if species == 'NO':
                            mm = 'chemiluminescence'
                        if species == 'ISOP':
                            mm = 'gas chromatography flame ionisation detection'
                
                    #do data quality checks        
                    full_data,data_valid,data_complete = modules.quality_check_periodic(o3_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)
        
                    #convert file res to standard format
                    if file_res == 'hr':
                        file_res = 'H'
                    elif file_res == 'da':
                        file_res = 'D'
                    elif file_res == 'mo':
                        file_res = 'M'
                    
                    #no raw class so set as na
                    raw_class_name = 'na'

                    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete
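
# --- Sketch: mapping irregular timestamps onto the hourly grid ---
# The function above drops observation values into a fixed hourly grid with
# np.searchsorted. A minimal self-contained illustration of that step; the
# timestamps and values below are invented, not taken from any data file.
import numpy as np

n_days = 2
syn_grid_time = np.round(np.arange(0, n_days, 1. / 24), decimals=5)

obs_time = np.round(np.array([0.0, 0.5, 1.25]), decimals=5)  #days since start: 00:00, 12:00, day 2 06:00
obs_vals = np.array([31.2, 28.7, 35.1])

#missing-value fill, as in the scripts above
grid = np.empty(len(syn_grid_time))
grid[:] = -99999

indices = np.searchsorted(syn_grid_time, obs_time, side='left')
grid[indices] = obs_vals

print('%s %s %s' % (grid[0], grid[12], grid[30]))  #-> 31.2 28.7 35.1
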
Example #10
def site_iter_process(valid_refs, c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

    ref = valid_refs[c]
    print 'ref = ', ref, c

    #get site instrument for species
    met_i = file_refs.index(ref)
    file_name = met_refs[met_i]
    site_name = met_sitenames[met_i]
    print site_name
    site_species = list(met_species[met_i])
    print site_species
    site_instruments = list(met_instruments[met_i])
    m_method = site_instruments[site_species.index(species)]

    site_resolutions = []
    data_valid = True

    s_files = insensitive_glob(
        '/work/home/db876/observations/surface/%s/EANET/*%s.csv' %
        (fname_species, file_name))
    site_files = []
    for y in year_array:
        for f in s_files:
            if str(y)[-2:] in f:
                site_files.append(f)

    site_files = modules.natsorted(site_files)

    years = []
    months = []
    days = []
    hours = []

    vals = []
    yyyymmdd = []
    hhmm = []

    n_dup_array = []

    last_year_index = len(site_files)
    for y in year_array:
        got_year = False
        for file in site_files:
            last_file_split = file.split('/')[-1]
            if str(y)[2:] in last_file_split:
                got_year = True
                break
        if got_year == False:
            timedelta_diff = datetime.date(y + 1, 1, 1) - datetime.date(
                y, 1, 1)
            ndays_missing = timedelta_diff.days
            continue

        print file

        valid = True
        with open(file, 'rb') as f:
            reader = csv.reader(f, delimiter=',')
            counter = 0

            #get resolution
            for row in reader:
                if counter == 0:
                    all_units = row

                elif counter == 1:
                    file_res = 'H'

                    try:
                        hour_index = row.index('Hour')
                    except:
                        file_res = 'D'
                    try:
                        day_index = row.index('Day')
                    except:
                        file_res = 'M'
                    month_index = row.index('Month')
                    year_index = row.index('Year')

                    try:
                        spec_index = row.index(species.upper())
                        unit = all_units[spec_index]
                    except:
                        valid = False
                        break

                    #make sure each year's units are ppb
                    if unit != 'ppb':
                        raise ValueError('Units not ppb! Unit is %s' % unit)

                if counter == 2:
                    if file_res == 'H':
                        yyyy = row[year_index]
                        mm = row[month_index]
                        dd = row[day_index]
                        hh = row[hour_index]
                    elif file_res == 'D':
                        yyyy = row[year_index]
                        mm = row[month_index]
                        dd = row[day_index]
                        hh = 1
                    elif file_res == 'M':
                        yyyy = row[year_index]
                        mm = row[month_index]
                        dd = 1
                        hh = 1

                    start_datetime = datetime.datetime(int(yyyy), int(mm),
                                                       int(dd), int(hh))

                if counter == 3:
                    if file_res == 'H':
                        yyyy = row[year_index]
                        mm = row[month_index]
                        dd = row[day_index]
                        hh = row[hour_index]
                    elif file_res == 'D':
                        yyyy = row[year_index]
                        mm = row[month_index]
                        dd = row[day_index]
                        hh = 1
                    elif file_res == 'M':
                        yyyy = row[year_index]
                        mm = row[month_index]
                        dd = 1
                        hh = 1

                    present_datetime = datetime.datetime(
                        int(yyyy), int(mm), int(dd), int(hh))

                    time_delt = present_datetime - start_datetime
                    hour_delt = datetime.timedelta(hours=1)
                    day_delt = datetime.timedelta(hours=24)
                    week_delt = datetime.timedelta(hours=24 * 7)
                    month_delt = datetime.timedelta(hours=24 * 28)

                    print time_delt

                    if (time_delt < day_delt):
                        print 'Hourly Data'
                        file_res = 'H'
                        site_resolutions.append(file_res)

                    elif (time_delt > hour_delt) & (time_delt < week_delt):
                        print 'Daily Data'
                        file_res = 'D'
                        site_resolutions.append(file_res)

                    elif (time_delt > week_delt):
                        print 'Monthly Data'
                        file_res = 'M'
                        site_resolutions.append(file_res)

                counter += 1

        #READ IN DATA
        if valid == True:
            #limit to sites with hourly data files, if required
            if output_res == 'H':
                if file_res != 'H':
                    print 'Not processing as only want hourly files'
                    continue
            if output_res == 'HD':
                if file_res == 'M':
                    print 'Not processing as only want hourly and daily files'
                    continue
            with open(file, 'rb') as f:
                reader = csv.reader(f, delimiter=',')
                counter = 0
                val_count = 0
                for row in reader:

                    if counter >= 2:
                        yyyy = row[year_index]
                        mm = row[month_index]

                        #add to n_obs_all
                        n_all += 1
                        n_after_nometa += 1

                        if file_res == 'H':
                            try:
                                vals = np.append(vals,
                                                 np.float64(row[spec_index]))
                            except:
                                vals = np.append(vals, -99999)

                            current_datetime = present_datetime + relativedelta(
                                hours=val_count)
                            yyyymmdd.append(
                                current_datetime.strftime("%Y%m%d"))
                            hhmm.append(current_datetime.strftime("%H%M"))
                            n_dup_array = np.append(n_dup_array, 0)

                        elif file_res == 'D':
                            try:
                                vals = np.append(
                                    vals, [np.float64(row[spec_index])] * 24)
                            except:
                                vals = np.append(vals, [-99999] * 24)

                            current_datetime = present_datetime + relativedelta(
                                days=val_count)
                            next_datetime = present_datetime + relativedelta(
                                days=val_count + 1)
                            all_datetimes = pd.date_range(current_datetime,
                                                          next_datetime,
                                                          freq='H')[:-1]
                            for d in all_datetimes:
                                yyyymmdd.append(d.strftime("%Y%m%d"))
                                hhmm.append(d.strftime("%H%M"))
                            n_dup_array = np.append(n_dup_array, 0)
                            n_dup_array = np.append(n_dup_array, [1] * 23)

                        elif file_res == 'M':
                            month_days = monthrange(int(yyyy), int(mm))[1]
                            try:
                                vals = np.append(
                                    vals, [np.float64(row[spec_index])] *
                                    (month_days * 24))
                            except:
                                vals = np.append(vals,
                                                 [-99999] * (month_days * 24))

                            current_datetime = present_datetime + relativedelta(
                                months=int(mm) - 1)
                            next_datetime = present_datetime + relativedelta(
                                months=int(mm))
                            all_datetimes = pd.date_range(current_datetime,
                                                          next_datetime,
                                                          freq='H')[:-1]
                            for d in all_datetimes:
                                yyyymmdd.append(d.strftime("%Y%m%d"))
                                hhmm.append(d.strftime("%H%M"))
                            n_dup_array = np.append(n_dup_array, 0)
                            n_dup_array = np.append(n_dup_array, [1] *
                                                    ((month_days * 24) - 1))

                        val_count += 1
                    counter += 1

        else:
            print 'Species is not in file header. Skipping Year'
            timedelta_diff = datetime.date(y + 1, 1, 1) - datetime.date(
                y, 1, 1)
            ndays_missing = timedelta_diff.days
            print 'ndays missing = ', ndays_missing

    #if no data remains (e.g. the required time resolution was never met), exit
    if len(vals) == 0:
        n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na',
            'na'
        ]
        return c, ['na'], ['na'], ['na'], False, meta, exit_c_list, n_c_list, unknown_list, 'nothourly', np.zeros(0)

    #create max possible grid
    full_data = np.empty(n_hours)
    full_data_after_flagsandlod = np.empty(n_hours)
    big_n_dup_array = np.zeros(n_hours)
    full_data[:] = -99999
    full_data_after_flagsandlod[:] = -99999

    #convert blank values to -99999
    test_inv = vals == ''
    vals[test_inv] = -99999

    #convert number invalids to -99999
    test_inv = vals < 0
    vals[test_inv] = -99999

    #if all site resolutions are the same, take the first as file_res
    all_same = all(x == site_resolutions[0] for x in site_resolutions)
    if all_same == True:
        file_res = site_resolutions[0]
    else:
        #otherwise take lowest frequency res as file_res
        if 'M' in site_resolutions:
            file_res = 'M'
        elif 'D' in site_resolutions:
            file_res = 'D'
        else:
            file_res = 'H'

    #get meta
    i_ref = file_refs.index(ref)
    site_ref = ref
    data_tz = np.float32(met_tz[i_ref])
    all_tz = [data_tz]
    lat = np.float32(met_lats[i_ref])
    lon = np.float32(met_lons[i_ref])
    alt = np.float32(met_alts[i_ref])
    raw_class_name = met_class[i_ref]
    country = met_country[i_ref]
    unit = str(unit)
    contact = 'Ayako Aoyagi, Asia Center for Air Pollution Research, [email protected]'

    #adjust dates and times if tz is not equal to 0
    tz = int(data_tz)
    if tz != 0:
        for i in range(len(yyyymmdd)):
            #create datetime
            dt = datetime.datetime(int(yyyymmdd[i][:4]), int(yyyymmdd[i][4:6]),
                                   int(yyyymmdd[i][6:]), int(hhmm[i][:2]),
                                   int(hhmm[i][2:]))
            if tz > 0:
                dt = dt - datetime.timedelta(hours=int(tz))
            elif tz < 0:
                dt = dt + datetime.timedelta(hours=np.abs(int(tz)))
            yyyymmdd[i] = dt.strftime("%Y%m%d")
            hhmm[i] = dt.strftime("%H%M")

    #put vals into full grid
    date_con = np.array(yyyymmdd).astype(int)
    time_con = np.array(hhmm).astype(int)

    #remove data < 1970 and >= 2015
    test_inds = (date_con >= 19700101) & (date_con < 20150101)
    date_con = date_con[test_inds]
    time_con = time_con[test_inds]
    vals = vals[test_inds]
    n_dup_array = n_dup_array[test_inds]

    #set st_big and mm_big
    st_big = ['continuous'] * len(vals)
    mm_big = [m_method] * len(vals)

    #get obs valid
    test = vals >= 0
    valid_hours_dup = np.sum(n_dup_array[test])
    n_obs_valid = int(len(vals[test]) - valid_hours_dup)
    n_after_flagsandlod += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con, time_con, start_year)
    converted_time = np.round(converted_time, decimals=5)
    syn_grid_time = np.arange(0, n_days, 1. / 24)
    syn_grid_time = np.round(syn_grid_time, decimals=5)
    raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    vals = np.array(vals)
    full_data_after_flagsandlod[raw_indices] = vals
    raw_st = np.copy(st_big)
    raw_mm = np.copy(mm_big)

    # test and remove duplicate and overlap points
    converted_time, vals, mm_big, st_big, n_dup_array = modules.remove_duplicate_points(
        site_ref, converted_time, vals, mm_big, st_big, n_dup_array,
        output_res)
    test = vals >= 0
    valid_hours_dup = np.sum(n_dup_array[test])
    n_obs_valid = int(len(vals[test]) - valid_hours_dup)
    n_after_duplicate += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = vals
    big_n_dup_array[indices] = n_dup_array

    key_meta = [lat, lon, alt]

    #get sampling/instrument grids
    raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(
        site_ref, process_group, species, raw_st, raw_mm,
        full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list,
        unknown_mm_refs_list, no2_type)

    #do quality checks
    data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control(
        site_ref, species, file_res, no2_type, grid_dates, full_data,
        big_n_dup_array, valid_hours_dup, raw_st_grid, p_st_grid,
        p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid,
        p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta,
        n_all, inv_nometa, n_after_nometa, n_after_flagsandlod,
        n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata,
        inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution,
        inv_badmeasurementmethod, n_after_badmeasurementmethod)
    if data_valid == False:
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na'
        ]
        return c, ['na'], ['na'], ['na'], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(1)

    #recast data tz as float32 after checks
    data_tz = np.float32(data_tz)

    #set processed unit
    p_unit = 'ppbv'

    #get local timezone
    try:
        local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True)
        pytz_obj = pytz.timezone(local_tz_name)
        datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1))
        if datetime_offset < datetime.timedelta(0):
            local_tz = -(24 - int(datetime_offset.seconds / 60 / 60))
        else:
            local_tz = int(datetime_offset.seconds / 60 / 60)
    except:
        local_tz = 'na'
        print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref)
        unknown_local_tz_list.append(site_ref)

    #pack meta
    meta = [
        lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz,
        local_tz, site_name, country, contact
    ]

    #if blank strings in meta then convert to 'na'
    for i in range(len(meta)):
        try:
            if meta[i].strip() == '':
                meta[i] = 'na'
        except:
            pass

    print set(raw_st_grid)
    print set(raw_mm_grid)
    print set(p_st_grid)
    print set(p_mm_grid)
    print meta

    exit_c_list = np.array([
        inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
        inv_badmeasurementmethod
    ])
    n_c_list = np.array([
        n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
        n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
        n_after_badmeasurementmethod
    ])
    print 'exit counts = ', exit_c_list
    print 'n obs counts = ', n_c_list

    unknown_list = [
        unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
    ]

    return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
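
# --- Sketch: shifting local date/time strings back to UTC ---
# The tz correction above subtracts a site's fixed UTC offset from each
# YYYYMMDD/HHMM pair. A tiny standalone version; the offset and timestamps
# are made-up examples.
import datetime

tz = 9  #assume a UTC+9 site
yyyymmdd = ['20070101', '20070101']
hhmm = ['0000', '0100']

for i in range(len(yyyymmdd)):
    dt = datetime.datetime(int(yyyymmdd[i][:4]), int(yyyymmdd[i][4:6]),
                           int(yyyymmdd[i][6:]), int(hhmm[i][:2]),
                           int(hhmm[i][2:]))
    dt = dt - datetime.timedelta(hours=tz)  #positive offsets shift back to UTC
    yyyymmdd[i] = dt.strftime("%Y%m%d")
    hhmm[i] = dt.strftime("%H%M")

print(yyyymmdd)  #-> ['20061231', '20061231']
print(hhmm)      #-> ['1500', '1600']
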
Example #11
def onpick(event):
    global pl

    global ind
    global fig2

    ind = event.ind
    ind = ind[0]
    #x_data = event.xdata
    #y_data = event.ydata

    #find ind of closest lat/lon
    #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data)

    try:
        for i in range(len(pl)):
            pl.pop(0).remove()
            first_run = False

    except:
        first_run = True
        pass

    pl = m.plot([X[ind]], [Y[ind]],
                'o',
                ms=12,
                alpha=0.6,
                color='yellow',
                zorder=20)

    #get model timeseries for site clicked
    lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, obs_lats[ind],
                                             obs_lons[ind])
    model_var_pick = model_var[:, lat_n, lon_n]
    model_var_pick = model_var_pick * 1e9
    model_var_mask = np.ma.masked_where(model_var_pick <= 0, model_var_pick)

    if model_name == 'MACC':
        model_time_pd = pd.date_range(start=model_datetimes[0],
                                      end=model_datetimes[-1],
                                      freq='H')
        count = 0
        valids = []
        for i in range(len(model_time_pd)):
            if count == 0:
                valids.append(i)
                count += 1
            elif count == 2:
                count = 0
            else:
                count += 1
        model_time_pd = model_time_pd[valids]
        model_var_pd = pd.Series(model_var_mask, index=model_time_pd)
    else:
        model_time_pd = pd.date_range(start=model_datetimes[0],
                                      end=model_datetimes[-1],
                                      freq='H')
        model_var_pd = pd.Series(model_var_mask, index=model_time_pd)

    #get obs timeseries for site clicked
    ref = obs_refs[ind]
    obs_ts_group = obs_root_grp.groups[ref]
    obs_var = obs_ts_group.variables[species.lower()][:]
    group = obs_ts_group.process_group
    lat = obs_ts_group.latitude
    lon = obs_ts_group.longitude
    alt = obs_ts_group.altitude
    complete = obs_ts_group.completeness
    a_class = obs_ts_group.anthrome_class
    r_class = obs_ts_group.raw_class
    continent = loc_dict[tags[ind]]
    country = obs_ts_group.country

    obs_var_mask = np.ma.masked_where(obs_var <= 0, obs_var)
    obs_time_pd = pd.date_range(start=obs_datetimes[0],
                                end=obs_datetimes[-1],
                                freq='H')
    obs_var_pd = pd.Series(obs_var_mask, index=obs_time_pd)

    #create sine wave from amp/phase
    obs_date_l = obs_date.astype(int)
    obs_time_l = obs_time.astype(int)
    obs_times = modules.date_process(obs_date_l, obs_time_l, start_year)
    obs_times = np.array(obs_times)
    pi2 = np.pi * 2

    #convert phases to radians
    calc = pi2 / 6.
    obs_ha_phase_r = obs_ha_phase[ind] * calc
    calc = pi2 / 12.
    obs_annual_phase_r = obs_annual_phase[ind] * calc

    ha_obs_wave = obs_ha_mag[ind] * (np.cos((pi2 * obs_times / (365.25 / 2.)) -
                                            (obs_ha_phase_r)))
    annual_obs_wave = obs_annual_mag[ind] * (np.cos((pi2 * obs_times /
                                                     (365.25)) -
                                                    (obs_annual_phase_r)))
    seasonal_obs_wave = (ha_obs_wave + annual_obs_wave) + obs_ave[ind]
    obs_seasonal_wave_pd = pd.Series(seasonal_obs_wave, index=obs_time_pd)

    #create sine wave from amp/phase
    mod_date_l = model_date.astype(int)
    mod_time_l = model_time.astype(int)
    mod_times = modules.date_process(mod_date_l, mod_time_l, start_year)
    mod_times = np.array(mod_times)
    pi2 = np.pi * 2

    #convert phases to radians
    calc = pi2 / 6.
    model_ha_phase_r = model_ha_phase[ind] * calc
    calc = pi2 / 12.
    model_annual_phase_r = model_annual_phase[ind] * calc

    ha_model_wave = model_ha_mag[ind] * (np.cos((pi2 * mod_times /
                                                 (365.25 / 2.)) -
                                                (model_ha_phase_r)))
    annual_model_wave = model_annual_mag[ind] * (np.cos(
        (pi2 * mod_times / (365.25)) - (model_annual_phase_r)))
    seasonal_model_wave = (ha_model_wave + annual_model_wave) + model_ave[ind]
    model_seasonal_wave_pd = pd.Series(seasonal_model_wave,
                                       index=model_time_pd)

    #get spectra data
    site_group_obs = root_grp_obs_spec.groups[ref]
    site_group_mod = root_grp_mod_spec.groups[ref]

    obs_period = site_group_obs.variables['period'][:]
    mod_period = site_group_mod.variables['period'][:]

    obs_amp = site_group_obs.variables['amplitude'][:]
    mod_amp = site_group_mod.variables['amplitude'][:]

    fig.canvas.draw()

    #on repeat picks, close the previous popout figure before drawing a new one;
    #the first and repeat picks then draw the same two-panel figure
    if first_run == False:
        plt.close(fig2)

    fig2, (axo, axo2) = plt.subplots(2, figsize=(24, 12))
    fig2.patch.set_facecolor('white')

    axo.plot_date(obs_time_pd.to_pydatetime(),
                  obs_var_pd,
                  color='black',
                  markersize=3,
                  label='Observations')
    axo.plot_date(model_time_pd.to_pydatetime(),
                  model_var_pd,
                  color='red',
                  alpha=0.5,
                  markersize=3,
                  label='%s %s %s %s' % (model_name, version, grid_size, met),
                  markeredgecolor='None')
    axo.plot_date(obs_time_pd.to_pydatetime(),
                  obs_seasonal_wave_pd,
                  color='yellow',
                  markersize=3,
                  label='Obs Seasonal Waveform',
                  markeredgecolor='None')
    axo.plot_date(model_time_pd.to_pydatetime(),
                  model_seasonal_wave_pd,
                  color='green',
                  markersize=3,
                  label='Model Seasonal Waveform',
                  markeredgecolor='None')

    axo2.loglog(obs_period, obs_amp, color='black', label='Obs')
    axo2.loglog(mod_period,
                mod_amp,
                color='red',
                label='%s %s %s %s' % (model_name, version, grid_size, met))

    axo2.text(0.01, 0.95, 'Obs D Amp = %8.2f ppb' % (obs_daily_mag[ind]), transform=axo2.transAxes, fontweight='bold')
    axo2.text(0.01, 0.92, 'Model D Amp = %8.2f ppb' % (model_daily_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red')

    axo2.text(0.01, 0.85, 'Obs HA Amp = %8.2f ppb' % (obs_ha_mag[ind]), transform=axo2.transAxes, fontweight='bold')
    axo2.text(0.01, 0.82, 'Model HA Amp = %8.2f ppb' % (model_ha_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red')

    axo2.text(0.01, 0.75, 'Obs A Amp = %8.2f ppb' % (obs_annual_mag[ind]), transform=axo2.transAxes, fontweight='bold')
    axo2.text(0.01, 0.72, 'Model A Amp = %8.2f ppb' % (model_annual_mag[ind]), transform=axo2.transAxes, fontweight='bold', color='red')

    axo2.text(0.01, 0.55, 'Obs D Phase = %8.2f' % (obs_daily_phase[ind]), transform=axo2.transAxes, fontweight='bold')
    axo2.text(0.01, 0.52, 'Model D Phase = %8.2f' % (model_daily_phase[ind]), transform=axo2.transAxes, fontweight='bold', color='red')

    axo2.text(0.01, 0.45, 'Obs HA Phase = %8.2f' % (obs_ha_phase[ind]), transform=axo2.transAxes, fontweight='bold')
    axo2.text(0.01, 0.42, 'Model HA Phase = %8.2f' % (model_ha_phase[ind]), transform=axo2.transAxes, fontweight='bold', color='red')

    #wrap annual phases > 12 months back into [0, 12]
    obs_a_ph = obs_annual_phase[ind]
    mod_a_ph = model_annual_phase[ind]

    if obs_a_ph > 12:
        obs_a_ph = obs_a_ph - 12.
    if mod_a_ph > 12:
        mod_a_ph = mod_a_ph - 12.

    axo2.text(0.01, 0.35, 'Obs A Phase = %8.2f' % (obs_a_ph), transform=axo2.transAxes, fontweight='bold')
    axo2.text(0.01, 0.32, 'Model A Phase = %8.2f' % (mod_a_ph), transform=axo2.transAxes, fontweight='bold', color='red')

    axo2.text(0.01, 0.15, 'Obs Ave = %8.2f ppb' % (obs_ave[ind]), transform=axo2.transAxes, fontweight='bold')
    axo2.text(0.01, 0.12, 'Model Ave = %8.2f ppb' % (model_ave[ind]), transform=axo2.transAxes, fontweight='bold', color='red')

    #mark the known periodic components at 1, 182.625 and 365.25 days
    axo2.axvline(1., ymin=0, ymax=1, color='blue', linestyle='--')
    axo2.axvline(182.625, ymin=0, ymax=1, color='blue', linestyle='--')
    axo2.axvline(365.25, ymin=0, ymax=1, color='blue', linestyle='--')

    axo2.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
    axo2.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
    plt.gca().xaxis.set_major_formatter(FuncFormatter(xformatter))
    plt.gca().yaxis.set_major_formatter(FuncFormatter(yformatter))

    axo.set_title(
        'Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s, Grid Index = %s,%s'
        % (ref, country, continent, group, lat, lon, alt, complete,
           a_class, r_class, lat_n, lon_n))

    plt.legend(loc='lower right')
    plt.tight_layout()
    axo.grid()
    axo2.grid()

    plt.show()
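
# --- Sketch: rebuilding a seasonal waveform from amplitude and phase ---
# onpick above reconstructs the half-annual and annual components as
# mag * cos(2*pi*t/period - phase_in_radians) + mean. A standalone version
# for the annual component only; the amplitude, phase and mean are invented.
import numpy as np

t = np.arange(0, 365.25, 1. / 24)  #days since the start of the year
pi2 = np.pi * 2

annual_mag = 8.0           #ppb, assumed
annual_phase_months = 7.0  #month of the annual maximum, assumed
ave = 30.0                 #mean mixing ratio in ppb, assumed

annual_phase_r = annual_phase_months * (pi2 / 12.)  #12 months -> 2*pi radians
annual_wave = annual_mag * (np.cos((pi2 * t / 365.25) - annual_phase_r)) + ave

#the peak falls where the cosine argument is zero, i.e. around month 7
print(t[np.argmax(annual_wave)] / (365.25 / 12.))  #-> ~7.0
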
Example #12
def site_iter_process(valid_refs, c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

    #process data for each site at a time
    site_ref = valid_refs[c]
    data_valid = True
    print 'ref = ', site_ref, c

    #get all files for ref
    all_files = glob.glob(
        '/work/home/db876/observations/surface/O3/SEARCH/%s*' % (site_ref))

    file_years = [i[-8:-4] for i in all_files]

    #sort files
    all_files = [x for (y, x) in sorted(zip(file_years, all_files))]

    dates = []
    times = []
    site_vals = []

    print all_files

    for f in all_files:
        print f
        if f[-3:] == 'xls':
            spec_str = species
            flag_str = '%s FL' % (species)
            date_str = 'DATE/TIME'
            all_data = get_data(f)
            all_data = all_data.values()
            headers = all_data[0][2]
            date_ind = headers.index(date_str)
            spec_ind = headers.index(spec_str)
            flag_ind = headers.index(flag_str)

            data_cut = all_data[0][3:]

            for i in range(len(data_cut)):
                row_cut = data_cut[i]
                if len(row_cut) < 30:
                    diff = 30 - len(row_cut)
                    for x in range(diff):
                        row_cut.append('')

                dates.append(row_cut[date_ind].strftime("%Y%m%d"))
                times.append(row_cut[date_ind].strftime("%H%M"))

                try:
                    val = np.float64(row_cut[spec_ind])
                except:
                    val = -99999

                if (row_cut[flag_ind] == 'I') or (row_cut[flag_ind]
                                                  == 'C') or (val < 0):
                    site_vals.append(-99999)
                else:
                    site_vals.append(val)

        elif f[-3:] == 'csv':
            date_str = 'Date/Time[LST]'
            spec_str = 'Average %s[ppb]' % (species)
            flag_str = 'Flags[%s]' % (species)
            mycsv = csv.reader(open(f), delimiter=',')
            start_read = 999999
            row_count = 0
            for row in mycsv:
                try:
                    if row[0] == date_str:
                        date_ind = 0
                        spec_ind = row.index(spec_str)
                        flag_ind = row.index(flag_str)
                        start_read = row_count + 1
                except:
                    pass

                if row_count >= start_read:
                    dates.append(
                        parser.parse(row[date_ind]).strftime("%Y%m%d"))
                    times.append(parser.parse(row[date_ind]).strftime("%H%M"))
                    #dates.append(row[date_ind][6:10]+row[date_ind][0:2]+row[date_ind][3:5])
                    #times.append(row[date_ind][11:13]+row[date_ind][14:])
                    if ('I' in row[flag_ind]) or ('C' in row[flag_ind]) or (
                            row[flag_ind]
                            == 'Null') or (np.float64(row[spec_ind]) < 0):
                        site_vals.append(-99999)
                    else:
                        site_vals.append(np.float64(row[spec_ind]))

                row_count += 1

    site_vals = np.array(site_vals)

    #adjust dates and times if tz is not equal to 0
    data_tz = tz_dict[site_ref]
    if data_tz != 0:
        for i in range(len(dates)):
            #create datetime
            dt = datetime.datetime(int(dates[i][:4]), int(dates[i][4:6]),
                                   int(dates[i][6:]), int(times[i][:2]),
                                   int(times[i][2:]))
            if data_tz > 0:
                dt = dt - datetime.timedelta(hours=int(data_tz))
            elif data_tz < 0:
                dt = dt + datetime.timedelta(hours=np.abs(int(data_tz)))
            dates[i] = dt.strftime("%Y%m%d")
            times[i] = dt.strftime("%H%M")

    #add val to total obs count
    n_all += len(site_vals)
    n_after_nometa += len(site_vals)

    #put vals into full grid
    date_con = np.array(dates).astype(int)
    time_con = np.array(times).astype(int)

    #remove data < 1970 and >= 2015
    test_inds = (date_con >= 19700101) & (date_con < 20150101)
    date_con = date_con[test_inds]
    time_con = time_con[test_inds]
    site_vals = site_vals[test_inds]

    #set st_big as 'continuous'
    st_big = ['continuous'] * len(site_vals)

    #set mm_big
    if species == 'O3':
        mm_big = ['ultraviolet photometry'] * len(site_vals)
    elif species == 'NO':
        mm_big = ['chemiluminescence'] * len(site_vals)
    elif species == 'NO2':
        mm_big = ['chemiluminescence (conversion-photolysis)'] * len(site_vals)
    elif species == 'CO':
        mm_big = ['non-dispersive infrared spectroscopy'] * len(site_vals)

    #get obs valid after flagsandlod
    test = site_vals >= 0
    n_obs_valid = len(site_vals[test])
    n_after_flagsandlod += n_obs_valid

    #create max possible grid
    full_data = np.empty(n_hours)
    full_data_after_flagsandlod = np.empty(n_hours)
    big_n_dup_array = np.zeros(n_hours)
    full_data[:] = -99999
    full_data_after_flagsandlod[:] = -99999

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con, time_con, start_year)
    converted_time = np.round(converted_time, decimals=5)
    syn_grid_time = np.arange(0, n_days, 1. / 24)
    syn_grid_time = np.round(syn_grid_time, decimals=5)
    raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)
    full_data_after_flagsandlod[raw_indices] = site_vals
    raw_st = np.copy(st_big)
    raw_mm = np.copy(mm_big)

    #test and remove duplicate and overlap points
    converted_time, site_vals, mm_big, st_big, na = modules.remove_duplicate_points(
        site_ref, converted_time, site_vals, mm_big, st_big, 'blank',
        output_res)
    test = site_vals >= 0
    n_obs_valid = int(len(site_vals[test]))
    print 'n obs valid = ', n_obs_valid
    n_after_duplicate += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = site_vals

    #get site meta
    lat = lat_dict[site_ref]
    lon = lon_dict[site_ref]
    alt = alt_dict[site_ref]
    unit = 'ppb'
    raw_class_name = raw_class_dict[site_ref]
    site_name = sitename_dict[site_ref]
    country = 'United States'
    contact = '*****@*****.**'
    all_tz = [data_tz]

    key_meta = [lat, lon, alt]

    #set site file resolution as hourly
    file_res = 'H'

    #get sampling/instrument grids
    raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(
        site_ref, process_group, species, raw_st, raw_mm,
        full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list,
        unknown_mm_refs_list, no2_type)

    #do quality checks
    data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control(
        site_ref, species, file_res, no2_type, grid_dates, full_data,
        big_n_dup_array, 0, raw_st_grid, p_st_grid,
        p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid,
        p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta,
        n_all, inv_nometa, n_after_nometa, n_after_flagsandlod,
        n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata,
        inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution,
        inv_badmeasurementmethod, n_after_badmeasurementmethod)
    if data_valid == False:
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na'
        ]
        return c, ['na'], ['na'], ['na'], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(1)

    #set processed unit
    p_unit = 'ppbv'

    #get local timezone
    try:
        local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True)
        pytz_obj = pytz.timezone(local_tz_name)
        datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1))
        if datetime_offset < datetime.timedelta(0):
            local_tz = -(24 - int(datetime_offset.seconds / 60 / 60))
        else:
            local_tz = int(datetime_offset.seconds / 60 / 60)
    except:
        local_tz = 'na'
        print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref)
        unknown_local_tz_list.append(site_ref)

    #pack meta
    meta = [
        lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz,
        local_tz, site_name, country, contact
    ]

    #if blank strings in meta then convert to 'na'
    for i in range(len(meta)):
        try:
            if meta[i].strip() == '':
                meta[i] = 'na'
        except:
            pass

    print set(raw_st_grid)
    print set(raw_mm_grid)
    print set(p_st_grid)
    print set(p_mm_grid)
    print meta

    exit_c_list = np.array([
        inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
        inv_badmeasurementmethod
    ])
    n_c_list = np.array([
        n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
        n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
        n_after_badmeasurementmethod
    ])
    print 'exit counts = ', exit_c_list
    print 'n obs counts = ', n_c_list

    unknown_list = [
        unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
    ]

    return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
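
# --- Sketch: folding a pytz UTC offset into a signed hour ---
# The local-timezone step above converts the timedelta from pytz's utcoffset()
# into a signed integer hour; negative offsets come back as
# (days=-1, seconds=...), hence the -(24 - h) form. Standalone version with a
# hard-coded zone name ('America/New_York' is just an example; the scripts
# look the name up from lat/lon with tzNameAt).
import datetime
import pytz

pytz_obj = pytz.timezone('America/New_York')
datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1))

if datetime_offset < datetime.timedelta(0):
    local_tz = -(24 - int(datetime_offset.seconds / 60 / 60))
else:
    local_tz = int(datetime_offset.seconds / 60 / 60)

print(local_tz)  #-> -5 (EST in January 2000)
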
Example #13
def site_iter_process(valid_refs,c):
#for site_ref in valid_refs:
    site_ref = valid_refs[c]
 
    data_valid = True
    print 'ref = ',site_ref
    site_test = all_refs == site_ref
    
    site_yyyymmdd = yyyymmdd[site_test]
    site_hhmm = hhmm[site_test]
    site_vals = vals[site_test]
    
    site_vals = np.float64(site_vals)
 
    #convert all invalids to -99999
    test_inv = site_vals < 0
    site_vals[test_inv] = -99999

    #put vals into full grid
    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)
    
    #create max possible o3 grid
    full_data = np.empty(n_hours)
    full_data[:] = -99999
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)
    full_data[indices] = site_vals
    
    meta_index = meta_refs.index(site_ref)
    tz = float(meta_tz[meta_index])
    lat = np.float64(meta_lats[meta_index])
    lon = np.float64(meta_lons[meta_index])
    alt = np.float64(meta_alts[meta_index])
    raw_class_name = meta_class[meta_index]
    anthrome_class_name = class_name[meta_index]
        
    #check tz is whole number else skip site
    if (tz % 1) != 0:
        data_valid = False
        print 'Timezone is not a whole number. Skipping.'
        
    #correct timezone to UTC
    tz = int(tz)
    if tz < 0:
        #get rid of values at end and add -99999's at start
        cut = full_data[:tz]
        for num in range(np.abs(tz)):
            cut = np.insert(cut,0, -99999)
        full_data = cut
    elif tz > 0:
        #get rid of values at start and add -99999's at end
        cut = full_data[tz:]
        for num in range(tz):
            cut = np.append(cut, -99999)
        full_data = cut
            
    #if species is CO then convert units from ppmv to ppbv
    if species == 'CO':
        valid_inds = full_data != -99999 
        full_data[valid_inds] = full_data[valid_inds]*1e3        
    
    #do data quality checks        
    full_data,data_valid = modules.quality_check(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)
    
    #set sampling as average
    if (species == 'O3') or (species == 'CO') or(species == 'NO') or (species == 'NO2'):
        st = 'average'
    elif (species == 'ISOP'):
        st = 'flask'   
    
    #set site file resolution
    if (species == 'O3') or (species == 'CO') or(species == 'NO') or (species == 'NO2'):
        file_res = 'H'
    elif (species == 'ISOP'):
        file_res = 'D'
    
    #check file res is ok for output res
    if (output_res == 'H'):
        if (file_res == 'D') or (file_res == 'M'):
            print 'File resolution has to be Minimum Hourly. Skipping'
            data_valid = False
            return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na'
    elif (output_res == 'D'):
        if (file_res == 'M'):
            print 'File resolution has to be Minimum Daily. Skipping'
            data_valid = False
            return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na'
    
    #set mm
    if species == 'O3':
        mm = 'ultraviolet photometry'
    elif (species == 'NO') or (species == 'NO2'):
        mm = 'chemiluminescence'
    elif species == 'CO':
        mm = 'non-dispersive infrared spectrometry'
    elif species == 'ISOP':
        mm = 'gas chromatography flame ionisation detection'
        
        
    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
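
#A minimal sketch, not part of the original pipeline, of the timezone
#correction used above: shift an hourly series by a whole-hour UTC offset
#and pad the exposed end with the -99999 missing-data flag. The helper
#name and demo array are illustrative assumptions.
import numpy as np

def shift_to_utc(series, tz):
    #tz < 0: drop the last |tz| hours and pad -99999's at the start
    #tz > 0: drop the first tz hours and pad -99999's at the end
    out = np.full(len(series), -99999.)
    if tz < 0:
        out[-tz:] = series[:tz]
    elif tz > 0:
        out[:-tz] = series[tz:]
    else:
        out[:] = series
    return out

print shift_to_utc(np.arange(6.), -2)   #[-99999. -99999. 0. 1. 2. 3.]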
Example #14
site_ref = raw_input(
    'Choose site from list. Sites with full set of yearly files between %i & %i are:\n%s\n'
    % (first_year, last_year + 1, '   '.join(i for i in valid_refs)))

#read in specific site data
site_group = root_grp.groups[site_ref]

#read in variables for site
obs_var = site_group.variables['o3'][:]
full_obs_var = obs_var[:]
full_obs_var_mask = np.ma.masked_where(full_obs_var <= 0, full_obs_var)
obs_date = site_group.variables['date'][:]
obs_time = site_group.variables['time'][:]
obs_lat = site_group.latitude
obs_lon = site_group.longitude
obs_alt = site_group.altitude

obs_times = modules.date_process(obs_date, obs_time)
obs_times = np.array(obs_times)
obs_times_full = obs_times[:]

##cut out invalid obs data
obs_var_mask = np.ma.masked_where(obs_var <= 0, obs_var)
valids = obs_var > 0
obs_var = obs_var[valids]
obs_times = obs_times[valids]

obs_ave = np.average(obs_var)

year_val = []
month_val = []
day_val = []
hour_val = []
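
#Hedged sketch of the invalid-data handling in these snippets: non-positive
#values are masked for masked-array statistics and dropped for the plain
#arrays; both routes give the same mean. The toy values are illustrative.
import numpy as np

raw = np.array([35.1, -99999., 41.7, 0., 38.2])
masked = np.ma.masked_where(raw <= 0, raw)   #keeps the shape, hides invalids
valids = raw > 0                             #boolean index, drops invalids
print np.ma.average(masked), np.average(raw[valids])   #38.33... twice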
Example #15
#----------------------------------------
#find model data gridbox to compare with obs.

#get model gridbox for obs site
lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, obs_lat, obs_lon)

model_var = model_var[:, lat_n, lon_n]
model_var = model_var * 1e9

model_var_mask = np.ma.masked_where(model_var <= 0, model_var)
model_ave = np.ma.average(model_var_mask)

#--------------------------------------------
#take half daily average of obs and model

obs_time = modules.date_process(obs_date, obs_time, start_year)
model_time = modules.date_process(model_date, model_time, start_year)

divisor = 6

#take half daily average of obs
total_len = len(obs_var_mask) / divisor
start = 0
end = divisor
ave_obs_var = []
ave_obs_time = []
for i in range(total_len):
    ave = np.ma.average(obs_var_mask[start:end])
    ave_obs_time = np.append(ave_obs_time, obs_time[start])
    ave_obs_var = np.append(ave_obs_var, ave)
    start += divisor
    end += divisor
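
#The loop above is a non-overlapping block average. A vectorized equivalent,
#as a sketch that assumes the series is first trimmed to a multiple of
#divisor (the helper name and demo values are illustrative):
import numpy as np

def block_average(values, times, divisor):
    n = (len(values) // divisor) * divisor            #trim the ragged tail
    ave_vals = values[:n].reshape(-1, divisor).mean(axis=1)
    ave_times = times[:n:divisor]                     #first time of each block
    return ave_vals, ave_times

v, t = block_average(np.arange(12.), np.arange(12.), 6)
print v, t   #[2.5 8.5] [0. 6.]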
def onpick(event):
    global pl
    
    global ind
    global fig2
    
    ind = event.ind
    print 'ind = ',ind
    ind = ind[0]
    #x_data = event.xdata
    #y_data = event.ydata

    #find ind of closest lat/lon
    #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data)
    
    try:
        for i in range(len(pl)):
            pl.pop(0).remove()
            first_run = False  
        
    except:
        first_run = True
        pass
    
    pl = m.plot([linear_lons[ind]], [linear_lats[ind]], 's', ms=20, alpha=0.6, color='yellow',zorder=20)

    
    #get model timeseries for site clicked
    lat_n,lon_n = modules.obs_model_gridbox(lat_e,lon_e,linear_lats[ind],linear_lons[ind])
    model_var_pick = model_var[:,lat_n,lon_n]
    model_var_pick = model_var_pick*1e9
    model_var_mask = np.ma.masked_where(model_var_pick<=0,model_var_pick)
    model_time_pd = pd.date_range(start = model_datetimes[0],end = model_datetimes[-1], freq = 'H')
    model_var_pd = pd.Series(model_var_mask, index=model_time_pd)

        
    #create sine wave from amp/phase
    model_date_l = model_date.astype(int)
    model_time_l = model_time.astype(int)
    model_times = modules.date_process(model_date_l,model_time_l,start_year)
    model_times = np.array(model_times)
    pi2 = np.pi*2
    
    ratio = 100./annual_amp[lat_n,lon_n]
    ha_percent = ratio*ha_amp[lat_n,lon_n]
    
    #convert phases (stored in months) to radians
    calc = pi2/6.
    ha_ph_r = ha_ph[lat_n,lon_n] * calc
    calc = pi2/12.
    annual_ph_r = annual_ph[lat_n,lon_n] * calc
    
    ha_model_wave = ha_amp[lat_n,lon_n]*(np.cos((pi2*model_times/(365.25/2.))-(ha_ph_r)))
    annual_model_wave = annual_amp[lat_n,lon_n]*(np.cos((pi2*model_times/(365.25))-(annual_ph_r)))
    
    ha_primary = p_ha_ph[lat_n,lon_n]
    ha_secondary = s_ha_ph[lat_n,lon_n]
    
    ha_model_wave = ha_model_wave+ave[lat_n,lon_n]
    annual_model_wave = annual_model_wave+ave[lat_n,lon_n]
    
    model_ha_wave_pd = pd.Series(ha_model_wave, index=model_time_pd)
    model_annual_wave_pd = pd.Series(annual_model_wave, index=model_time_pd)
    
    
    fig.canvas.draw()
        
    if first_run == False:
        plt.close(fig2)
        fig2, (axo) = plt.subplots(1,figsize=(24,12))
        fig2.patch.set_facecolor('white')
        
        axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='black', markersize = 3, label = 'Observations')
        axo.plot_date(model_time_pd.to_pydatetime(), model_ha_wave_pd, color='green', markersize = 3, label = 'Ha Waveform',markeredgecolor='None')
        axo.plot_date(model_time_pd.to_pydatetime(), model_annual_wave_pd, color='red', markersize = 3, label = 'Annual Waveform',markeredgecolor='None')
        
        #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual Amp Percent = %s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent)) 
        
        plt.legend(loc = 'lower right')
        plt.tight_layout()
        axo.grid()
        
        plt.show()
    else:
        fig2, (axo) = plt.subplots(1,figsize=(24,12))
        fig2.patch.set_facecolor('white')
        
        axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='black', markersize = 3, label = 'Observations')
        axo.plot_date(model_time_pd.to_pydatetime(), model_ha_wave_pd, color='green', markersize = 3, label = 'Ha Waveform',markeredgecolor='None')
        axo.plot_date(model_time_pd.to_pydatetime(), model_annual_wave_pd, color='red', markersize = 3, label = 'Annual Waveform',markeredgecolor='None')
        
        #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual Amp Percent = %s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent))
        
        plt.legend(loc = 'lower right')
        plt.tight_layout()
        axo.grid()
        
        plt.show()
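
#The handler above rebuilds seasonal waveforms from single (amplitude,
#phase) pairs. A minimal sketch of that reconstruction, with made-up
#amplitude, phase (radians) and mean values:
import numpy as np

t = np.arange(0., 365.25, 1./24)   #days at hourly resolution
period = 365.25                    #annual component
amp, phase, mean = 10., 1.2, 35.   #illustrative values
wave = mean + amp*np.cos((2*np.pi*t/period) - phase)
print wave.min(), wave.max()       #stays within mean -/+ amp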
Example #17
def site_iter_process(valid_refs, c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

    data_valid = True

    site_data = data[c]
    site_meta = site_data[0]
    file_res = resolutions[c]

    #get data and metadata
    try:
        lat = np.float32(site_meta['LATITUDE'])
    except:
        lat = 'na'
    try:
        lon = np.float32(site_meta['LONGITUDE'])
    except:
        lon = 'na'
    try:
        alt = np.float32(site_meta['ALTITUDE'])
    except:
        alt = 'na'
    land_use_class = site_meta['LAND_USE']
    if pd.isnull(land_use_class) == True:
        land_use_class = 'na'
    station_class = site_meta['STATION CATEGORY']
    if pd.isnull(station_class) == True:
        station_class = 'na'
    raw_class_name = land_use_class + ' ' + station_class
    mm = site_meta['MEASUREMENT METHOD']
    if pd.isnull(mm) == True:
        mm = ''
    country = site_meta['COUNTRY/TERRITORY']
    if pd.isnull(country) == True:
        country = 'na'
    site_name = site_meta['STATION NAME']
    if pd.isnull(site_name) == True:
        site_name = 'na'
    continuous_check = site_meta['MEASUREMENT AUTOMATIC']
    if pd.isnull(continuous_check) == True:
        continuous_check = 'na'
    unit = site_meta['MEASUREMENT UNIT']
    #integration_time = site_meta['TIME INTERVAL']
    tz = site_meta['TIME ZONE']
    contact = '*****@*****.**'
    #convert timezone from str to int
    tzd = {'UTC': 0, 'CET': 1, 'EET': 2}
    data_tz = tzd[tz]
    all_tz = [data_tz]

    if (file_res == 'hr') or (file_res == 'da'):
        var = np.array(site_data[1].values.tolist())
    elif file_res == 'mo':
        all_var = np.array(site_data[1].values.tolist())
        var = np.array(all_var[:, 1]).astype('float64')
        end_times = all_var[:, 0]
        end_date_con = [d[:4] + d[5:7] + d[8:10] for d in end_times]
        end_time_con = [d[11:13] + d[14:] for d in end_times]

    times = site_data[1].index
    date_con = [d.strftime('%Y%m%d') for d in times]
    time_con = [d.strftime('%H%M') for d in times]

    #get ref
    site_ref = valid_refs[c]
    site_group = group_codes[c]

    print 'ref == %s, %s' % (site_ref, c)
    print 'res = ', file_res

    #add var to total obs count
    n_all += len(var)
    n_after_nometa += len(var)

    #if file resolution is daily or monthly then replicate times after point, to fill hourly data array.
    count = 0
    if file_res == 'hr':
        n_dup_array = np.zeros(len(var))

    elif file_res == 'da':
        n_dup_array = []
        file_hours = len(date_con)
        for i in range(file_hours):
            current_hh = int(time_con[count][:2])
            current_mm = int(time_con[count][2:])
            s = datetime.datetime(year=start_year,
                                  month=1,
                                  day=1,
                                  hour=current_hh,
                                  minute=current_mm)
            e = datetime.datetime(year=start_year,
                                  month=1,
                                  day=2,
                                  hour=current_hh,
                                  minute=current_mm)
            day_hours = [
                d.strftime('%H%M') for d in pd.date_range(s, e, freq='H')
            ][1:-1]

            date_con = np.insert(date_con, count + 1, [date_con[count]] * 23)
            time_con = np.insert(time_con, count + 1, day_hours)
            var = np.insert(var, count + 1, [var[count]] * 23)

            #append to n duplicated array
            n_dup_array = np.append(n_dup_array, 0)
            n_dup_array = np.append(n_dup_array, [1] * 23)

            count += 24

    elif file_res == 'mo':
        n_dup_array = []
        file_hours = len(date_con)

        for i in range(file_hours):
            current_year = int(date_con[count][:4])
            current_month = int(date_con[count][4:6])
            current_day = int(date_con[count][6:])
            current_hour = int(time_con[count][:2])
            current_min = int(time_con[count][2:])

            next_year = int(end_date_con[i][:4])
            next_month = int(end_date_con[i][4:6])
            next_day = int(end_date_con[i][6:])
            next_hour = int(end_time_con[i][:2])
            next_min = int(end_time_con[i][2:])

            s = datetime.datetime(year=current_year,
                                  month=current_month,
                                  day=current_day,
                                  hour=current_hour,
                                  minute=current_min)
            e = datetime.datetime(year=next_year,
                                  month=next_month,
                                  day=next_day,
                                  hour=next_hour,
                                  minute=next_min)

            day_date = [
                d.strftime('%Y%m%d') for d in pd.date_range(s, e, freq='H')
            ][1:-1]
            day_hour = [
                d.strftime('%H%M') for d in pd.date_range(s, e, freq='H')
            ][1:-1]
            date_con = np.insert(date_con, count + 1, day_date)
            time_con = np.insert(time_con, count + 1, day_hour)
            var = np.insert(var, count + 1, [var[count]] * len(day_date))

            #append to n duplicated array
            n_dup_array = np.append(n_dup_array, 0)
            n_dup_array = np.append(n_dup_array, [1] * len(day_date))

            count += (len(day_date) + 1)

    date_con = np.array(date_con).astype(int)
    time_con = np.array(time_con).astype(int)

    #remove data < 1970 and >= 2015
    test_inds = (date_con >= 19700101) & (date_con < 20150101)
    date_con = date_con[test_inds]
    time_con = time_con[test_inds]
    var = var[test_inds]
    n_dup_array = n_dup_array[test_inds]

    #convert nans to -99999's
    nan_inds = np.isnan(var)
    var[nan_inds] = -99999

    if continuous_check == 'yes':
        st_big = ['continuous'] * len(var)
    else:
        st_big = ['filter'] * len(var)
    mm_big = [mm] * len(var)

    #get obs valid
    test = var >= 0
    valid_hours_dup = np.sum(n_dup_array[test])
    n_obs_valid = int(len(var[test]) - valid_hours_dup)
    n_after_flagsandlod += n_obs_valid

    #create max possible grid
    full_data = np.empty(len(grid_dates))
    full_data_after_flagsandlod = np.empty(n_hours)
    big_n_dup_array = np.zeros(n_hours)
    full_data[:] = -99999
    full_data_after_flagsandlod[:] = -99999

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con, time_con, start_year)
    converted_time = np.round(converted_time, decimals=5)
    raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    var = np.array(var)
    full_data_after_flagsandlod[raw_indices] = var
    raw_st = np.copy(st_big)
    raw_mm = np.copy(mm_big)

    #test and remove duplicate and overlap points
    converted_time, var, mm_big, st_big, n_dup_array = modules.remove_duplicate_points(
        site_ref, converted_time, var, mm_big, st_big, n_dup_array, output_res)
    test = var >= 0
    valid_hours_dup = np.sum(n_dup_array[test])
    n_obs_valid = int(len(var[test]) - valid_hours_dup)
    n_after_duplicate += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = var
    big_n_dup_array[indices] = n_dup_array

    key_meta = [lat, lon, alt]

    #convert file res to standard format
    if file_res == 'hr':
        file_res = 'H'
    elif file_res == 'da':
        file_res = 'D'
    elif file_res == 'mo':
        file_res = 'M'

    #get sampling/instrument grids
    raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(
        site_ref, process_group, species, raw_st, raw_mm,
        full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list,
        unknown_mm_refs_list, no2_type)

    #do quality checks
    data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control(
        site_ref, species, file_res, no2_type, grid_dates, full_data,
        big_n_dup_array, valid_hours_dup, raw_st_grid, p_st_grid,
        p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid,
        p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta,
        n_all, inv_nometa, n_after_nometa, n_after_flagsandlod,
        n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata,
        inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution,
        inv_badmeasurementmethod, n_after_badmeasurementmethod)
    if data_valid == False:
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na'
        ]
        return c, ['na'], ['na'], [
            'na'
        ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(
            1)

    #set processed unit
    p_unit = 'ppbv'

    #get local timezone
    try:
        local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True)
        pytz_obj = pytz.timezone(local_tz_name)
        datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1))
        if datetime_offset < datetime.timedelta(0):
            local_tz = -(24 - int(datetime_offset.seconds / 60 / 60))
        else:
            local_tz = int(datetime_offset.seconds / 60 / 60)
    except:
        local_tz = 'na'
        print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref)
        unknown_local_tz_list.append(site_ref)

    #pack meta
    meta = [
        lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz,
        local_tz, site_name, country, contact
    ]

    #if blank strings in meta then convert to 'na'
    for i in range(len(meta)):
        try:
            if meta[i].strip() == '':
                meta[i] = 'na'
        except:
            pass

    print set(raw_st_grid)
    print set(raw_mm_grid)
    print set(p_st_grid)
    print set(p_mm_grid)
    print meta

    exit_c_list = np.array([
        inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
        inv_badmeasurementmethod
    ])
    n_c_list = np.array([
        n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
        n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
        n_after_badmeasurementmethod
    ])
    print 'exit counts = ', exit_c_list
    print 'n obs counts = ', n_c_list

    unknown_list = [
        unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
    ]

    return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
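
#Sketch of the time-grid matching used throughout these functions:
#observation times (fractional days since the start year) are rounded and
#located in a synthetic hourly grid with np.searchsorted, then written into
#a -99999-initialised full array. The toy times and values are illustrative.
import numpy as np

n_days = 2
grid = np.round(np.arange(0, n_days, 1./24), decimals=5)   #48 hourly slots
obs_t = np.round(np.array([0.0, 0.5, 1.25]), decimals=5)   #3 observations
obs_v = np.array([30., 42., 38.])

full = np.full(len(grid), -99999.)
full[np.searchsorted(grid, obs_t, side='left')] = obs_v
print full[0], full[12], full[30]   #30.0 42.0 38.0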
Example #18
def site_iter_process(valid_refs, c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

    #for ref_i in range(len(valid_refs)):
    data_valid = True

    site_ref = valid_refs[c]
    print 'Current Ref is = ', site_ref, c

    s_files = glob.glob(
        '/work/home/db876/observations/surface/%s/CAPMON/ozon_smpls_%s*' %
        (species, site_ref))
    site_files = []
    for y in year_array:
        for f in s_files:
            if str(y) in f:
                site_files.append(f)

    site_files = modules.natsorted(site_files)

    yymmdd = []
    hhmm = []
    vals = []

    for file_i in range(len(site_files)):

        count = 0
        meta_start = -99999
        start_read_1 = False
        start_read_2 = False

        with open(site_files[file_i], 'rb') as f:
            reader = csv.reader(f, delimiter=',')
            print site_files[file_i]
            for row in reader:
                #print count
                #break out of loop at bottom of file
                if (start_read_2 == True) & (row[0] == '*TABLE ENDS'):
                    break

                #get metadata
                try:
                    if (row[0] == '*TABLE NAME') & (row[1]
                                                    == 'Site information'):
                        meta_start = count + 2
                except:
                    pass
                if count == meta_start:
                    siteid_i = row.index('Site ID: standard')
                    sitename_i = row.index('Description')
                    lat_i = row.index('Latitude: decimal degrees')
                    lon_i = row.index('Longitude: decimal degrees')
                    try:
                        alt_i = row.index(
                            'Ground elevation: above mean sea level')
                    except:
                        alt_i = row.index('Ground altitude')
                    class_i = row.index('Site land use')

                if count == (meta_start + 6):
                    latitude = row[lat_i]
                    longitude = row[lon_i]
                    altitude = row[alt_i]
                    raw_class_name = row[class_i]
                    site_name = row[sitename_i]

                #get data
                if start_read_2 == True:
                    #read dates, times, and vals
                    date = row[8]
                    time = row[9]
                    yymmdd.append(date[:4] + date[5:7] + date[8:])
                    hhmm.append(time[:2] + time[3:])
                    quality_code = row[13]
                    #if flag not equal to V0 then make -99999
                    if quality_code == 'V0':
                        vals = np.append(vals, np.float64(row[12]))
                    else:
                        vals = np.append(vals, -99999)

                try:
                    if (row[0] == '*TABLE NAME') & (row[1] == 'OZONE_HOURLY'):
                        start_read_1 = True
                except:
                    pass

                if (start_read_1 == True) & (row[0] == '*TABLE COLUMN UNITS'):
                    unit = row[12]

                if (start_read_1 == True) & (row[0] == '*TABLE BEGINS'):
                    start_read_2 = True
                count += 1

    #add to n_obs_all
    n_all += len(vals)
    n_after_nometa += len(vals)

    #convert data less < 0 to -99999
    test_inv = vals < 0
    vals[test_inv] = -99999

    #create max possible grid
    full_data = np.empty(n_hours)
    full_data_after_flagsandlod = np.empty(n_hours)
    big_n_dup_array = np.zeros(n_hours)
    full_data[:] = -99999
    full_data_after_flagsandlod[:] = -99999

    #put vals into full grid
    date_con = np.array(yymmdd).astype(int)
    time_con = np.array(hhmm).astype(int)

    #remove data < 1970 and >= 2015
    test_inds = (date_con >= 19700101) & (date_con < 20150101)
    date_con = date_con[test_inds]
    time_con = time_con[test_inds]
    vals = vals[test_inds]

    #set st_big and mm_big
    st_big = ['continuous'] * len(vals)
    mm_big = ['ultraviolet photometry'] * len(vals)

    #get obs valid
    test = vals != -99999
    n_obs_valid = len(vals[test])
    n_after_flagsandlod += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con, time_con, start_year)
    converted_time = np.round(converted_time, decimals=5)
    syn_grid_time = np.arange(0, n_days, 1. / 24)
    syn_grid_time = np.round(syn_grid_time, decimals=5)
    raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    vals = np.array(vals)
    full_data_after_flagsandlod[raw_indices] = vals
    raw_st = np.copy(st_big)
    raw_mm = np.copy(mm_big)

    # test and remove duplicate and overlap points
    converted_time, vals, mm_big, st_big, na = modules.remove_duplicate_points(
        site_ref, converted_time, vals, mm_big, st_big, 'blank', output_res)
    test = vals >= 0
    n_obs_valid = int(len(vals[test]))
    n_after_duplicate += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = vals

    #get metadata
    try:
        lat = np.float32(latitude)
    except:
        lat = 'na'
    try:
        lon = np.float32(longitude)
    except:
        lon = 'na'
    try:
        alt = np.float32(altitude)
    except:
        alt = 'na'
    unit = str(unit)
    raw_class_name = str(raw_class_name)
    site_name = str(site_name)
    country = 'Canada'
    contact = 'Dave MacTavish, 4905 Dufferin St., Toronto ON, CANADA, M3H 5T4, [email protected]'

    #set data tz - all CAPMON times are UTC
    data_tz = 0
    all_tz = [data_tz]

    key_meta = [lat, lon, alt]

    #set site file resolution
    file_res = 'H'

    #get sampling/instrument grids
    raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(
        site_ref, process_group, species, raw_st, raw_mm,
        full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list,
        unknown_mm_refs_list, no2_type)

    #do quality checks
    data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control(
        site_ref, species, file_res, no2_type, grid_dates, full_data,
        big_n_dup_array, 0, raw_st_grid, p_st_grid,
        p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid,
        p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta,
        n_all, inv_nometa, n_after_nometa, n_after_flagsandlod,
        n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata,
        inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution,
        inv_badmeasurementmethod, n_after_badmeasurementmethod)
    if data_valid == False:
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na'
        ]
        return c, ['na'], ['na'], [
            'na'
        ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(
            1)

    #set processed unit
    p_unit = 'ppbv'

    #get local timezone
    try:
        local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True)
        pytz_obj = pytz.timezone(local_tz_name)
        datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1))
        if datetime_offset < datetime.timedelta(0):
            local_tz = -(24 - int(datetime_offset.seconds / 60 / 60))
        else:
            local_tz = int(datetime_offset.seconds / 60 / 60)
    except:
        local_tz = 'na'
        print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref)
        unknown_local_tz_list.append(site_ref)

    #pack meta
    meta = [
        lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz,
        local_tz, site_name, country, contact
    ]

    #if blank strings in meta then convert to 'na'
    for i in range(len(meta)):
        try:
            if meta[i].strip() == '':
                meta[i] = 'na'
        except:
            pass

    print set(raw_st_grid)
    print set(raw_mm_grid)
    print set(p_st_grid)
    print set(p_mm_grid)
    print meta

    exit_c_list = np.array([
        inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
        inv_badmeasurementmethod
    ])
    n_c_list = np.array([
        n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
        n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
        n_after_badmeasurementmethod
    ])
    print 'exit counts = ', exit_c_list
    print 'n obs counts = ', n_c_list

    unknown_list = [
        unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
    ]

    return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
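
#Sketch of the signed-hour conversion in the local-timezone step above:
#timedelta.seconds is always non-negative, so a UTC-5 offset arrives as
#19 hours' worth of seconds and needs the (24 - h) correction. The zone
#names are real pytz identifiers; the helper itself is illustrative.
import datetime
import pytz

def utc_offset_hours(tz_name):
    offset = pytz.timezone(tz_name).utcoffset(datetime.datetime(2000, 1, 1))
    if offset < datetime.timedelta(0):
        return -(24 - int(offset.seconds / 60 / 60))
    return int(offset.seconds / 60 / 60)

print utc_offset_hours('America/Toronto')   #-5
print utc_offset_hours('Europe/Berlin')     #1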
Example #19
def site_iter_process(valid_refs, c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

    data_valid = True
    site_ref = valid_refs[c]
    print 'ref = ', site_ref, c
    site_test = all_refs == site_ref

    site_yyyymmdd = yyyymmdd[site_test]
    site_hhmm = hhmm[site_test]
    site_vals = vals[site_test]
    site_vals = np.array(site_vals)

    #add val to total obs count
    n_all += len(site_vals)

    #test if site_ref in meta_refs, if not then exit
    if site_ref not in meta_refs:
        inv_nometa += 1
        print 'Site Invalid. No Metadata for ref'
        if no2_type == 'MOLYBDENUM':
            n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_obs_after_anyvaliddata, inv_nokeymeta, n_obs_after_nokeymeta, inv_resolution, n_obs_after_resolution, inv_badmeasurementmethod, n_obs_after_badmeasurementmethod = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na',
            'na'
        ]
        exit_r = 'nometa'
        return c, ['na'], ['na'], [
            'na'
        ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(
            1)
    n_after_nometa += len(site_vals)

    #convert blank values to -99999
    test_inv = site_vals == ''
    site_vals[test_inv] = -99999

    #convert number invalids to -99999
    test_inv = site_vals < 0
    site_vals[test_inv] = -99999

    #create max possible grid
    full_data = np.empty(n_hours)
    full_data_after_flagsandlod = np.empty(n_hours)
    big_n_dup_array = np.zeros(n_hours)
    full_data[:] = -99999
    full_data_after_flagsandlod[:] = -99999

    #get meta
    meta_index = meta_refs.index(site_ref)
    data_tz = np.float32(meta_tz[meta_index])
    all_tz = [data_tz]
    try:
        lat = np.float32(meta_lats[meta_index])
    except:
        lat = 'na'
    try:
        lon = np.float32(meta_lons[meta_index])
    except:
        lon = 'na'
    try:
        alt = np.float32(meta_alts[meta_index])
    except:
        alt = 'na'
    unit = 'na'
    raw_class_name = meta_class[meta_index]
    site_name = meta_sitename[meta_index]
    country = 'United States'
    contact = '*****@*****.**'

    #adjust dates and times if tz is not equal to 0
    tz = int(data_tz)
    if tz != 0:
        for i in range(len(site_yyyymmdd)):
            #create datetime
            dt = datetime.datetime(int(site_yyyymmdd[i][:4]),
                                   int(site_yyyymmdd[i][4:6]),
                                   int(site_yyyymmdd[i][6:]),
                                   int(site_hhmm[i][:2]),
                                   int(site_hhmm[i][2:]))
            if tz > 0:
                dt = dt - datetime.timedelta(hours=int(tz))
            elif tz < 0:
                dt = dt + datetime.timedelta(hours=np.abs(int(tz)))
            site_yyyymmdd[i] = dt.strftime("%Y%m%d")
            site_hhmm[i] = dt.strftime("%H%M")

    #put vals into full grid
    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)

    #remove data < 1970 and >= 2015
    test_inds = (date_con >= 19700101) & (date_con < 20150101)
    date_con = date_con[test_inds]
    time_con = time_con[test_inds]
    site_vals = site_vals[test_inds]

    #set st_big and mm_big
    st_big = ['continuous'] * len(site_vals)
    if species == 'O3':
        mm_big = ['ultraviolet photometry'] * len(site_vals)
    elif (species == 'NO'):
        mm_big = ['chemiluminescence'] * len(site_vals)
    elif (species == 'CO'):
        mm_big = ['non-dispersive infrared spectroscopy'] * len(site_vals)

    #get obs valid
    test = site_vals >= 0
    n_obs_valid = len(site_vals[test])
    n_after_flagsandlod += n_obs_valid
    print site_vals, n_after_flagsandlod

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con, time_con, start_year)
    converted_time = np.round(converted_time, decimals=5)
    syn_grid_time = np.arange(0, n_days, 1. / 24)
    syn_grid_time = np.round(syn_grid_time, decimals=5)
    raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)
    full_data_after_flagsandlod[raw_indices] = site_vals
    raw_st = np.copy(st_big)
    raw_mm = np.copy(mm_big)

    # test and remove duplicate and overlap points
    converted_time, site_vals, mm_big, st_big, na = modules.remove_duplicate_points(
        site_ref, converted_time, site_vals, mm_big, st_big, 'blank',
        output_res)
    test = site_vals >= 0
    n_obs_valid = int(len(site_vals[test]))
    n_after_duplicate += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = site_vals

    key_meta = [lat, lon, alt]

    #set site file resolution
    file_res = 'H'

    #get sampling/instrument grids
    raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(
        site_ref, process_group, species, raw_st, raw_mm,
        full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list,
        unknown_mm_refs_list, no2_type)

    #do quality checks
    data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control(
        site_ref, species, file_res, no2_type, grid_dates, full_data,
        big_n_dup_array, 0, raw_st_grid, p_st_grid,
        p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid,
        p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta,
        n_all, inv_nometa, n_after_nometa, n_after_flagsandlod,
        n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata,
        inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution,
        inv_badmeasurementmethod, n_after_badmeasurementmethod)
    if data_valid == False:
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na'
        ]
        return c, ['na'], ['na'], [
            'na'
        ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(
            1)

    #make tz int after checks
    data_tz = np.float32(data_tz)

    #set processed unit
    p_unit = 'ppbv'

    #get local timezone
    try:
        local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True)
        pytz_obj = pytz.timezone(local_tz_name)
        datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1))
        if datetime_offset < datetime.timedelta(0):
            local_tz = -(24 - int(datetime_offset.seconds / 60 / 60))
        else:
            local_tz = int(datetime_offset.seconds / 60 / 60)
    except:
        local_tz = 'na'
        print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref)
        unknown_local_tz_list.append(site_ref)

    #pack meta
    meta = [
        lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz,
        local_tz, site_name, country, contact
    ]

    #if blank strings in meta then convert to 'na'
    for i in range(len(meta)):
        try:
            if meta[i].strip() == '':
                meta[i] = 'na'
        except:
            pass

    print set(raw_st_grid)
    print set(raw_mm_grid)
    print set(p_st_grid)
    print set(p_mm_grid)
    print meta

    exit_c_list = np.array([
        inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
        inv_badmeasurementmethod
    ])
    n_c_list = np.array([
        n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
        n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
        n_after_badmeasurementmethod
    ])
    print 'exit counts = ', exit_c_list
    print 'n obs counts = ', n_c_list

    unknown_list = [
        unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
    ]

    return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
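
#The per-row datetime loop above shifts fixed-offset local stamps to UTC.
#A sketch of the same shift done in one pass with pandas; the variable
#names and toy stamps are illustrative, not from the original data.
import pandas as pd

yyyymmdd = ['20100101', '20100101']
hhmm = ['0000', '2300']
tz = -5   #local time is UTC-5

stamps = pd.to_datetime([d + h for d, h in zip(yyyymmdd, hhmm)],
                        format='%Y%m%d%H%M')
utc = stamps - pd.Timedelta(hours=tz)   #subtracting a negative offset adds hours
print [s.strftime('%Y%m%d %H%M') for s in utc]   #['20100101 0500', '20100102 0400']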
Example #20
def run_LSP(mod_data, x):

    lat_i = lat_indices[x]
    lon_i = lon_indices[x]

    print lat_i, lon_i

    current_lat = lat_c[lat_i]
    current_lon = lon_c[lon_i]

    waveform = mod_data

    waveform_ave = np.average(waveform)

    model_date_val = np.copy(model_date)
    model_time_val = np.copy(model_time)

    time = modules.date_process(model_date_val, model_time_val, start_year)

    if species.lower() not in ('gmao_temp', 'gmao_psfc', 'wind_speed',
                               'wind_direction'):
        waveform = waveform * 1e9

    #check model vals are valid
    #valid = vals >= 0
    #vals = vals[valid]
    #model_time_val = model_time[valid]
    #model_date_val = model_date[valid]

    #take 8 hour average
    divisor = 8

    total_len = len(waveform) / divisor
    start = 0
    end = divisor
    ave_waveform = []
    ave_time = []
    for i in range(total_len):
        ave = np.ma.average(waveform[start:end])
        ave_time = np.append(ave_time, time[start])
        ave_waveform = np.append(ave_waveform, ave)
        start += divisor
        end += divisor

    time = np.copy(ave_time)
    waveform = np.copy(ave_waveform)

    #take lsp unwindowed of waveform

    ua_periods, ua_mag, ua_ph, ua_fr, ua_fi = modules.take_lomb_unwindowed(
        time, waveform, ofac, 1. / 24)

    #take out known periodic components 1,182.625, and 365.25 a priori for more accurate red noise fit.
    closest_daily_index = min(range(len(ua_periods)),
                              key=lambda i: abs(ua_periods[i] - 1.))
    closest_ha_index = min(range(len(ua_periods)),
                           key=lambda i: abs(ua_periods[i] - 182.625))
    closest_annual_index = min(range(len(ua_periods)),
                               key=lambda i: abs(ua_periods[i] - 365.25))

    rm_indices = [closest_daily_index, closest_ha_index, closest_annual_index]

    ua_mag_c, ua_fr, ua_fi = redfit.sidelobe_percent_remove(
        np.copy(ua_mag), ua_fr, ua_fi, rm_indices, 5., ua_periods)

    #-------------------------------------------------------------------------------
    #Do IFFT of altered spectra - with significant periods removed and gaps left in real and imag components linearly interpolated.
    #altered spectra provides red noise estimation baseline

    ##use ifft to get time series back from adjusted spectra
    #complex Fourier spectrum which corresponds to the Lomb-Scargle periodogram:
    F = [0] * ((len(ua_fr) * 2) + 1)

    #set first real value to average
    F[0] = complex(waveform_ave * len(waveform), 0)

    #Get reverse real and imaginary values
    rev_ua_fr = np.copy(ua_fr[::-1])
    rev_ua_fi = np.copy(ua_fi[::-1])

    rev_ua_fr[0] = 0
    rev_ua_fi[0] = 0

    f_index = 1

    #Fill Fourier Spectrum real and imaginary values
    for i in range(len(ua_fr)):
        F[f_index] = complex(ua_fr[i], ua_fi[i])
        f_index += 1

    for i in range(len(ua_fr)):
        F[f_index] = complex(rev_ua_fr[i], -rev_ua_fi[i])
        f_index += 1

    F = np.array(F)

    #Take ifft and just take real values
    ifft_ua_ts = numpy.fft.ifft(F)
    ifft_ua_ts = ifft_ua_ts.astype('float64')

    ifft_ua_ts_len = (len(ifft_ua_ts) / ofac) + np.mod(len(ifft_ua_ts), ofac)

    ifft_time = time[-ifft_ua_ts_len:]
    ifft_ua_ts = ifft_ua_ts[-len(waveform):]

    ifft_time = ifft_time - ifft_time[0]

    a_periods, a_mag, corr_a_mag, a_fr, a_fi, a_red_periods, a_red_mag, a_gredth, a_fac95, a_fac99, a_fac99_9, a_faccrit, a_fac_grid, a_sig_levels, a_tau, a_corr = redfit.red_background(
        nsim, mctest, ifft_time, ifft_ua_ts, ofac)

    #apply lsp correction from altered spectrum to unaltered spectrum
    corr_ua_mag = ua_mag / a_corr

    #check confidence of each point on spectrum

    sigs = np.zeros(len(corr_ua_mag))

    last_ind = len(a_sig_levels) - 1

    for i in range(len(a_sig_levels) - 1):
        conf_low = a_gredth * a_fac_grid[i]
        conf_up = a_gredth * a_fac_grid[i + 1]

        current_last_ind = i + 1

        for j in range(len(corr_ua_mag)):
            if sigs[j] == 0:
                if (corr_ua_mag[j] >= conf_low[j]) and (corr_ua_mag[j] <
                                                        conf_up[j]):
                    sigs[j] = a_sig_levels[i]
                elif current_last_ind == last_ind:
                    if corr_ua_mag[j] > conf_up[j]:
                        sigs[j] = a_sig_levels[i + 1]

    #get critical significance for all points on spectrum
    crit_sig = a_gredth * a_faccrit

    #get 95,99 and 99.9 % chi squared significance bands for all points on spectrum
    sig_95 = a_gredth * a_fac95
    sig_99 = a_gredth * a_fac99
    sig_99_9 = a_gredth * a_fac99_9

    return (x, sigs, sig_95, sig_99, sig_99_9, crit_sig, a_gredth, corr_ua_mag,
            ua_periods, a_tau)
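
#run_LSP rebuilds a time series by mirroring the spectrum's real and
#imaginary parts into a conjugate-symmetric array before the inverse FFT.
#A toy sketch of that symmetry property (all values illustrative): a
#Hermitian-symmetric spectrum inverse-transforms to a purely real series.
import numpy as np

fr = np.array([3.0, 1.5])    #positive-frequency real parts
fi = np.array([0.5, -1.0])   #positive-frequency imaginary parts

F = np.zeros(2*len(fr) + 1, dtype=complex)
F[0] = 10.0*5                                    #mean times series length
F[1:len(fr)+1] = fr + 1j*fi                      #positive frequencies
F[len(fr)+1:] = (fr + 1j*fi)[::-1].conjugate()   #mirrored conjugates
ts = np.fft.ifft(F)
print np.max(np.abs(ts.imag))   #~1e-16: the recovered series is real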
Example #21
def onpick(event):
    global pl

    global ind
    global fig2

    ind = event.ind
    print 'ind = ', ind
    ind = ind[0]
    #x_data = event.xdata
    #y_data = event.ydata

    #find ind of closest lat/lon
    #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data)

    try:
        for i in range(len(pl)):
            pl.pop(0).remove()
            first_run = False

    except:
        first_run = True
        pass

    pl = m.plot([linear_lons[ind]], [linear_lats[ind]],
                's',
                ms=20,
                alpha=0.6,
                color='yellow',
                zorder=20)

    #get model timeseries for site clicked
    lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, linear_lats[ind],
                                             linear_lons[ind])
    model_var_pick = model_var[:, lat_n, lon_n]
    model_var_pick = model_var_pick * 1e9
    model_var_mask = np.ma.masked_where(model_var_pick <= 0, model_var_pick)
    model_time_pd = pd.date_range(start=model_datetimes[0],
                                  end=model_datetimes[-1],
                                  freq='H')
    model_var_pd = pd.Series(model_var_mask, index=model_time_pd)

    #create sine wave from amp/phase
    model_date_l = model_date.astype(int)
    model_time_l = model_time.astype(int)
    model_times = modules.date_process(model_date_l, model_time_l, start_year)
    model_times = np.array(model_times)
    pi2 = np.pi * 2

    ratio = 100. / annual_amp[lat_n, lon_n]
    ha_percent = ratio * ha_amp[lat_n, lon_n]

    #convert phases (stored in months) to radians
    calc = pi2 / 6.
    ha_ph_r = ha_ph[lat_n, lon_n] * calc
    calc = pi2 / 12.
    annual_ph_r = annual_ph[lat_n, lon_n] * calc

    ha_model_wave = ha_amp[lat_n, lon_n] * (np.cos((pi2 * model_times /
                                                    (365.25 / 2.)) -
                                                   (ha_ph_r)))
    annual_model_wave = annual_amp[lat_n, lon_n] * (np.cos((pi2 * model_times /
                                                            (365.25)) -
                                                           (annual_ph_r)))

    ha_primary = p_ha_ph[lat_n, lon_n]
    ha_secondary = s_ha_ph[lat_n, lon_n]

    ha_model_wave = ha_model_wave + ave[lat_n, lon_n]
    annual_model_wave = annual_model_wave + ave[lat_n, lon_n]

    model_ha_wave_pd = pd.Series(ha_model_wave, index=model_time_pd)
    model_annual_wave_pd = pd.Series(annual_model_wave, index=model_time_pd)

    fig.canvas.draw()

    if first_run == False:
        plt.close(fig2)
        fig2, (axo) = plt.subplots(1, figsize=(24, 12))
        fig2.patch.set_facecolor('white')

        axo.plot_date(model_time_pd.to_pydatetime(),
                      model_var_pd,
                      color='black',
                      markersize=3,
                      label='Observations')
        axo.plot_date(model_time_pd.to_pydatetime(),
                      model_ha_wave_pd,
                      color='green',
                      markersize=3,
                      label='Ha Waveform',
                      markeredgecolor='None')
        axo.plot_date(model_time_pd.to_pydatetime(),
                      model_annual_wave_pd,
                      color='red',
                      markersize=3,
                      label='Annual Waveform',
                      markeredgecolor='None')

        #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual Amp Percent = %s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent))

        plt.legend(loc='lower right')
        plt.tight_layout()
        axo.grid()

        plt.show()
    else:
        fig2, (axo) = plt.subplots(1, figsize=(24, 12))
        fig2.patch.set_facecolor('white')

        axo.plot_date(model_time_pd.to_pydatetime(),
                      model_var_pd,
                      color='black',
                      markersize=3,
                      label='Observations')
        axo.plot_date(model_time_pd.to_pydatetime(),
                      model_ha_wave_pd,
                      color='green',
                      markersize=3,
                      label='Ha Waveform',
                      markeredgecolor='None')
        axo.plot_date(model_time_pd.to_pydatetime(),
                      model_annual_wave_pd,
                      color='red',
                      markersize=3,
                      label='Annual Waveform',
                      markeredgecolor='None')

        #axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s\nPrimary HA Phase = %s,Primary HA Regime = %s, HA Amp to Annual Amp Percent = %s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,ha_primary,ha_regime,ha_percent))

        plt.legend(loc='lower right')
        plt.tight_layout()
        axo.grid()

        plt.show()
site_ref = raw_input('Choose site from list. Sites with full set of yearly files between %i & %i are:\n%s\n'%(first_year,last_year+1,'   '.join(i for i in valid_refs)))

#read in specific site data
site_group = root_grp.groups[site_ref]

#read in variables for site
obs_var = site_group.variables['o3'][:]
full_obs_var = obs_var[:]
full_obs_var_mask = np.ma.masked_where(full_obs_var<=0,full_obs_var)
obs_date = site_group.variables['date'][:]
obs_time = site_group.variables['time'][:]
obs_lat = site_group.latitude
obs_lon = site_group.longitude
obs_alt = site_group.altitude

obs_times = modules.date_process(obs_date,obs_time)
obs_times = np.array(obs_times)
obs_times_full = obs_times[:]

##cut out invalid obs data
obs_var_mask = np.ma.masked_where(obs_var<=0,obs_var)
valids = obs_var > 0
obs_var = obs_var[valids]
obs_times = obs_times[valids]

obs_ave = np.average(obs_var)

year_val = []
month_val = []
day_val = []
hour_val = []
def site_iter_process(valid_refs,c):
#for ref_i in range(len(valid_refs)):
    data_valid = True

    site_ref = valid_refs[c]
    print 'Current Ref is = ', site_ref

    s_files = glob.glob('/work/home/db876/observations/surface/%s/CAPMON/ozon_smpls_%s*'%(species,site_ref))
    site_files = []
    for y in year_array:
        for f in s_files:
            if str(y) in f:
                site_files.append(f)
                           

    site_files = modules.natsorted(site_files)

    yymmdd = []
    hhmm = []
    vals = []

    #create max possible o3 grid
    full_data = np.empty(n_hours)
    full_data[:] = -99999

    for file_i in range(len(site_files)):

        count = 0
        meta_start = -99999
        start_read_1 = False
        start_read_2 = False

        with open(site_files[file_i], 'rb') as f:
            reader = csv.reader(f,delimiter=',')
            print site_files[file_i]
            for row in reader:
                #print count
                #break out of loop at bottom of file
                if (start_read_2 == True) & (row[0] == '*TABLE ENDS'):
                    break

                #get metadata
                try:
                    if (row[0] =='*TABLE NAME') & (row[1] == 'Site information'):
                        meta_start = count+2
                except:
                    pass
                if count == meta_start:
                    lat_i = row.index('Latitude: decimal degrees')
                    lon_i = row.index('Longitude: decimal degrees')
                    try:
                        alt_i = row.index('Ground elevation: above mean sea level')
                    except:
                        alt_i = row.index('Ground altitude')
                    class_i = row.index('Site land use')
                
                if count == (meta_start+6):
                    latitude = row[lat_i]
                    longitude = row[lon_i]
                    altitude = row[alt_i]
                    raw_class_name = row[class_i]
                      
                #get data
                if start_read_2 == True:
                    #read dates, times, and vals
                    date = row[8]
                    time = row[9]
                    yymmdd.append(date[:4]+date[5:7] + date[8:])
                    hhmm.append(time[:2]+time[3:])
                    quality_code = row[13]
                    if quality_code == 'V0':
                        vals = np.append(vals,np.float64(row[12]))
                    else:
                        vals = np.append(vals,-99999)
                    
                try:
                    if (row[0] == '*TABLE NAME') & (row[1] == 'OZONE_HOURLY'):
                        start_read_1 = True
                except:
                    pass
                   
                if (start_read_1 == True) & (row[0] == '*TABLE COLUMN UNITS'):
                    unit = row[12]
                
                if (start_read_1 == True) & (row[0] == '*TABLE BEGINS'):
                    start_read_2 = True
                count+=1

    #convert all invalids to -99999
    test_inv = vals < 0
    vals[test_inv] = -99999

    #put o3 vals into full grid
    date_con = np.array(yymmdd).astype(int)
    time_con = np.array(hhmm).astype(int)
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    vals = np.array(vals)
    #make sure no data is past end year
    index_test = indices < len(full_data)
    indices = indices[index_test]
    vals = vals[index_test]
    full_data[indices] = vals
    
    
    #get metadata
    lat = np.float64(latitude)
    lon = np.float64(longitude)
    alt = np.float64(altitude)
        
    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,np.float64(altitude),grid_dates,start_year,end_year)
    
    #set measurement method
    mm = 'ultraviolet photometry'
    
    #set site file resolution
    file_res = 'H'
    
    #set sampling as average
    st = 'average'
    
    anthrome_class_name = 'na'
    
    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
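
#The CAPMoN readers above are small state machines keyed on '*TABLE ...'
#sentinel rows. A stripped-down sketch of that pattern over an in-memory
#file; the rows here are illustrative, not real CAPMoN content.
import csv
import StringIO

text = ('*TABLE NAME,OZONE_HOURLY\n'
        '*TABLE BEGINS\n'
        'a,b\n'
        'c,d\n'
        '*TABLE ENDS\n')

in_table = False
rows = []
for row in csv.reader(StringIO.StringIO(text)):
    if row and row[0] == '*TABLE ENDS':
        break
    if in_table:
        rows.append(row)
    if row and row[0] == '*TABLE BEGINS':
        in_table = True
print rows   #[['a', 'b'], ['c', 'd']]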
Example #24
average_years = raw_input('\nTake average cycles of all years? y or n.\n')

#read in specific site data
site_group = root_grp.groups[site_ref]

#read in variables for site
obs_var = site_group.variables['o3'][:]
full_obs_var = obs_var[:]
full_obs_var_mask = np.ma.masked_where(full_obs_var<=0,full_obs_var)
obs_date = site_group.variables['date'][:]
obs_time = site_group.variables['time'][:]
obs_lat = site_group.latitude
obs_lon = site_group.longitude
obs_alt = site_group.altitude

obs_times = modules.date_process(obs_date,obs_time,first_year)
obs_times = np.array(obs_times)
obs_times_full = obs_times[:]

##cut out invalid obs data
obs_var_mask = np.ma.masked_where(obs_var<=0,obs_var)
valids = obs_var > 0
obs_var = obs_var[valids]
obs_times = obs_times[valids]

obs_ave = np.average(obs_var)

year_val = []
month_val = []
day_val = []
hour_val = []
Example #25
def site_iter_process(valid_refs,c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

#read files site at a time
#for ref_i in range(len(valid_refs)):
    site_ref = valid_refs[c]

    all_latitudes = []
    all_longitudes = []
    all_altitudes = []
    all_unit = []
    all_site_name = []
    all_country = []
    all_contact = []
    mm_big = []
    meta_valid_list = []

    data_valid = True

    print 'Current Ref is = ', site_ref,c
    #find if sites have full valid range from start year and finishing in end year
    s_files = glob.glob('/work/home/db876/observations/surface/%s/EMEP/%s*'%(fname_species,site_ref))
    year_files = [file.replace("/work/home/db876/observations/surface/%s/EMEP/"%(fname_species), "") for file in s_files]
    cut_year_files = [file[8:12] for file in year_files]
    site_files = []
    for y in year_array:
        for i in range(len(s_files)):
            if str(y) in cut_year_files[i]:
                site_files.append(s_files[i])
                  
    site_files = modules.natsorted(site_files)
    
    #test for duplicate file years, if duplicates break processing
    file_years = []
    for file in site_files:
        last_file_split = file.split('/')[-1]
        file_years=np.append(file_years,last_file_split[8:12])
    for y in year_array:
        test = file_years == str(y)
        if len(file_years[test]) > 1:
            print 'Site has duplicate files for %s. Breaking processing'%(y)
            raise ValueError('Duplicate year files for %s'%(y))

    if site_files == []:
        print 'No valid files for site\n'
        return
    
    #remove daily/monthly files if necessary
    if output_res == 'H':
        del_i = []
        for i in range(len(site_files)):
            if '.1d.' in site_files[i]:
                del_i.append(i)
            elif '.1mo.' in site_files[i]:
                del_i.append(i)
        site_files=np.delete(site_files,del_i)
    elif output_res == 'HD':
        del_i = []
        for i in range(len(site_files)):
            if '.1mo.' in site_files[i]:
                del_i.append(i)
        site_files=np.delete(site_files,del_i)
    
    for y in year_array:
        bad_meta = False
        got_year = False
        for file in site_files:
            last_file_split = file.split('/')[-1]
            if str(y) in last_file_split[8:12]:
                got_year = True
                break
        if got_year == False:
            #fill in data for missing year
            timedelta_diff = datetime.date(y+1, 1, 1) - datetime.date(y, 1, 1)
            ndays_missing = timedelta_diff.days       
            continue
    
        count = 0
        with open(file, 'rb') as f:
            reader = csv.reader(f,delimiter=' ')
            print file
            for row in reader:
                try:
                    row = filter(lambda a: a != '', row)
                except:
                    pass
                try:
                    row = filter(lambda a: a != ',', row)
                except:
                    pass
                                
                #get start date of file
                if row[0] == 'Startdate:':
                    data = row[1]
                    s_yyyy = data[:4]
                    s_mm = data[4:6]
                    s_dd = data[6:8]
                    s_hh = data[8:10]
                    s_min = data[10:12]
                    start_datetime = datetime.datetime(int(s_yyyy),1,1,0,0)
                
                #get unit
                if row[0] == 'Unit:':
                    try:
                        if len(row) == 3:
                            unit_part1 = row[1]
                            unit_part2 = row[2]
                            unit = unit_part1+'_'+unit_part2
                        
                        elif len(row) == 2:
                            unit = row[1] 
                        all_unit.append(unit)
                    except:
                        bad_meta = True
        
                #get resolution
                if row[0] == 'Resolution':
                    if row[1] == 'code:':
                        file_res = row[2]
                        print 'Resolution = %s'%file_res
                
                #get latitude
                if row[0] == 'Station':
                    if row[1] == 'latitude:':
                        latitude = row[2]
                        all_latitudes.append(latitude)
            
                #get longitude
                if row[0] == 'Station':
                    if row[1] == 'longitude:':
                        longitude = row[2]
                        all_longitudes.append(longitude)
                    
                #get altitude
                if row[0] == 'Station':
                    if row[1] == 'altitude:':
                        altitude = row[2][:-1]
                        all_altitudes.append(altitude)
                        
                #get site name
                if row[0] == 'Station':
                    if row[1] == 'name:':
                        site_name = row[2]
                        all_site_name.append(site_name)
            
                #get period
                if row[0] == 'Period':
                    period_code = row[2]
                
                #get stats method
                if row[0] == 'Statistics:':
                    try:
                        st = row[1] + row[2]
                    except:
                        st = ''
                    if st != 'arithmeticmean':
                        print 'Not Arithmetic Mean!'
                        print row[1]
                        raise ValueError('Unexpected statistics entry: %s'%(row))
            
                #get instrument method and name
                if row[0] == 'Instrument':
                    if row[1] == 'type:':
                        mm_list = row[2:]
                        if len(mm_list) > 1:
                            site_mm = ''
                            for x in range(len(mm_list)):
                                site_mm = site_mm+mm_list[x]+' '
                            site_mm = site_mm.strip()
                        else:
                            site_mm = mm_list[0]
                
                    if row[1] == 'name:':
                        mn_list = row[2:]
                        if len(mn_list) > 1:
                            site_mn = ''
                            for x in range(len(mn_list)):
                                site_mn = site_mn+mn_list[x]+' '
                            site_mn = site_mn.strip()
                        else:
                            site_mn = mn_list[0]
                
                #get method ref
                if row[0] == 'Method':
                    if row[1] == 'ref:':
                        try:
                            mf_list = row[2:]
                            if len(mf_list) > 1:
                                site_mf = ''
                                for x in range(len(mf_list)):
                                    site_mf = site_mf+mf_list[x]+' '
                                site_mf = site_mf.strip()
                            else:
                                site_mf = mf_list[0]
                        except:
                            site_mf = ''
                
                    #put together instrument type + instrument name + method ref
                    mm = site_mm+site_mn+site_mf
                
                #get contact
                if row[0] == 'Originator:':
                    try:
                        contact_list = row[1:]
                        if len(contact_list) > 1:
                            site_contact = ''
                            for x in range(len(contact_list)):
                                site_contact = site_contact+contact_list[x]+' '
                            site_contact = site_contact.strip()
                        else:
                            site_contact = contact_list[0]
                    except:
                        site_contact = ''
                    all_contact.append(site_contact)
                
                #get country
                site_country = EMEP_COUNTRIES(file.split('/')[-1][:2])
                all_country.append(site_country)
                
                if row[0] == 'starttime':
                    skip_n = count+1
                    if species == 'ISOP':
                        spec_ind = row.index('C5H8')
                        try:
                            flag_ind = row.index('flag_C5H8')
                        except:
                            flag_ind = row.index('flag')
                    else:
                        spec_ind = row.index(species)
                        try:
                            flag_ind = row.index('flag_'+species)
                        except:
                            flag_ind = row.index('flag')
                    
                count+=1
            
        read = np.loadtxt(file,dtype="f8,f8,f8,f8",skiprows=skip_n,usecols=(0,1,spec_ind,flag_ind),unpack=True)
        read = np.array(read)
        times_since_start = read[0,:]
        endtimes_since_start = read[1,:]
        conc = read[2,:]
        conc = np.array(conc).astype('float64')
        flags = read[3,:]

        dates = []
        times = []
        enddates = []
        endtimes = []
        times_since_start = np.float64(times_since_start)   
        endtimes_since_start = np.float64(endtimes_since_start)  
        for x in range(len(times_since_start)):
            days_since_start = math.trunc(times_since_start[x])
            enddays_since_start = math.trunc(endtimes_since_start[x])
            remainder = times_since_start[x] - days_since_start
            remainder_end = endtimes_since_start[x] - enddays_since_start
            unrounded_hour = remainder*24
            unrounded_hour_end = remainder_end*24
            hour = np.round(unrounded_hour)
            hour_end = np.round(unrounded_hour_end)
            time_delta = datetime.timedelta(days = days_since_start,hours = hour)
            time_delta_end = datetime.timedelta(days = enddays_since_start,hours = hour_end)
            calc_datetime = start_datetime + time_delta
            calc_datetime_end = start_datetime + time_delta_end
            calc_yyyymmdd = calc_datetime.strftime("%Y%m%d") 
            calc_hhmm = calc_datetime.strftime("%H%M")  
            end_calc_yyyymmdd = calc_datetime_end.strftime("%Y%m%d") 
            end_calc_hhmm = calc_datetime_end.strftime("%H%M")
            dates.append(calc_yyyymmdd)
            times.append(calc_hhmm)
            enddates.append(end_calc_yyyymmdd)
            endtimes.append(end_calc_hhmm)
            
        conc = np.float64(conc)
        flags = np.float64(flags)
        
        #add to n_obs_all
        n_all += len(conc)
        
        #if bad_meta is True then set all file vals as NaNs
        if bad_meta == True:
            conc[:] = np.NaN
        meta_valid_list.append(bad_meta)
        
        #convert invalid and flagged values inline to NaN
        test = conc < 0
        conc[test] = np.NaN
        
        test = flags != 0
        conc[test] = np.NaN
            
        #convert units by line (only if value is >= 0)
        try:
            if (unit.lower() != 'ppb') & (unit.lower() != 'ppbv'):
                if unit == 'ug/m3':
                    #calculate conversion factor from ug/m3 assuming 293K and 1013 hPa - reference conditions in EU law
                    #conv_fact = R/MW * T(K) / (P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(293.)/(1013./10.)
                    conc = conv_fact*conc
                elif unit == 'ug_N/m3':
                    conv_fact = 8.3144/14.00674*(293.)/(1013./10.)
                    conc = conv_fact*conc
                elif (unit == 'ppm') or (unit == 'ppmv'):
                    conc = conc*1e3
                    #print 'Converting Units from ppmv to ppbv'
                elif (unit == 'ppt') or (unit == 'pptv'):
                    conc = conc/1e3
                    #print 'Converting Units from pptv to ppbv'
                else:
                    print 'Unknown Unit'
                    raise ValueError('Unknown unit: %s'%(unit))
        except:
            pass
        
        #remove 9.999 from ISOP dataset
        if species == 'ISOP':
            test = conc == 9.999
            conc[test] = np.NaN
        
        #if file resolution is daily or monthly then replicate times after point, to fill hourly data array.
        count=0
        if file_res == '1h':
            n_dups = np.zeros(len(conc))
        elif file_res == '1d':
            n_dups = []
            #daily file: replicate each daily value across the hours it covers to fill the hourly data array
            file_hours = len(dates)
            for i in range(file_hours):
                current_year = int(dates[count][:4])
                current_month = int(dates[count][4:6])
                current_day = int(dates[count][6:])
                current_hh = int(times[count][:2])
                current_mm = int(times[count][2:])
        
                next_year = int(enddates[i][:4])
                next_month = int(enddates[i][4:6])
                next_day = int(enddates[i][6:])
                next_hh = int(endtimes[i][:2])
                next_mm =  int(endtimes[i][2:])
                
                s = datetime.datetime(year = current_year, month = current_month, day = current_day, hour = current_hh, minute = current_mm)
                e = datetime.datetime(year = next_year, month = next_month, day = next_day, hour = next_hh, minute = next_mm)
                day_dates = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][1:-1]
                day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1]

                dates = np.insert(dates,count+1,day_dates)
                times = np.insert(times,count+1,day_hours)
                conc = np.insert(conc,count+1,[conc[count]]*len(day_dates))

                #append to n duplicated array
                n_dups=np.append(n_dups,0)
                n_dups=np.append(n_dups,[1]*len(day_dates))

                count +=(len(day_dates)+1)
        
        elif file_res == '1mo':
            n_dups = []
            #monthly file: replicate each monthly value across the hours it covers to fill the hourly data array
            file_hours = len(dates)
            for i in range(file_hours):
                current_year = int(dates[count][:4])
                current_month = int(dates[count][4:6])
                current_day = int(dates[count][6:])
                current_hh = int(times[count][:2])
                current_mm = int(times[count][2:])
    
                next_year = int(enddates[i][:4])
                next_month = int(enddates[i][4:6])
                next_day = int(enddates[i][6:])
                next_hh = int(endtimes[i][:2])
                next_mm =  int(endtimes[i][2:])
    
                s = datetime.datetime(year = current_year, month = current_month, day = current_day, hour = current_hh, minute = current_mm)
                e = datetime.datetime(year = next_year, month = next_month, day = next_day, hour = next_hh, minute = next_mm)
        
                day_dates = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][1:-1]
                day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1]
                dates = np.insert(dates,count+1,day_dates)
                times = np.insert(times,count+1,day_hours)
                conc = np.insert(conc,count+1,[conc[count]]*len(day_dates))
                
                #append to n duplicated array
                n_dups=np.append(n_dups,0)
                n_dups=np.append(n_dups,[1]*len(day_dates))
                
                count += (len(day_dates)+1)
        
        data = [dates,times,conc,n_dups]
        
        #put measurement methods into big list, same length as times
        mm_big=np.append(mm_big,[mm]*len(dates))
      
        try:
            big_list = np.hstack((big_list,data))
        except:
            big_list = np.array(data)
                
    if (y == year_array[-1]):    

        #get dates and times
        date_con = big_list[0,:]
        time_con = big_list[1,:]
          
        #get vals
        vals = np.array(big_list[2,:]).astype('float64')
        
        #get n dup array
        n_dup_array = np.array(big_list[3,:]).astype(float).astype(int)

        #if all files have missing key meta then exit
        if all(meta_valid_list):
            inv_nometa += 1
            print 'Site Invalid. No Metadata for ref'
            if no2_type == 'MOLYBDENUM':
                n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_obs_after_anyvaliddata,inv_nokeymeta,n_obs_after_nokeymeta,inv_resolution,n_obs_after_resolution,inv_badmeasurementmethod,n_obs_after_badmeasurementmethod = 0,0,0,0,0,0,0,0,0,0,0,0,0
            exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod])
            n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod])
            unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list]
            meta = ['na','na','na','na','na','na','na','na','na','na','na','na']
            exit_r = 'nometa'
            return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1)
        valid_hours_dup = np.sum(n_dup_array)
        n_after_nometa += (len(vals)-valid_hours_dup)

        #delete big list
        del big_list

        date_con = np.array(date_con).astype(int)
        time_con = np.array(time_con).astype(int)
        
        #remove data < 1970 and >= 2015
        test_inds = (date_con >= 19700101) & (date_con < 20150101)
        date_con = date_con[test_inds]
        time_con = time_con[test_inds]
        vals = vals[test_inds]
        mm_big = mm_big[test_inds]
        n_dup_array = n_dup_array[test_inds]
        
        #set st_big as 'continuous'
        st_big = ['continuous']*len(vals)
        
        #convert all NaNs back to -99999
        test = np.isnan(vals)
        vals[test] = -99999
        
        #get obs valid
        test = vals >= 0
        valid_hours_dup = np.sum(n_dup_array[test])
        n_obs_valid = int(len(vals[test]) - valid_hours_dup)
        n_after_flagsandlod += n_obs_valid
        
        #create max possible species grid, measurement method and sampling type grids
        full_data = np.empty(n_hours)
        full_data_after_flagsandlod = np.empty(n_hours)
        big_n_dup_array = np.zeros(n_hours)
        full_data[:] = -99999
        full_data_after_flagsandlod[:] = -99999
        
        #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
        converted_time = modules.date_process(date_con,time_con,start_year)
        converted_time = np.round(converted_time,decimals=5)
        syn_grid_time = np.arange(0,n_days,1./24)
        syn_grid_time = np.round(syn_grid_time,decimals=5)
        #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
        raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
        vals = np.array(vals)
        full_data_after_flagsandlod[raw_indices] = vals
        raw_st = np.copy(st_big)
        raw_mm = np.copy(mm_big)
        
        # test and remove duplicate and overlap points
        converted_time,vals,mm_big,st_big,n_dup_array = modules.remove_duplicate_points(site_ref,converted_time,vals,mm_big,st_big,n_dup_array,output_res)
        test = vals >= 0
        valid_hours_dup = np.sum(n_dup_array[test])
        n_obs_valid = int(len(vals[test]) - valid_hours_dup)
        n_after_duplicate += n_obs_valid
        
        #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
        indices = np.searchsorted(syn_grid_time, converted_time, side='left')
        full_data[indices] = vals 
        big_n_dup_array[indices] = n_dup_array
    
        #get mode of metadata
        try:
            lat = np.float32(stats.mode(all_latitudes)[0][0]) 
        except:
            lat = 'na'
        try:
            lon = np.float32(stats.mode(all_longitudes)[0][0])  
        except:
            lon = 'na'
        try:
            alt = np.float32(stats.mode(all_altitudes)[0][0]) 
        except:
            alt = 'na'
        unit = stats.mode(all_unit)[0][0]
        #remove empty strings from extra meta before mode test
        try:
            site_name = stats.mode(filter(None, all_site_name))[0][0]
        except:
            site_name = 'na'
        try:
            country = stats.mode(filter(None, all_country))[0][0]
        except:
            country = 'na'
        try:
            contact = stats.mode(filter(None, all_contact))[0][0] 
        except:
            contact = 'na'
    
        #set data tz - all EMEP times are UTC
        data_tz = 0
        all_tz = [data_tz]
    
        key_meta = [lat,lon,alt]
        
        #convert file res to standard format
        if file_res == '1h':
            file_res = 'H'
        elif file_res == '1d':
            file_res = 'D'
        elif file_res == '1mo':
            file_res = 'M'
    
        #get sampling/instrument grids
        raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,unknown_mm_list,unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(site_ref,process_group,species,raw_st,raw_mm,full_data_after_flagsandlod,full_data,raw_indices,unknown_mm_list,unknown_mm_refs_list,no2_type)

        #do quality checks                                                                                                                                                                                                                                                                                                     
        data_valid,full_data,valid_hours_dup,p_st_grid,p_mm_grid,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod,exit_r = modules.primary_quality_control(site_ref,species,file_res,no2_type,grid_dates,full_data,big_n_dup_array,valid_hours_dup,raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,data_resolution,n_obs_valid,key_meta,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod)
        if data_valid == False:
            exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod])
            n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod])
            unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list]
            meta = [lat,lon,alt,'na','na','na','na','na','na','na','na','na']
            return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1)

        #set metadata not available as na
        raw_class_name = 'na'
    
        #set processed unit
        p_unit = 'ppbv'
    
        #get local timezone
        try:
            local_tz_name = tz_root.tzNameAt(lat,lon,forceTZ=True)
            pytz_obj = pytz.timezone(local_tz_name)
            datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000,1,1))
            if datetime_offset < datetime.timedelta(0):
                local_tz = -(24-int(datetime_offset.seconds/60/60))
            else:
                local_tz = int(datetime_offset.seconds/60/60)
        except:
            local_tz = 'na'
            print 'TIMEZONE NOT KNOWN, SITE IS %s'%(site_ref)
            unknown_local_tz_list.append(site_ref)

        #pack meta
        meta = [lat,lon,alt,raw_class_name,file_res,unit,p_unit,data_tz,local_tz,site_name,country,contact]
    
        #if blank strings in meta then convert to 'na'
        for i in range(len(meta)):
            try:
                if meta[i].strip() == '':
                    meta[i] = 'na'
            except:
                pass
    
        print set(raw_st_grid)
        print set(raw_mm_grid)
        print set(p_st_grid)
        print set(p_mm_grid)
        print meta
    
        exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod])
        n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod])
        print 'exit counts = ', exit_c_list
        print 'n obs counts = ', n_c_list

        unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list]

        return c,full_data,p_st_grid,p_mm_grid,data_valid,meta,exit_c_list,n_c_list,unknown_list,'na',big_n_dup_array
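
A worked sketch of the ug/m3 to ppbv conversion used above, conv_fact = R/MW * T(K) / (P(hPa)/10), assuming ozone (mol_mass = 48.0 g/mol, an assumption here) at the 293 K and 1013 hPa reference conditions in the code:

#conversion factor from ug/m3 to ppbv, assuming ozone at 293 K, 1013 hPa
mol_mass = 48.0
conv_fact = 8.3144/mol_mass*(293.)/(1013./10.)

conc_ugm3 = 100.0            #invented concentration in ug/m3
conc_ppbv = conv_fact*conc_ugm3

print conv_fact              #~0.50, i.e. roughly 2 ug/m3 per ppbv of O3
print conc_ppbv              #~50.1 ppbv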
Example #26
def onpick(event):
    global pl
    
    global ind
    global fig2
    
    ind = event.ind
    ind = ind[0]
    #x_data = event.xdata
    #y_data = event.ydata

    #find ind of closest lat/lon
    #ind = modules.find_nearest_point_index(obs_lons,obs_lats,x_data,y_data)
    
    try:
        for i in range(len(pl)):
            pl.pop(0).remove()
            first_run = False  
        
    except:
        first_run = True
    
    pl = m.plot([X[ind]], [Y[ind]], 'o', ms=12, alpha=0.6, color='yellow',zorder=20)
    
    #get model timeseries for site clicked
    lat_n,lon_n = modules.obs_model_gridbox(lat_e,lon_e,obs_lats[ind],obs_lons[ind])
    model_var_pick = model_var[:,lat_n,lon_n]
    model_var_pick = model_var_pick*1e9
    model_var_mask = np.ma.masked_where(model_var_pick<=0,model_var_pick)
   
    if model_name == 'MACC':
        model_time_pd = pd.date_range(start = model_datetimes[0],end = model_datetimes[-1], freq = 'H')
        count = 0
        valids = []
        for i in range(len(model_time_pd)):
            if count == 0:
                valids.append(i)
                count+=1
            elif count == 2:
                count = 0
            else:
                count+=1
        model_time_pd = model_time_pd[valids]
        model_var_pd = pd.Series(model_var_mask, index=model_time_pd) 
    else:    
        model_time_pd = pd.date_range(start = model_datetimes[0],end = model_datetimes[-1], freq = 'H')
        model_var_pd = pd.Series(model_var_mask, index=model_time_pd)
    
    #get obs timeseries for site clicked
    ref = obs_refs[ind]
    obs_ts_group = obs_root_grp.groups[ref]
    obs_var = obs_ts_group.variables[species.lower()][:]
    group = obs_ts_group.process_group
    lat = obs_ts_group.latitude
    lon = obs_ts_group.longitude
    alt = obs_ts_group.altitude
    complete = obs_ts_group.completeness
    a_class = obs_ts_group.anthrome_class
    r_class = obs_ts_group.raw_class
    continent = loc_dict[tags[ind]]
    country = obs_ts_group.country
    
    obs_var_mask = np.ma.masked_where(obs_var<=0,obs_var)
    obs_time_pd = pd.date_range(start = obs_datetimes[0],end = obs_datetimes[-1], freq = 'H')
    obs_var_pd = pd.Series(obs_var_mask, index=obs_time_pd)
    
    #create sine wave from amp/phase
    obs_date_l = obs_date.astype(int)
    obs_time_l = obs_time.astype(int)
    obs_times = modules.date_process(obs_date_l,obs_time_l,start_year)
    obs_times = np.array(obs_times)
    pi2 = np.pi*2
    
    #convert phases to radians
    calc = pi2/6.
    obs_ha_phase_r = obs_ha_phase[ind] * calc
    calc = pi2/12.
    obs_annual_phase_r = obs_annual_phase[ind] * calc
    
    ha_obs_wave = obs_ha_mag[ind]*(np.cos((pi2*obs_times/(365.25/2.))-(obs_ha_phase_r)))
    annual_obs_wave = obs_annual_mag[ind]*(np.cos((pi2*obs_times/(365.25))-(obs_annual_phase_r)))
    seasonal_obs_wave = (ha_obs_wave+annual_obs_wave)+obs_ave[ind]
    obs_seasonal_wave_pd = pd.Series(seasonal_obs_wave, index=obs_time_pd)
    
    #create sine wave from amp/phase
    mod_date_l = model_date.astype(int)
    mod_time_l = model_time.astype(int)
    mod_times = modules.date_process(mod_date_l,mod_time_l,start_year)
    mod_times = np.array(mod_times)
    pi2 = np.pi*2
    
    #convert phases to radians
    calc = pi2/6.
    model_ha_phase_r = model_ha_phase[ind] * calc
    calc = pi2/12.
    model_annual_phase_r = model_annual_phase[ind] * calc
    
    ha_model_wave = model_ha_mag[ind]*(np.cos((pi2*mod_times/(365.25/2.))-(model_ha_phase_r)))
    annual_model_wave = model_annual_mag[ind]*(np.cos((pi2*mod_times/(365.25))-(model_annual_phase_r)))
    seasonal_model_wave = (ha_model_wave+annual_model_wave)+model_ave[ind]
    model_seasonal_wave_pd = pd.Series(seasonal_model_wave, index=model_time_pd)

    
    #get spectra data
    site_group_obs = root_grp_obs_spec.groups[ref]
    site_group_mod = root_grp_mod_spec.groups[ref]
    
    obs_period = site_group_obs.variables['period'][:]
    mod_period = site_group_mod.variables['period'][:]
    
    obs_amp = site_group_obs.variables['amplitude'][:]
    mod_amp = site_group_mod.variables['amplitude'][:]
    
    fig.canvas.draw()
        
    if first_run == False:
        plt.close(fig2)
        fig2, (axo,axo2) = plt.subplots(2,figsize=(24,12))
        fig2.patch.set_facecolor('white')
        
        #fig2 = plt.figure()
        
        axo.plot_date(obs_time_pd.to_pydatetime(), obs_var_pd, color='black', markersize = 3, label = 'Observations')
        axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='red',alpha=0.5, markersize = 3, label = '%s %s %s %s'%(model_name,version,grid_size,met),markeredgecolor='None')
        axo.plot_date(obs_time_pd.to_pydatetime(), obs_seasonal_wave_pd, color='yellow', markersize = 3, label = 'Obs Seasonal Waveform',markeredgecolor='None')
        axo.plot_date(model_time_pd.to_pydatetime(), model_seasonal_wave_pd, color='green', markersize = 3, label = 'Model Seasonal Waveform',markeredgecolor='None')
        
        axo2.loglog(obs_period,obs_amp,color='black',label='Obs')
        axo2.loglog(mod_period,mod_amp,color='red',label = '%s %s %s %s'%(model_name,version,grid_size,met))

        axo2.text(0.01, 0.95, 'Obs D Amp = %8.2f ppb'%(obs_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.92, 'Model D Amp = %8.2f ppb'%(model_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.85, 'Obs HA Amp = %8.2f ppb'%(obs_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.82, 'Model HA Amp = %8.2f ppb'%(model_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.75, 'Obs A Amp = %8.2f ppb'%(obs_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.72, 'Model A Amp = %8.2f ppb'%(model_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.55, 'Obs D Phase = %8.2f'%(obs_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.52, 'Model D Phase = %8.2f'%(model_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.45, 'Obs HA Phase = %8.2f'%(obs_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.42, 'Model HA Phase = %8.2f'%(model_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        obs_a_ph = obs_annual_phase[ind]
        mod_a_ph = model_annual_phase[ind]
        
        if obs_a_ph > 12:
            obs_a_ph = obs_a_ph-12.
        if mod_a_ph > 12:
            mod_a_ph = mod_a_ph-12.
        
        axo2.text(0.01, 0.35, 'Obs A Phase = %8.2f'%(obs_a_ph),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.32, 'Model A Phase = %8.2f'%(mod_a_ph),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.15, 'Obs Ave = %8.2f ppb'%(obs_ave[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.12, 'Model Ave = %8.2f ppb'%(model_ave[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.axvline(1.,ymin=0,ymax=1,color='blue',linestyle='--')
        axo2.axvline(182.625,ymin=0,ymax=1,color='blue',linestyle='--')
        axo2.axvline(365.25,ymin=0,ymax=1,color='blue',linestyle='--')
        
        axo2.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
        axo2.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
        plt.gca().xaxis.set_major_formatter(FuncFormatter(xformatter))
        plt.gca().yaxis.set_major_formatter(FuncFormatter(yformatter))
        
        
        axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s, Grid Index = %s,%s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,lat_n,lon_n)) 
        
        plt.legend(loc = 'lower right')
        plt.tight_layout()
        axo.grid()
        axo2.grid()
        
        plt.show()
    else:
        #fig2 = plt.figure()
        fig2, (axo,axo2) = plt.subplots(2,figsize=(24,12))
        fig2.patch.set_facecolor('white')
        
        axo.plot_date(obs_time_pd.to_pydatetime(), obs_var_pd, color='black', markersize = 3, label = 'Observations')
        axo.plot_date(model_time_pd.to_pydatetime(), model_var_pd, color='red', markersize = 3,alpha=0.5, label = '%s %s %s %s'%(model_name,version,grid_size,met),markeredgecolor='None')
        axo.plot_date(obs_time_pd.to_pydatetime(), obs_seasonal_wave_pd, color='yellow', markersize = 3, label = 'Obs Seasonal Waveform',markeredgecolor='None')
        axo.plot_date(model_time_pd.to_pydatetime(), model_seasonal_wave_pd, color='green', markersize = 3, label = 'Model Seasonal Waveform',markeredgecolor='None')
        
        axo2.loglog(obs_period,obs_amp,color='black',label='Obs')
        axo2.loglog(mod_period,mod_amp,color='red', label = '%s %s %s %s'%(model_name,version,grid_size,met))

        axo2.text(0.01, 0.95, 'Obs D Amp = %8.2f ppb'%(obs_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.92, 'Model D Amp = %8.2f ppb'%(model_daily_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.85, 'Obs HA Amp = %8.2f ppb'%(obs_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.82, 'Model HA Amp = %8.2f ppb'%(model_ha_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.75, 'Obs A Amp = %8.2f ppb'%(obs_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.72, 'Model A Amp = %8.2f ppb'%(model_annual_mag[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.55, 'Obs D Phase = %8.2f'%(obs_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.52, 'Model D Phase = %8.2f'%(model_daily_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.45, 'Obs HA Phase = %8.2f'%(obs_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.42, 'Model HA Phase = %8.2f'%(model_ha_phase[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        obs_a_ph = obs_annual_phase[ind]
        mod_a_ph = model_annual_phase[ind]
        
        if obs_a_ph > 12:
            obs_a_ph = obs_a_ph-12.
        if mod_a_ph > 12:
            mod_a_ph = mod_a_ph-12.
        
        axo2.text(0.01, 0.35, 'Obs A Phase = %8.2f'%(obs_a_ph),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.32, 'Model A Phase = %8.2f'%(mod_a_ph),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.text(0.01, 0.15, 'Obs Ave = %8.2f ppb'%(obs_ave[ind]),transform=axo2.transAxes,fontweight='bold')
        axo2.text(0.01, 0.12, 'Model Ave = %8.2f ppb'%(model_ave[ind]),transform=axo2.transAxes,fontweight='bold',color='red')
        
        axo2.axvline(1.,ymin=0,ymax=1,color='blue',linestyle='--')
        axo2.axvline(182.625,ymin=0,ymax=1,color='blue',linestyle='--')
        axo2.axvline(365.25,ymin=0,ymax=1,color='blue',linestyle='--')
        
        axo2.xaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
        axo2.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
        plt.gca().xaxis.set_major_formatter(FuncFormatter(xformatter))
        plt.gca().yaxis.set_major_formatter(FuncFormatter(yformatter))
        
        
        axo.set_title('Site = %s, Country = %s, Continent = %s, Process Group = %s, Lat = %s, Lon = %s, Alt = %sm,\n Data Completeness = %s%%, Anthrome Class = %s, Raw Class = %s, Grid Index = %s,%s' %(ref,country,continent,group,lat,lon,alt,complete,a_class,r_class,lat_n,lon_n))
        
        plt.legend(loc = 'lower right')
        plt.tight_layout()
        axo.grid()
        axo2.grid()
        
        plt.show()
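
A minimal sketch of the seasonal waveform reconstruction used for the overlays above, rebuilding a waveform from annual and half-annual amplitude/phase pairs around a mean; the amplitudes, phases (already in radians) and mean below are invented:

import numpy as np

pi2 = np.pi*2
obs_times = np.arange(0, 365.25, 1./24)      #days since start

ha_mag, ha_phase_r = 3.0, 1.2                #invented half-annual component
annual_mag, annual_phase_r = 8.0, 2.5        #invented annual component
ave = 35.0                                   #invented mean

ha_wave = ha_mag*(np.cos((pi2*obs_times/(365.25/2.))-(ha_phase_r)))
annual_wave = annual_mag*(np.cos((pi2*obs_times/(365.25))-(annual_phase_r)))
seasonal_wave = (ha_wave+annual_wave)+ave

print seasonal_wave.min(), seasonal_wave.max()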
Example #27
end_year = 2008

d0 = datetime.date(start_year, 1, 1)
d1 = datetime.date(end_year+1, 1, 1)
delta = d1 - d0
n_days = delta.days

all_hours = np.arange(0,n_days,1./24.)

group = Dataset('GAW_SURFACE_O3_1971_2009.nc')
site_group = group.groups['hpb']
vals = site_group.variables['o3'][:]
date = site_group.variables['date'][:]
time = site_group.variables['time'][:]

current_time = modules.date_process(date,time,start_year) 

print current_time
print vals

valid = vals > 0
vals = vals[valid]
current_time = current_time[valid]


print current_time

all_hours = np.arange(np.min(current_time),np.max(current_time)+1./48.,1./24.)

f = interpolate.interp1d(current_time, vals)
vals = f(all_hours)
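
One caution on the interpolation step above: interp1d raises for query points outside the input range by default, which is why all_hours is rebuilt from the min/max of the valid times. A minimal sketch with invented data:

import numpy as np
from scipy import interpolate

#invented irregular times (days since start) and values
current_time = np.array([0.0, 0.5, 1.25, 3.0])
vals = np.array([30., 34., 31., 29.])

#hourly grid bounded by the data itself, so no extrapolation is needed
all_hours = np.arange(np.min(current_time),np.max(current_time)+1./48.,1./24.)

f = interpolate.interp1d(current_time, vals)
hourly_vals = f(all_hours)
print len(all_hours), hourly_vals[:3]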
Example #28
def run_LSP(model_data, x):

    print obs_refs[x]

    vals = model_data

    #check obs vals are valid
    valid = vals >= 0
    vals = vals[valid]
    model_time_val = model_time[valid]
    model_date_val = model_date[valid]

    full_times = modules.date_process(model_date, model_time, start_year)
    if timeres == 'M':
        full_times_year = full_times[:12]
    else:
        full_times_year = full_times[:8766]
    full_times_day = full_times[:24]

    valid_times = modules.date_process(model_date_val, model_time_val,
                                       start_year)

    site_lon = obs_lons[x]

    #convert site_lon to 0 to 360 degs
    if site_lon < 0:
        site_lon = 360 - np.abs(site_lon)

    #transform from UTC time to solar time
    sun_time = lon_step_time * site_lon
    time_diff = sun_time - 0
    if time_diff > 12:
        time_diff = time_diff - 24

    #make time start from 0
    valid_times_from0 = modules.phase_start_correct(valid_times)

    periodic_periods = [
        1. / 4., 1. / 3., 1. / 2., 1., 365.25 / 4., 365.25 / 3., 365.25 / 2.,
        365.25
    ]
    periods, mag, ph, fr, fi = modules.take_lomb_spec(
        valid_times_from0, vals, w=True, key_periods=periodic_periods)

    #get mean of values
    mean_array = np.average(vals)

    #correct all phases for start point (not actually being from 0 - just corrected to be)
    ph = modules.phase_start_point_correct_all(periodic_periods, ph,
                                               valid_times)

    key_diurnal_periods = [1. / 4., 1. / 3., 1. / 2., 1.]
    key_seasonal_periods = [365.25 / 4., 365.25 / 3., 365.25 / 2., 365.25]

    diurnal_mags = mag[:4]
    seasonal_mags = mag[4:]
    seasonal_phs = ph[4:]

    #get individual mags and phases
    daily_h3_mag = mag[0]
    daily_h2_mag = mag[1]
    daily_h1_mag = mag[2]
    orig_daily_mag = mag[3]
    daily_h3_ph = ph[0]
    daily_h2_ph = ph[1]
    daily_h1_ph = ph[2]
    orig_daily_ph = ph[3]

    seasonal_h3_mag = mag[4]
    seasonal_h2_mag = mag[5]
    seasonal_h1_mag = mag[6]
    annual_mag = mag[7]
    seasonal_h3_ph = ph[4]
    seasonal_h2_ph = ph[5]
    seasonal_h1_ph = ph[6]
    annual_ph = ph[7]

    #convert sub diurnal phases from UTC to solar time
    daily_h3_ph = modules.solar_time_phase_corrector(daily_h3_ph, 6, time_diff)
    daily_h2_ph = modules.solar_time_phase_corrector(daily_h2_ph, 24. / 3.,
                                                     time_diff)
    daily_h1_ph = modules.solar_time_phase_corrector(daily_h1_ph, 12,
                                                     time_diff)
    orig_daily_ph = modules.solar_time_phase_corrector(orig_daily_ph, 24,
                                                       time_diff)
    diurnal_phs = [daily_h3_ph, daily_h2_ph, daily_h1_ph, orig_daily_ph]

    #convolve annual cycle and harmonics to seasonal waveform for 1 year
    seasonal_mag, seasonal_min_ph, seasonal_max_ph, seasonal_waveform, seasonal_ff = modules.period_convolution(
        key_seasonal_periods, full_times_year, seasonal_mags, seasonal_phs,
        mean_array)

    #convolve diurnal cycle and harmonics to diurnal waveform for 1 day
    diurnal_mag, diurnal_min_ph, diurnal_max_ph, diurnal_waveform, diurnal_ff = modules.period_convolution(
        key_diurnal_periods, full_times_day, diurnal_mags, diurnal_phs,
        mean_array)

    #convolve all
    full_mag, full_min_ph, full_max_ph, full_waveform, full_ff = modules.period_convolution(
        periodic_periods, full_times, mag, ph, mean_array)

    #convert phase to time
    daily_h3_ph = modules.convert_phase_units_actual_single(daily_h3_ph, 6.)
    daily_h2_ph = modules.convert_phase_units_actual_single(
        daily_h2_ph, 24. / 3.)
    daily_h1_ph = modules.convert_phase_units_actual_single(daily_h1_ph, 12.)
    orig_daily_ph = modules.convert_phase_units_actual_single(
        orig_daily_ph, 24.)
    diurnal_min_ph = modules.convert_phase_units_actual_single(
        diurnal_min_ph, 24.)
    diurnal_max_ph = modules.convert_phase_units_actual_single(
        diurnal_max_ph, 24.)
    seasonal_h3_ph = modules.convert_phase_units_actual_single(
        seasonal_h3_ph, 3.)
    seasonal_h2_ph = modules.convert_phase_units_actual_single(
        seasonal_h2_ph, 4.)
    seasonal_h1_ph = modules.convert_phase_units_actual_single(
        seasonal_h1_ph, 6.)
    annual_ph = modules.convert_phase_units_actual_single(annual_ph, 12.)
    seasonal_min_ph = modules.convert_phase_units_actual_single(
        seasonal_min_ph, 12.)
    seasonal_max_ph = modules.convert_phase_units_actual_single(
        seasonal_max_ph, 12.)

    return (x, daily_h3_mag, daily_h3_ph, daily_h2_mag, daily_h2_ph,
            daily_h1_mag, daily_h1_ph, orig_daily_mag, orig_daily_ph,
            diurnal_mag, diurnal_min_ph, diurnal_max_ph, seasonal_h3_mag,
            seasonal_h3_ph, seasonal_h2_mag, seasonal_h2_ph, seasonal_h1_mag,
            seasonal_h1_ph, annual_mag, annual_ph, seasonal_mag,
            seasonal_min_ph, seasonal_max_ph, mean_array, diurnal_waveform,
            seasonal_waveform, full_waveform)
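
A short sketch of the UTC to local solar time offset computed above, assuming lon_step_time = 24./360. (hours of solar time per degree of longitude; its actual definition sits outside this snippet):

import numpy as np

lon_step_time = 24./360.     #assumed: hours per degree of longitude
site_lon = -70.0             #invented site longitude (70W)

#convert site_lon to 0 to 360 degs
if site_lon < 0:
    site_lon = 360 - np.abs(site_lon)

#transform from UTC time to solar time
sun_time = lon_step_time * site_lon
time_diff = sun_time - 0
if time_diff > 12:
    time_diff = time_diff - 24

print time_diff              #~ -4.67 hours for 70W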
Example #29
#----------------------------------------
#find model data gridbox to compare with obs.

#get model gridbox for obs site
lat_n,lon_n = modules.obs_model_gridbox(lat_e,lon_e,obs_lat,obs_lon)

model_var = model_var[:,lat_n,lon_n]
model_var = model_var*1e9

model_var_mask = np.ma.masked_where(model_var<=0,model_var)
model_ave = np.ma.average(model_var_mask)

#--------------------------------------------
#take half daily average of obs and model

obs_time = modules.date_process(obs_date,obs_time,start_year)
model_time = modules.date_process(model_date,model_time,start_year)

divisor = 6

#take half daily average of obs
total_len = len(obs_var_mask)/divisor
start = 0
end = divisor
ave_obs_var = []
ave_obs_time = []
for i in range(total_len):
    ave = np.ma.average(obs_var_mask[start:end])
    ave_obs_time=np.append(ave_obs_time,obs_time[start])
    ave_obs_var=np.append(ave_obs_var,ave)
    start+=divisor
    end+=divisor
Example #30
def site_iter_process(valid_refs,c):
#process data for each site at a time
#for site_ref in valid_refs:
    site_ref = valid_refs[c]
    data_valid = True
    print 'ref = ',site_ref
    site_test = all_refs == site_ref
    
    site_yyyymmdd = yyyymmdd[site_test]
    site_hhmm = hhmm[site_test]
    site_vals = vals[site_test]
    site_mm = all_mm[site_test]
    site_units = units[site_test]
    
    if species == 'ISOP':
        site_sample_len = sample_len[site_test]
     
    #check for data below limit of detection (only for ISOP) as other species have LOD check by line in file. If below LOD, change to -99999
    #LOD for ISOP is 0.01 ppbv
    if species == 'ISOP':
        lod_test =  site_vals < 0.01
    
    #convert from ppm to ppb
    if (species == 'O3') or (species == 'NO') or (species == 'NO2'):
        for i in range(len(site_vals)):
            if site_units[i] == 'Parts per million':
                site_vals[i] = site_vals[i]*1.e3
            elif site_units[i] == 'Parts per billion':
                pass
            else:
                print site_units[i]
                raise ValueError('Unknown unit: %s'%(site_units[i]))
        
    # convert from ppbC to ppb
    if species == 'ISOP':
        for i in range(len(site_vals)):
            #078 is Parts per billion Carbon, Isoprene has 5 Carbons
            if site_units[i] == '078':
                site_vals[i] = site_vals[i]/5.  
            #008 is Parts per billion
            if site_units[i] == '008':
                pass
            #101 is Parts per million Carbon
            if site_units[i] == '101':
                site_vals[i] = (site_vals[i]/5.)*1.e3
                
        site_vals[lod_test] = -99999
 
    #put vals into full grid
    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)
    
    #create max possible o3 grid
    full_data = np.empty(n_hours)
    full_data[:] = -99999
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    
    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)
    
    #if date goes past where it should finish, omit it.
    inv_i = indices < len(full_data)
    indices = indices[inv_i]
    site_vals = site_vals[inv_i]
 
    full_data[indices] = site_vals
    
    #get site meta
    meta_index = meta_refs.index(site_ref)
    lat = np.float64(meta_lats[meta_index])
    lon =  np.float64(meta_lons[meta_index])
    alt =  np.float64(meta_alts[meta_index])
    raw_class_name = meta_class[meta_index]
    
    #get measurement method, take mode of big methods array
    site_mm = stats.mode(site_mm)[0][0]
    if (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLETABSORPTION') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLET2BMODEL202') or (site_mm.upper() == 'INSTRUMENTAL-UVPHOTOMETRIC') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLETRADIATIONABSORBTN') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLET') or (site_mm.upper() == 'INSTRUMENTAL-ULTRAVIOLETPHOTOMETRY') or (site_mm.upper() == 'INSTRUMENTAL-UVABSORPTIONPHOTOMETRY/UV2BMODEL202AND205') or (site_mm.upper() == 'INSTRUMENTAL-ECOTECHSERINUS10'):
        mm = 'ultraviolet photometry'
        
    elif (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-GASPHASECHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCEAPIMODEL265EANDT265') or (site_mm.upper() == 'LOWLEVELNOXINSTRUMENTAL-TECO42SCHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-GAS-PHASECHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPIT200UPPHOTOLYTIC') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPI200EU/501') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9841T') or (site_mm.upper() == 'TELEDYNE-APIMODEL200EUPORT200UP-PHOTOLYTIC-CHEMILUMINESCENCE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-TL,42I-TL') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCERHODAMINEBDYE') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-Y,42I-Y') or (site_mm.upper() == 'INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9843'):
        mm = 'chemiluminescence'
        
    elif (site_mm.upper() == 'INSTRUMENTAL-OPENPATHO3ANALYZER') or (site_mm.upper() == 'INSTRUMENTAL-OPENPATHNOANALYZER'):
        mm = 'differential optical absorption spectroscopy'
        
    elif (site_mm.upper() == 'TELEDYNEMODELT500U-CAVITYATTENUATEDPHASESHIFTSPECTROSCOPY'):
        mm = 'cavity attenuated phase shift spectroscopy'

    elif (site_mm.upper() == 'INSTRUMENTAL-COLORIMETRIC-GRIESS-SALTZMAN') or (site_mm.upper() == 'INSTRUMENTAL-COLORIMETRIC'):
        mm = 'griess saltzman colorimetric'
        
    elif (site_mm.upper() == 'INSTRUMENTAL-COLORIMETRIC-LYSHKOW(MOD)'):
        mm = 'lyshkow colorimetric'
        
    elif (site_mm.upper() == 'INSTRUMENTAL-COULOMETRIC'):
        mm = 'coulometry'
        
    else:
        print site_mm.upper()
        raise ValueError('Unknown measurement method: %s'%(site_mm))

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #set site file resolution
    file_res = 'H'
    
    #set sampling as average
    st = 'average'
    
    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
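
A worked sketch of the isoprene unit handling above: isoprene (C5H8) carries five carbons, so a ppbC reading divides by 5 to give ppbv, and ppmC additionally scales by 1e3. The unit codes follow the snippet; the input value is invented:

site_val = 2.5               #invented reading

unit = '078'                 #parts per billion carbon
if unit == '078':
    val_ppbv = site_val/5.
elif unit == '008':          #already parts per billion
    val_ppbv = site_val
elif unit == '101':          #parts per million carbon
    val_ppbv = (site_val/5.)*1.e3

print val_ppbv               #0.5 ppbv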
Example #31
#find model data gridbox to compare with obs.

#get model gridbox for obs site
lat_n,lon_n = modules.obs_model_gridbox(lat_e,lon_e,obs_lat,obs_lon)

model_var = model_var[:,lat_n,lon_n]
model_var = model_var*1e9

model_var_mask = np.ma.masked_where(model_var<=0,model_var)
model_ave = np.ma.average(model_var_mask)
model_var = model_var[~np.isnan(model_var_mask)]

#--------------------------------------------
#get valid data and process time

obs_time = np.array(modules.date_process(obs_date,obs_time,start_year))
model_time = np.array(modules.date_process(model_date,model_time,start_year))

model_test = model_var >= 0
model_var = model_var[model_test]
model_time = model_time[model_test]

obs_test = obs_var >= 0
obs_var = obs_var[obs_test]
obs_time = obs_time[obs_test]

#--------------------------------------------
#take LSP's

#windowing?
wind_set = raw_input('Windowing? Y or N?\n')
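
The modules.take_lomb* helpers these scripts call are not shown here. As a rough stand-alone illustration of an unwindowed Lomb-Scargle periodogram on irregularly sampled data, scipy.signal.lombscargle can be used; the synthetic series below is invented and the sqrt(4P/N) amplitude normalisation is stated as an assumption:

import numpy as np
from scipy.signal import lombscargle

#synthetic gappy series: a 1-day cycle plus noise over 60 days
t = np.sort(np.random.uniform(0, 60, 800))
y = 35. + 5.*np.cos(2*np.pi*t/1.) + np.random.normal(0, 1., t.size)

periods = np.linspace(0.2, 10., 5000)        #days
ang_freqs = 2*np.pi/periods                  #lombscargle expects rad/day
pgram = lombscargle(t, y - y.mean(), ang_freqs)

amps = np.sqrt(4.*pgram/t.size)              #approximate component amplitudes
print 'peak period = %.2f days'%(periods[np.argmax(pgram)])
print 'peak amplitude = %.2f'%(amps[np.argmax(pgram)])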
Example #32
def site_iter_process(valid_refs, c):

    # for each valid location process
    # limit obs data due for each site in valid_obs_site_names
    # for c in range(len(valid_refs)):

    all_lat = []
    all_lon = []
    all_alt = []
    all_st = []
    all_mm = []

    site_ref = valid_refs[c]

    file_valid = True
    data_valid = True

    print site_ref
    file_res = data_resolutions[c]
    print file_res

    # read files for each valid site
    s_files = sorted(
        glob.glob("/work/home/db876/observations/surface/%s/GAW/%s**.%s**.dat" % (species, site_ref.lower(), file_res))
    )

    print s_files
    if file_res == "hr":
        site_files = sorted(s_files, key=lambda x: x.split(".hr")[1])

    else:
        site_files = sorted(s_files)

    delete_inds = []
    if file_res == "hr":
        # limit site files before and after year limit

        for i in range(len(site_files)):
            f = site_files[i]
            year = f.split(".hr")[1][:4]
            if int(year) < int(start_year):
                delete_inds.append(i)
            if int(year) > int(end_year):
                delete_inds.append(i)

        site_files = np.delete(site_files, delete_inds)
        print site_files

    site_file_len = len(site_files)
    s_count = 0
    start_ind = 0
    end_ind = 0
    for f in site_files:
        print f
        read = np.loadtxt(f, dtype="S10,S5,f8", comments="C", usecols=(0, 1, 4), unpack=True)
        read = np.array(read)

        dates = read[0, :]
        times = read[1, :]
        conc = read[2, :]
        conc = np.array(conc)
        conc = conc.astype(float)

        # change all vals < 0 to np.NaN
        inv_test = conc < 0
        conc[inv_test] = np.NaN

        start_ind = end_ind
        end_ind += len(conc)

        s_count += 1

        units = []
        mycsv = csv.reader(open(f))
        row_count = 0
        for row in mycsv:
            if row_count == 11:
                val = " ".join(row)
                lat = val.replace(" ", "")
                lat = lat[12:]
                lat = float(lat)
                all_lat.append(lat)
            # get lon
            if row_count == 12:
                val = " ".join(row)
                lon = val.replace(" ", "")
                lon = lon[13:]
                lon = float(lon)
                all_lon.append(lon)
            # get altitude
            if row_count == 13:
                val = " ".join(row)
                alt = val.replace(" ", "")
                alt = alt[12:]
                alt = float(alt)
                all_alt.append(alt)
            # get units
            if row_count == 20:
                val = " ".join(row)
                unit = val.replace(" ", "")
                unit = unit[19:]
            # get measurement method
            if row_count == 21:
                val = " ".join(row)
                mm = val.replace(" ", "")
                mm = mm[21:]
                all_mm.append(mm)
            # get sampling type
            if row_count == 22:
                val = " ".join(row)
                st = val.replace(" ", "")
                st = st[16:]
                all_st.append(st)
            if row_count == 23:
                val = " ".join(row)
                tz = val.replace(" ", "")
                tz = tz[12:]

            row_count += 1

        # test if units are in ppb for each file - if not convert

        if (unit != "ppb") & (unit != "ppbv"):
            if (unit == "ug/m3") or (unit == "ugN/m3"):
                print "converting units, temp = 20degC"
                # calculate conversion factor from mg/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments
                # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10
                conv_fact = 8.3144 / mol_mass * (273.15 + 20) / (1013.25 / 10)
                conc = conv_fact * conc
            elif (unit == "ug/m3-20C") or (unit == "ugN/m3-20C"):
                print "converting units, temp = 20degC"
                # calculate conversion factor from mg/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments
                # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10
                conv_fact = 8.3144 / mol_mass * (273.15 + 20) / (1013.25 / 10)
                conc = conv_fact * conc
            elif (unit == "ug/m3-25C") or (unit == "ugN/m3-25C") or (unit == "ug/m3at25C"):
                print "converting units, temp = 25degC"
                # calculate conversion factor from mg/m3 assuming 25 degC and 1 atm
                # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10
                conv_fact = 8.3144 / mol_mass * (273.15 + 25) / (1013.25 / 10)
                conc = conv_fact * conc
            elif (unit == "mg/m3-20C") or (unit == "mgN/m3-20C"):
                print "converting units, temp = 25degC"
                # calculate conversion factor from mg/m3 assuming 25 degC and 1 atm
                # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10
                conv_fact = 8.3144 / mol_mass * (273.15 + 20) / (1013.25 / 10)
                conc = (conv_fact * conc) * 1e3
            elif (unit == "mg/m3-25C") or (unit == "mgN/m3-25C"):
                print "converting units, temp = 25degC"
                # calculate conversion factor from mg/m3 assuming 25 degC and 1 atm
                # R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10
                conv_fact = 8.3144 / mol_mass * (273.15 + 25) / (1013.25 / 10)
                conc = (conv_fact * conc) * 1e3
            elif (unit == "ppm") or (unit == "ppmv"):
                conc = conc * 1.0e3
            elif (unit == "ppt") or (unit == "pptv"):
                conc = conc / 1.0e3

            else:
                print "Unknown Unit"
                print unit
                1 + "a"
                break

        if tz != "UTC":
            if tz == "":
                if site_ref.lower() in ["plm"]:
                    tz = -5

                if site_ref.lower() in ["kos", "edm", "vdl", "nwr"]:
                    tz = 0

                if site_ref.lower() in [
                    "jfj",
                    "kps",
                    "rig",
                    "pay",
                    "glh",
                    "cmn",
                    "zep",
                    "dig",
                    "hhe",
                    "ktb",
                    "stp",
                    "ivn",
                    "jcz",
                    "kam",
                    "lzp",
                    "snz",
                    "zbl",
                    "kmw",
                    "don",
                    "mhn",
                    "nia",
                    "roq",
                    "spm",
                ]:
                    tz = 1

                if site_ref.lower() in ["rcv", "aht", "oul", "uto", "vir", "fdt", "sem", "stn"]:
                    tz = 2

                if site_ref.lower() in ["dak"]:
                    tz = 3

                if site_ref.lower() in ["shp"]:
                    tz = 4

                if site_ref.lower() in ["isk"]:
                    tz = 5

                if site_ref.lower() in ["hkg"]:
                    tz = 8

                if site_ref.lower() in ["cgo"]:
                    tz = 10
            else:
                tz = tz.replace("LocaltimeUTC", "")
                tz = tz.replace("OtherUTC", "")
                tz = tz.replace("Localtime", "")
                tz = tz.replace(":", ".")

                try:
                    before, sep, after = tz.rpartition(".")
                    if sep:
                        # convert a minutes part (e.g. "5.30") into a decimal fraction ("5.50")
                        conv = int(round((100.0 / 60) * int(after)))
                        tz = before + sep + str(conv)
                except:
                    pass
                tz = float(tz)

        else:
            tz = 0

        # check tz is whole number else skip site
        if (tz % 1) != 0:
            print "File Invalid, timezone is not a whole number."
            conc[:] = -99999

        # process dates from date, time to days since start year
        dates = [s.replace("-", "") for s in dates]
        times = [s.replace(":", "") for s in times]

        if file_res == "hr":
            # some files report times from 0100 to 2400, assumed to mean the average over the previous hour; shift all times back one hour
            for i in range(len(times)):
                if times[i] == "2400":
                    current_date = dates[i]
                    test = np.array(dates) == current_date
                    indices = [i for i, x in enumerate(test) if x]
                    for x in indices:
                        current_time = times[x]
                        if current_time == "2400":
                            current_time = "0000"
                        date_datetime = datetime.datetime(
                            int(current_date[0:4]),
                            int(current_date[4:6]),
                            int(current_date[6:]),
                            int(current_time[:2]),
                            int(current_time[2:]),
                        )
                        date_datetime = date_datetime - datetime.timedelta(hours=1)
                        times[x] = date_datetime.strftime("%H%M")

            # adjust dates and times if tz is not equal to 0
            if tz != 0:
                for i in range(len(dates)):
                    # create datetime
                    dt = datetime.datetime(
                        int(dates[i][:4]), int(dates[i][4:6]), int(dates[i][6:]), int(times[i][:2]), int(times[i][2:])
                    )
                    if tz > 0:
                        # print 'Old dt', dt
                        dt = dt - datetime.timedelta(hours=int(tz))
                        # print 'New dt', dt
                    elif tz < 0:
                        # print 'Old dt', dt
                        dt = dt + datetime.timedelta(hours=np.abs(int(tz)))
                        # print 'New dt', dt
                    dates[i] = dt.strftime("%Y%m%d")
                    times[i] = dt.strftime("%H%M")

        data = [dates, times, conc]
        try:
            big_list = np.hstack((big_list, data))
        except:
            big_list = np.array(data)

        if s_count == site_file_len:

            # make sure big_list exists (it is only created once at least one file has been read)
            try:
                big_list
            except NameError:
                data_valid = False

            if data_valid == True:

                # get dates and times
                date_con = big_list[0, :]
                time_con = big_list[1, :]

                # get vals
                vals = np.array(big_list[2, :]).astype(float)

                # delete big list
                del big_list

                # if dates outside what asked for exclude
                first_date_val = int("%s0101" % (start_year))
                last_date_val = int("%s1231" % (end_year))

                test_valid = (np.array(date_con).astype(int) >= first_date_val) & (
                    np.array(date_con).astype(int) <= last_date_val
                )
                date_con = date_con[test_valid]
                time_con = time_con[test_valid]
                vals = vals[test_valid]

                # Check if any times are duplicate, if so delete all but first
                del_list = []
                for d in range(len(date_con) - 1):
                    if (date_con[d] == date_con[d + 1]) & (time_con[d] == time_con[d + 1]):
                        del_list.append(d + 1)
                if len(del_list) > 0:
                    print "Deleting duplicate timepoints"
                    print date_con[del_list], time_con[del_list]
                    date_con = np.delete(date_con, del_list)
                    time_con = np.delete(time_con, del_list)
                    vals = np.delete(vals, del_list)

                # if file resolution is daily or monthly then replicate times after point, to fill hourly data array.
                count = 0
                if file_res == "da":
                    file_hours = len(date_con)
                    for i in range(file_hours):
                        current_hh = int(time_con[count][:2])
                        current_mm = int(time_con[count][2:])
                        s = datetime.datetime(year=start_year, month=1, day=1, hour=current_hh, minute=current_mm)
                        e = datetime.datetime(year=start_year, month=1, day=2, hour=current_hh, minute=current_mm)
                        day_hours = [d.strftime("%H%M") for d in pd.date_range(s, e, freq="H")][1:-1]

                        date_con = np.insert(date_con, count + 1, [date_con[count]] * 23)
                        time_con = np.insert(time_con, count + 1, day_hours)
                        vals = np.insert(vals, count + 1, [vals[count]] * 23)

                        count += 24

                if file_res == "mo":
                    file_hours = len(date_con)
                    for i in range(file_hours):
                        current_year = int(date_con[count][:4])
                        current_month = int(date_con[count][4:6])

                        next_month = current_month + 1
                        if next_month > 12:
                            next_month = 1
                            next_year = current_year + 1
                        else:
                            next_year = current_year

                        s = datetime.datetime(year=current_year, month=current_month, day=1, hour=1, minute=0)
                        e = datetime.datetime(year=next_year, month=next_month, day=1, hour=0, minute=0)

                        day_date = [d.strftime("%Y%m%d") for d in pd.date_range(s, e, freq="H")][:-1]
                        day_hour = [d.strftime("%H%M") for d in pd.date_range(s, e, freq="H")][:-1]
                        date_con = np.insert(date_con, count + 1, day_date)
                        time_con = np.insert(time_con, count + 1, day_hour)
                        vals = np.insert(vals, count + 1, [vals[count]] * len(day_date))
                        count += len(day_date) + 1

                date_con = np.array(date_con).astype(int)
                time_con = np.array(time_con).astype(int)

                # create max possible o3 grid
                o3_data = np.empty(n_hours)
                o3_data[:] = -99999

                # delete dates,times and var outside date range
                val_test = (date_con >= int(output_res_dates_strings[0])) & (
                    date_con <= int(output_res_dates_strings[-1])
                )
                date_con = date_con[val_test]
                time_con = time_con[val_test]
                vals = vals[val_test]

                print date_con

                # find matching times between actual times and grid of times, return big array of indices of matched indices in grid
                converted_time = modules.date_process(date_con, time_con, start_year)
                converted_time = np.round(converted_time, decimals=5)
                syn_grid_time = np.arange(0, n_days, 1.0 / 24)
                syn_grid_time = np.round(syn_grid_time, decimals=5)
                # find matching times between actual times and grid of times, return big array of indices of matched indices in grid
                indices = np.searchsorted(syn_grid_time, converted_time, side="left")
                o3_data[indices] = vals

                # convert all Nans back to -99999
                test = np.isnan(o3_data)
                o3_data[test] = -99999

                # get mode of metadata
                lat = np.float64(stats.mode(all_lat)[0][0])
                lon = np.float64(stats.mode(all_lon)[0][0])
                alt = np.float64(stats.mode(all_alt)[0][0])
                st = stats.mode(all_st)[0][0]
                mm = stats.mode(all_mm)[0][0]

                # check site is not urban using anthrome map from 2000
                anthfile = "/work/home/db876/plotting_tools/core_tools/anthro2_a2000.nc"
                anthload = Dataset(anthfile)
                class_valid, anthrome_class_name = modules.anthrome_classify(anthload, [lat], [lon])
                if class_valid == "invalid":
                    data_valid = False
                    print "Site Invalid, site classed as urban by anthrome map."

                # get measurement type and sampling type (take mode from collected list)
                if (st == "continuous") or (
                    st == "continuous(carbondioxide),remotespectroscopicmethod(methaneandsurfaceozone)"
                ):
                    st = "average"
                elif st == "flask":
                    st = "flask"
                elif st == "filter":
                    st = "filter"
                else:
                    # halt on an unrecognised sampling type
                    raise ValueError("Unknown sampling type: %s" % st)

                if mm == "Lightabsorptionanalysis(UV)":
                    mm = "ultraviolet photometry"

                elif mm == "CavityRingdownSpectroscopy":
                    mm = "cavity ringdown spectroscopy"

                elif mm == "NDIR":
                    site_mm = "non-dispersive infrared spectroscopy"

                elif mm == "GasChromatography(FID)":
                    site_mm = "gas chromatography flame ionisation detection"

                elif mm == "Gas Chromatography (RGD)":
                    site_mm = "gas chromatography reduction gas detection"

                elif mm == "Chemiluminescence":
                    mm = "chemiluminescence"

                elif (mm == "Spectrophotometry") or (
                    mm == "spectrophotometry,naphthyl-ethylenediaminedihydrochloridemethod"
                ):
                    mm = "spectrophotometry"

                elif mm == "":
                    if species == "O3":
                        mm = "ultraviolet photometry"
                    if species == "CO":
                        mm = "non-dispersive infrared spectroscopy"
                    if species == "NO2":
                        mm = "chemiluminescence"
                    if species == "NO":
                        mm = "chemiluminescence"
                    if species == "ISOP":
                        mm = "gas chromatography flame ionisation detection"

                # do data quality checks
                full_data, data_valid = modules.quality_check(
                    o3_data, data_valid, data_resolution, alt, grid_dates, start_year, end_year
                )

                # convert file res to standard format
                if file_res == "hr":
                    file_res = "H"
                elif file_res == "da":
                    file_res = "D"
                elif file_res == "mo":
                    file_res = "M"

                # no raw class so set as na
                raw_class_name = "na"

                return c, full_data, data_valid, lat, lon, alt, raw_class_name, anthrome_class_name, mm, st, file_res
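
A note on the conversion block above: every mass-concentration branch applies the same ideal-gas factor, R/MW * (273.15 + T) / (P/10), differing only in the assumed temperature and an extra factor of 1e3 for mg/m3 inputs. A minimal standalone sketch of that factor (the mass_to_ppb name and the O3 example values are illustrative, not from the original):

import numpy as np

R = 8.3144  # gas constant, J mol-1 K-1

def mass_to_ppb(conc_ugm3, mol_mass, temp_c=20.0, press_hpa=1013.25):
    # ideal gas conversion: ppb = ug/m3 * R/MW * T(K) / (P(hPa)/10)
    conv_fact = R / mol_mass * (273.15 + temp_c) / (press_hpa / 10.0)
    return conv_fact * np.asarray(conc_ugm3, dtype=float)

# illustrative values: 100 ug/m3 of O3 (MW = 48 g/mol) at 20 degC, 1 atm -> ~50 ppb
print mass_to_ppb(100.0, 48.0)

Factoring the formula out like this also makes the 20 degC vs 25 degC variants a single parameter rather than near-duplicate branches.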
Example #33
0
def site_iter_process(valid_refs, c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

    site_resolutions = []

    site_ref = valid_refs[c]

    data_valid = True
    print 'ref = ', site_ref, c

    if species != 'ISOP':
        site_test = all_refs == site_ref
        site_yyyymmdd = yyyymmdd[site_test]
        site_hhmm = hhmm[site_test]
        site_vals = vals[site_test]
        n_dup_array = np.array([0] * len(site_vals))
    else:
        if site_ref[0] == '0':
            site_ref = site_ref[1:]
        files = []
        site_yyyymmdd = []
        site_hhmm = []
        site_vals = []
        n_dup_array = []
        for y in all_years:
            try:
                files.append(
                    glob.glob('../CANADANAPS/VOC%s/S%s*' % (y, site_ref)))
            except:
                pass
        files = [item for sublist in files for item in sublist]
        for f in files:
            print f
            all_data = get_data(f)
            all_data = all_data.values()
            test_header_range = range(0, 10)
            for x in test_header_range:
                headers = all_data[0][x]
                if 'Isoprene' in headers:
                    header_x = x
                    break
            data_cut = all_data[0][header_x + 1:]
            var_i = headers.index('Isoprene')
            #date_i = headers.index('Sample Date')
            date_i = headers.index('Compounds')
            time_i = headers.index('START TIME')
            duration_i = headers.index('DURATION')

            for i in range(len(data_cut)):
                row_cut = data_cut[i]

                try:
                    dur = float(row_cut[duration_i])
                    if dur.is_integer() == False:
                        dur = round(dur, 0)
                except:
                    # duration is a datetime.time; round to the nearest whole hour
                    if float(row_cut[duration_i].strftime("%M")) >= 30:
                        dur = float(row_cut[duration_i].strftime("%H")) + 1
                    else:
                        dur = float(row_cut[duration_i].strftime("%H"))

                if dur.is_integer() == False:
                    # durations should be whole hours by this point
                    raise ValueError('duration is not a whole number of hours: %s' % dur)

                try:
                    val = np.float64(row_cut[var_i])
                except:
                    val = -99999

                if dur == 1:
                    site_resolutions.append('H')

                    #if (val >= 0.01):
                    #    site_vals.append([val])
                    #else:
                    #    site_vals.append([-99999])
                    site_vals.append([val])

                    n_dup_array.append([0])
                    site_yyyymmdd.append([row_cut[date_i].strftime("%Y%m%d")])
                    try:
                        site_hhmm.append(
                            [row_cut[time_i][:2] + row_cut[time_i][3:5]])
                    except:
                        #round to nearest hour if necessary
                        ti = row_cut[time_i].strftime("%H%M")
                        if float(row_cut[time_i].strftime("%M")) != 0:
                            print 'non whole time = ', row_cut[time_i]
                            if float(row_cut[time_i].strftime("%M")) >= 30:
                                site_hhmm.append([
                                    datetime.time(hour=int(ti[:2]) + 1,
                                                  minute=0).strftime("%H%M")
                                ])
                            else:
                                site_hhmm.append([
                                    datetime.time(hour=int(ti[:2]),
                                                  minute=0).strftime("%H%M")
                                ])

                        else:
                            site_hhmm.append(
                                [row_cut[time_i].strftime("%H%M")])
                #deal with sample lens > 1 hour
                else:
                    if output_res == 'H':
                        continue
                    else:
                        site_resolutions.append('D')

                        #if (val >= 0.01):
                        #    site_vals.append([val])
                        #else:
                        #    site_vals.append([-99999])
                        site_vals.append([val])

                        n_dup_array.append([0])

                        try:
                            site_yyyymmdd.append(
                                [row_cut[date_i].strftime("%Y%m%d")])
                        except:
                            # halt on an unparseable sample date
                            raise ValueError('unparseable sample date: %s' % row_cut[date_i])
                        try:
                            site_hhmm.append(
                                [row_cut[time_i][:2] + row_cut[time_i][3:5]])
                        except:
                            #round to nearest hour if necessary
                            ti = row_cut[time_i].strftime("%H%M")
                            if float(row_cut[time_i].strftime("%M")) != 0:
                                print 'non whole time = ', row_cut[time_i]
                                if float(row_cut[time_i].strftime("%M")) >= 30:
                                    site_hhmm.append([
                                        datetime.time(
                                            hour=int(ti[:2]) + 1,
                                            minute=0).strftime("%H%M")
                                    ])
                                else:
                                    site_hhmm.append([
                                        datetime.time(
                                            hour=int(ti[:2]),
                                            minute=0).strftime("%H%M")
                                    ])

                            else:
                                site_hhmm.append(
                                    [row_cut[time_i].strftime("%H%M")])

                        current_year = int(site_yyyymmdd[-1][0][:4])
                        current_month = int(site_yyyymmdd[-1][0][4:6])
                        current_day = int(site_yyyymmdd[-1][0][6:])
                        current_hh = int(site_hhmm[-1][0][:2])
                        current_mm = int(site_hhmm[-1][0][2:])

                        s = datetime.datetime(year=current_year,
                                              month=current_month,
                                              day=current_day,
                                              hour=current_hh,
                                              minute=current_mm)
                        e = s + datetime.timedelta(hours=dur)
                        day_dates = [
                            d.strftime('%Y%m%d')
                            for d in pd.date_range(s, e, freq='H')
                        ][1:-1]
                        day_hours = [
                            d.strftime('%H%M')
                            for d in pd.date_range(s, e, freq='H')
                        ][1:-1]

                        site_yyyymmdd.append(day_dates)
                        site_hhmm.append(day_hours)
                        site_vals.append([site_vals[-1][0]] * len(day_dates))

                        #append to n duplicated array
                        n_dup_array.append([0])
                        n_dup_array.append([1] * len(day_dates))

    if species == 'ISOP':
        site_yyyymmdd = [item for sublist in site_yyyymmdd for item in sublist]
        site_hhmm = [item for sublist in site_hhmm for item in sublist]
        site_vals = [item for sublist in site_vals for item in sublist]
        n_dup_array = np.array(
            [item for sublist in n_dup_array for item in sublist])
        if len(site_ref) == 5:
            site_ref = '0' + site_ref

    site_vals = np.float64(site_vals)

    #add val to total obs count
    n_all += len(site_vals)

    #test if site_ref in meta_refs, if not then exit
    if site_ref not in meta_refs:
        print site_ref
        inv_nometa += 1
        print 'Site Invalid. No Metadata for ref'
        if no2_type == 'MOLYBDENUM':
            n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_obs_after_anyvaliddata, inv_nokeymeta, n_obs_after_nokeymeta, inv_resolution, n_obs_after_resolution, inv_badmeasurementmethod, n_obs_after_badmeasurementmethod = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na',
            'na'
        ]
        exit_r = 'nometa'
        return c, ['na'], ['na'], [
            'na'
        ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(
            1)
    n_after_nometa += len(site_vals)

    #convert all invalids to -99999
    test_inv = site_vals < 0
    site_vals[test_inv] = -99999

    #create max possible grid
    full_data = np.empty(n_hours)
    full_data_after_flagsandlod = np.empty(n_hours)
    big_n_dup_array = np.zeros(n_hours)
    full_data[:] = -99999
    full_data_after_flagsandlod[:] = -99999

    #get meta
    meta_index = meta_refs.index(site_ref)
    data_tz = np.float32(meta_tz[meta_index])
    all_tz = [data_tz]
    try:
        lat = np.float32(meta_lats[meta_index])
    except:
        lat = 'na'
    try:
        lon = np.float32(meta_lons[meta_index])
    except:
        lon = 'na'
    try:
        alt = np.float32(meta_alts[meta_index])
    except:
        alt = 'na'
    raw_class_name = meta_class[meta_index]
    site_name = meta_sitenames[meta_index]
    unit = 'na'
    contact = meta_contacts[meta_index]
    country = meta_countries[meta_index]

    #adjust dates and times if tz is not equal to 0
    tz = int(data_tz)
    if tz != 0:
        for i in range(len(site_yyyymmdd)):
            #create datetime
            dt = datetime.datetime(int(site_yyyymmdd[i][:4]),
                                   int(site_yyyymmdd[i][4:6]),
                                   int(site_yyyymmdd[i][6:]),
                                   int(site_hhmm[i][:2]),
                                   int(site_hhmm[i][2:]))
            if tz > 0:
                dt = dt - datetime.timedelta(hours=int(tz))
            elif tz < 0:
                dt = dt + datetime.timedelta(hours=np.abs(int(tz)))
            site_yyyymmdd[i] = dt.strftime("%Y%m%d")
            site_hhmm[i] = dt.strftime("%H%M")

    #put vals into full grid
    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)

    #remove data < 1970 and >= 2015
    test_inds = (date_con >= 19700101) & (date_con < 20150101)
    date_con = date_con[test_inds]
    time_con = time_con[test_inds]
    site_vals = site_vals[test_inds]
    n_dup_array = n_dup_array[test_inds]

    #set st_big and mm_big
    st_big = ['continuous'] * len(site_vals)

    if species == 'O3':
        mm_big = ['ultraviolet photometry'] * len(site_vals)
    elif species == 'NO':
        mm_big = ['chemiluminescence'] * len(site_vals)
    elif species == 'NO2':
        mm_big = ['chemiluminescence (conversion-molybdenum)'] * len(site_vals)
    elif species == 'CO':
        mm_big = ['non-dispersive infrared spectrometry'] * len(site_vals)
    elif species == 'ISOP':
        mm_big = ['gas chromatography mass selective detection'
                  ] * len(site_vals)

    #get obs valid after flagsandlod
    test = site_vals != -99999
    valid_hours_dup = np.sum(n_dup_array[test])
    n_obs_valid = len(site_vals[test]) - valid_hours_dup
    n_after_flagsandlod += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con, time_con, start_year)
    converted_time = np.round(converted_time, decimals=5)
    syn_grid_time = np.arange(0, n_days, 1. / 24)
    syn_grid_time = np.round(syn_grid_time, decimals=5)
    raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)
    full_data_after_flagsandlod[raw_indices] = site_vals
    raw_st = np.copy(st_big)
    raw_mm = np.copy(mm_big)

    # test and remove duplicate and overlap points
    converted_time, site_vals, mm_big, st_big, n_dup_array = modules.remove_duplicate_points(
        site_ref, converted_time, site_vals, mm_big, st_big, n_dup_array,
        output_res)
    test = site_vals != -99999
    valid_hours_dup = np.sum(n_dup_array[test])
    n_obs_valid = int(len(site_vals[test]) - valid_hours_dup)
    print 'n obs valid = ', n_obs_valid
    n_after_duplicate += n_obs_valid

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = site_vals
    big_n_dup_array[indices] = n_dup_array

    #if species is CO then convert units from ppmv to ppbv
    if species == 'CO':
        valid_inds = full_data != -99999
        full_data[valid_inds] = full_data[valid_inds] * 1e3

    #if species is ISOP then convert units from ug/m3 to ppbv
    if species == 'ISOP':
        #calculate conversion factor from ug/m3 assuming 25 degC and 1 atm
        #R/MW * (TEMP0C(K) + TEMP(degC)) / (P(hPa)/10)
        conv_fact = 8.3144 / mol_mass * (273.15 + 25) / (1013.25 / 10)
        valid_inds = full_data != -99999
        full_data[valid_inds] = full_data[valid_inds] * conv_fact

    key_meta = [lat, lon, alt]

    #set site file resolution
    if species in ('O3', 'CO', 'NO', 'NO2'):
        file_res = 'H'
    else:
        # if no valid data then site res does not matter
        if len(site_resolutions) == 0:
            file_res = 'na'
        else:
            #if all site resolutions are the same then take the first as file_res
            all_same = all(x == site_resolutions[0] for x in site_resolutions)
            if all_same == True:
                file_res = site_resolutions[0]
            else:
                #otherwise take highest frequency res as file_res
                if 'M' in site_resolutions:
                    file_res = 'M'
                elif 'D' in site_resolutions:
                    file_res = 'D'
                else:
                    file_res = 'H'

    #get sampling/instrument grids
    raw_st_grid, p_st_grid, p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid, p_mm_grid_after_flagsandlod, unknown_mm_list, unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(
        site_ref, process_group, species, raw_st, raw_mm,
        full_data_after_flagsandlod, full_data, raw_indices, unknown_mm_list,
        unknown_mm_refs_list, no2_type)

    #do quality checks
    data_valid, full_data, valid_hours_dup, p_st_grid, p_mm_grid, n_all, inv_nometa, n_after_nometa, n_after_flagsandlod, n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata, inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution, inv_badmeasurementmethod, n_after_badmeasurementmethod, exit_r = modules.primary_quality_control(
        site_ref, species, file_res, no2_type, grid_dates, full_data,
        big_n_dup_array, valid_hours_dup, raw_st_grid, p_st_grid,
        p_st_grid_after_flagsandlod, raw_mm_grid, p_mm_grid,
        p_mm_grid_after_flagsandlod, data_resolution, n_obs_valid, key_meta,
        n_all, inv_nometa, n_after_nometa, n_after_flagsandlod,
        n_after_duplicate, inv_anyvaliddata, n_after_anyvaliddata,
        inv_nokeymeta, n_after_nokeymeta, inv_resolution, n_after_resolution,
        inv_badmeasurementmethod, n_after_badmeasurementmethod)
    if data_valid == False:
        exit_c_list = np.array([
            inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
            inv_badmeasurementmethod
        ])
        n_c_list = np.array([
            n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
            n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
            n_after_badmeasurementmethod
        ])
        unknown_list = [
            unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
        ]
        meta = [
            lat, lon, alt, 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na', 'na'
        ]
        return c, ['na'], ['na'], [
            'na'
        ], False, meta, exit_c_list, n_c_list, unknown_list, exit_r, np.zeros(
            1)

    #recast tz as float32 after checks
    data_tz = np.float32(data_tz)

    #set processed unit
    p_unit = 'ppbv'

    #get local timezone
    try:
        local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True)
        pytz_obj = pytz.timezone(local_tz_name)
        datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000, 1, 1))
        if datetime_offset < datetime.timedelta(0):
            local_tz = -(24 - int(datetime_offset.seconds / 60 / 60))
        else:
            local_tz = int(datetime_offset.seconds / 60 / 60)
    except:
        local_tz = 'na'
        print 'TIMEZONE NOT KNOWN, SITE IS %s' % (site_ref)
        unknown_local_tz_list.append(site_ref)

    #pack meta
    meta = [
        lat, lon, alt, raw_class_name, file_res, unit, p_unit, data_tz,
        local_tz, site_name, country, contact
    ]

    #if blank strings in meta then convert to 'na'
    for i in range(len(meta)):
        try:
            if meta[i].strip() == '':
                meta[i] = 'na'
        except:
            pass

    print set(raw_st_grid)
    print set(raw_mm_grid)
    print set(p_st_grid)
    print set(p_mm_grid)
    print meta

    exit_c_list = np.array([
        inv_nometa, inv_anyvaliddata, inv_nokeymeta, inv_resolution,
        inv_badmeasurementmethod
    ])
    n_c_list = np.array([
        n_all, n_after_nometa, n_after_flagsandlod, n_after_duplicate,
        n_after_anyvaliddata, n_after_nokeymeta, n_after_resolution,
        n_after_badmeasurementmethod
    ])
    print 'exit counts = ', exit_c_list
    print 'n obs counts = ', n_c_list

    unknown_list = [
        unknown_mm_list, unknown_mm_refs_list, unknown_local_tz_list
    ]

    return c, full_data, p_st_grid, p_mm_grid, data_valid, meta, exit_c_list, n_c_list, unknown_list, 'na', big_n_dup_array
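
Both readers above share the same gridding idiom: timestamps are converted to fractional days since the start year, the observation times and a synthetic hourly grid are rounded to 5 decimals so exact matches are possible, and np.searchsorted places each value into the grid. A minimal sketch of that pattern (grid_hourly is a hypothetical helper standing in for the inline code):

import numpy as np

def grid_hourly(obs_days, obs_vals, n_days, fill=-99999.0):
    # synthetic hourly grid in fractional days, rounded exactly like the obs times
    grid_time = np.round(np.arange(0, n_days, 1. / 24), decimals=5)
    obs_days = np.round(np.asarray(obs_days, dtype=float), decimals=5)
    full = np.full(len(grid_time), fill)
    # identical rounding on both sides makes searchsorted an exact-match lookup
    idx = np.searchsorted(grid_time, obs_days, side='left')
    full[idx] = obs_vals
    return full

# two hourly obs at 0000 and 0100 on day 0 of a 2 day grid
print grid_hourly([0.0, 1. / 24], [31.2, 29.8], 2)[:3]

The rounding step matters: without it, floating-point drift in the "days since start" values would make searchsorted land one slot off for some hours.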
Example #34
0
#find model data gridbox to compare with obs.

#get model gridbox for obs site
lat_n, lon_n = modules.obs_model_gridbox(lat_e, lon_e, obs_lat, obs_lon)

model_var = model_var[:, lat_n, lon_n]
model_var = model_var * 1e9

model_var_mask = np.ma.masked_where(model_var <= 0, model_var)
model_ave = np.ma.average(model_var_mask)
model_var = model_var[~np.isnan(model_var_mask)]

#--------------------------------------------
#get valid data and process time

obs_time = np.array(modules.date_process(obs_date, obs_time, start_year))
model_time = np.array(modules.date_process(model_date, model_time, start_year))

model_test = model_var >= 0
model_var = model_var[model_test]
model_time = model_time[model_test]

obs_test = obs_var >= 0
obs_var = obs_var[obs_test]
obs_time = obs_time[obs_test]

#--------------------------------------------
#take LSP's

#windowing?
wind_set = raw_input('Windowing? Y or N?\n')
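
The snippet stops just before the spectral step, so the actual Lomb-Scargle wrapper is not shown here. As a sketch only, a periodogram of the cleaned, irregularly sampled series could be computed with scipy.signal.lombscargle (an assumed substitute for the modules.take_lomb routines used elsewhere; the amplitude scaling below is the standard N*A**2/4 relation for a pure sinusoid):

import numpy as np
from scipy.signal import lombscargle

def simple_lsp(time_days, vals, periods_days):
    # scipy expects angular frequencies (rad per unit time)
    ang_freqs = 2 * np.pi / np.asarray(periods_days, dtype=float)
    vals = np.asarray(vals, dtype=float)
    vals = vals - np.mean(vals)  # remove the mean before the fit
    pgram = lombscargle(np.asarray(time_days, dtype=float), vals, ang_freqs)
    # approximate amplitude: for a pure sinusoid, pgram ~= N * A**2 / 4
    return np.sqrt(4. * pgram / len(vals))

# e.g. amplitudes of the diurnal and annual components of the obs series:
# print simple_lsp(obs_time, obs_var, [1., 365.25])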
Example #35
0
def site_iter_process(valid_refs,c):
    #set local counts
    inv_nometa = 0
    inv_anyvaliddata = 0
    inv_nokeymeta = 0
    inv_resolution = 0
    inv_badmeasurementmethod = 0
    n_all = 0
    n_after_nometa = 0
    n_after_flagsandlod = 0
    n_after_duplicate = 0
    n_after_anyvaliddata = 0
    n_after_nokeymeta = 0
    n_after_resolution = 0
    n_after_badmeasurementmethod = 0

    #set local unknown lists
    unknown_mm_list = []
    unknown_mm_refs_list = []
    unknown_local_tz_list = []

    data_valid = True
    
    site_ref = valid_refs[c]
    print 'ref = ',site_ref,c
    
    #read in site data from chunk
    site_yyyymmdd = a_site_yyyymmdd[c]
    site_hhmm = a_site_hhmm[c]
    site_vals = a_site_vals[c]
    mm_big = a_mm_big[c]
    site_units = a_site_units[c]
    site_res = a_site_res[c]
    n_dup_arr = a_n_dup_arr[c]
    lat = a_lat[c]
    lon = a_lon[c]
    alt = a_alt[c]
    unit = a_unit[c]
    raw_class_name = a_raw_class_name[c]
    site_name = a_site_name[c]
    no_meta = a_no_meta[c]
    country = 'United States'
    contact = '*****@*****.**'
    
    print '1'
    
    try:
        lat = np.float32(lat)
    except:
        pass
    try:
        lon = np.float32(lon)
    except:
        pass  
    try:
        alt = np.float32(alt)
    except:
        pass
        

#process data for each site at a time
#for site_ref in valid_refs:
    #site_ref = valid_refs[c]
    #site_test = all_refs == site_ref
    #site_yyyymmdd = yyyymmdd[site_test]
    #site_hhmm = hhmm[site_test]
    #site_vals = vals[site_test]
    #mm_big = all_mm[site_test]
    #site_units = all_units[site_test]
    #if species == 'ISOP':
    #    n_dup_arr = n_dup_array[site_test]
    #    site_res = site_resolutions[site_test]
    #else:
    #    n_dup_arr = np.zeros(len(site_vals))
    
    #convert to ppb
    if (species == 'O3') or (species == 'NO') or (species == 'NO2') or (species == 'CO'):
        for i in range(len(site_vals)):
            if site_units[i] == 'Parts per million':
                site_vals[i] = site_vals[i]*1.e3
            elif site_units[i] == 'Parts per billion':
                pass
            else:
                #halt on an unrecognised unit
                raise ValueError('Unknown unit: %s' % site_units[i])
        
    # convert to ppb
    if species == 'ISOP':
        for i in range(len(site_vals)):
            #078 is Parts per billion Carbon, Isoprene has 5 Carbons
            if site_units[i] == 'Parts per billion Carbon':
                site_vals[i] = site_vals[i]/5.  
            #008 is Parts per billion
            elif site_units[i] == 'Parts per billion':
                pass
            #101 is Parts per million Carbon
            elif site_units[i] == 'Parts per million Carbon':
                site_vals[i] = (site_vals[i]/5.)*1.e3
            else:
                #halt on an unrecognised unit
                raise ValueError('Unknown unit: %s' % site_units[i])
               
    #add val to total obs count
    valid_hours_dup = np.sum(n_dup_arr)
    n_all += len(site_vals) - valid_hours_dup
    
    #get site meta
    #try:
    #    meta_index = meta_refs.index(site_ref)
    #    try:
    #        lat = np.float32(meta_lats[meta_index])
    #    except:
    #        lat = 'na'
    #    try:
    #        lon =  np.float32(meta_lons[meta_index])
    #    except:
    #        lon = 'na'
    #    try:
    #        alt =  np.float32(meta_alts[meta_index])
    #    except:
    #        alt = 'na'
    #except:
    #    pass
    
    #get local timezone
    try:
        local_tz_name = tz_root.tzNameAt(lat,lon,forceTZ=True)
        pytz_obj = pytz.timezone(local_tz_name)
        datetime_offset = pytz_obj.utcoffset(datetime.datetime(2000,1,1))
        if datetime_offset < datetime.timedelta(0):
            local_tz = -(24-int(datetime_offset.seconds/60/60))
        else:
            local_tz = int(datetime_offset.seconds/60/60)
    except:
        local_tz = 'na'
        print 'TIMEZONE NOT KNOWN, SITE IS %s'%(site_ref)
        unknown_local_tz_list.append(site_ref)
    #if species is ISOP set data_tz as local_tz
    if species == 'ISOP':
        if local_tz == 'na':
            data_tz = 'na'
        else:
            data_tz = int(local_tz)
    else:
        data_tz = 0
    
    #test if site_ref in meta_refs, if not then exit
    #also test for ISOP if have local_tz
    
    if (no_meta == 'Yes') or (data_tz == 'na'):
        inv_nometa+=1
        print 'Site Invalid. No Metadata for ref'
        if no2_type == 'MOLYBDENUM':
            n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_obs_after_anyvaliddata,inv_nokeymeta,n_obs_after_nokeymeta,inv_resolution,n_obs_after_resolution,inv_badmeasurementmethod,n_obs_after_badmeasurementmethod = 0,0,0,0,0,0,0,0,0,0,0,0,0
        exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod])
        n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod])
        unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list]
        meta = ['na','na','na','na','na','na','na','na','na','na','na','na']
        exit_r = 'nometa'
        return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1)
    

    valid_hours_dup = np.sum(n_dup_arr)
    n_after_nometa += len(site_vals) - valid_hours_dup
        
    #adjust dates and times if tz is not equal to 0 (only for ISOP)
    #use local tz calc to adjust times to UTC
    if species == 'ISOP':
        tz = int(data_tz)
        if tz != 0:
            for i in range(len(site_yyyymmdd)):
                #create datetime
                dt = datetime.datetime(int(site_yyyymmdd[i][:4]),int(site_yyyymmdd[i][4:6]),int(site_yyyymmdd[i][6:]),int(site_hhmm[i][:2]),int(site_hhmm[i][2:]))
                if tz > 0:
                    dt  = dt - datetime.timedelta(hours = int(tz))
                elif tz < 0:
                    dt  = dt + datetime.timedelta(hours = np.abs(int(tz)))
                site_yyyymmdd[i] = dt.strftime("%Y%m%d")
                site_hhmm[i] = dt.strftime("%H%M")
 
    #put vals into full grid
    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)
    
    #remove data < 1970 and >= 2015
    test_inds = (date_con >= 19700101) & (date_con < 20150101)
    date_con = date_con[test_inds]
    time_con = time_con[test_inds]
    site_vals = site_vals[test_inds]
    mm_big = mm_big[test_inds]
    n_dup_arr = n_dup_arr[test_inds]
    
    #set st_big as 'continuous'
    st_big = ['continuous']*len(site_vals)
    
    #get obs valid
    test = site_vals >= 0
    valid_hours_dup = np.sum(n_dup_arr[test])
    n_obs_valid = len(site_vals[test]) - valid_hours_dup
    n_after_flagsandlod += n_obs_valid
    
    #create max possible grid
    full_data = np.empty(n_hours)
    full_data_after_flagsandlod = np.empty(n_hours)
    big_n_dup_array = np.zeros(n_hours)
    full_data[:] = -99999
    full_data_after_flagsandlod[:] = -99999
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    raw_indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)
    full_data_after_flagsandlod[raw_indices] = site_vals
    raw_st = np.copy(st_big)
    raw_mm = np.copy(mm_big)
    
    #test and remove duplicate and overlap points
    converted_time,site_vals,mm_big,st_big,n_dup_arr = modules.remove_duplicate_points(site_ref,converted_time,site_vals,mm_big,st_big,n_dup_arr,output_res)
    test = site_vals >= 0
    valid_hours_dup = np.sum(n_dup_arr[test])
    n_obs_valid = int(len(site_vals[test]) - valid_hours_dup)
    n_after_duplicate += n_obs_valid
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = site_vals 
    big_n_dup_array[indices] = n_dup_arr
    
    #unit = stats.mode(site_units)[0][0]
    #raw_class_name = meta_class[meta_index]
    #site_name = meta_sitenames[meta_index]
    #country = 'United States'
    #contact = '*****@*****.**'
    all_tz = [data_tz]
    
    key_meta = [lat,lon,alt]
    
    #set site file resolution 
    if species != 'ISOP':
        file_res = 'H'
    else:
        #if all site resolutions are the same then take the first as file_res
        all_same = all(x == site_res[0] for x in site_res)
        if all_same == True:
            file_res = site_res[0]
        else:
            #otherwise take highest frequency res as file_res
            if 'M' in site_res:
                file_res = 'M'
            elif 'D' in site_res:
                file_res = 'D'
            else:
                file_res = 'H'
    
    #get sampling/instrument grids
    raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,unknown_mm_list,unknown_mm_refs_list = modules.sampling_and_instruments_by_processgroup(site_ref,process_group,species,raw_st,raw_mm,full_data_after_flagsandlod,full_data,raw_indices,unknown_mm_list,unknown_mm_refs_list,no2_type)

    print set(p_mm_grid)

    #do quality checks                                                                                                                                                                                                                                                                                                     
    data_valid,full_data,valid_hours_dup,p_st_grid,p_mm_grid,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod,exit_r = modules.primary_quality_control(site_ref,species,file_res,no2_type,grid_dates,full_data,big_n_dup_array,valid_hours_dup,raw_st_grid,p_st_grid,p_st_grid_after_flagsandlod,raw_mm_grid,p_mm_grid,p_mm_grid_after_flagsandlod,data_resolution,n_obs_valid,key_meta,n_all,inv_nometa,n_after_nometa,n_after_flagsandlod,n_after_duplicate,inv_anyvaliddata,n_after_anyvaliddata,inv_nokeymeta,n_after_nokeymeta,inv_resolution,n_after_resolution,inv_badmeasurementmethod,n_after_badmeasurementmethod)
    if data_valid == False:
        exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod])
        n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod])
        unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list]
        meta = [lat,lon,alt,'na','na','na','na','na','na','na','na','na']
        return c,['na'],['na'],['na'],False,meta,exit_c_list,n_c_list,unknown_list,exit_r,np.zeros(1)

    #set processed unit
    p_unit = 'ppbv'

    #pack meta
    meta = [lat,lon,alt,raw_class_name,file_res,unit,p_unit,data_tz,local_tz,site_name,country,contact]
    
    #if blank strings in meta then convert to 'na'
    for i in range(len(meta)):
        try:
            if meta[i].strip() == '':
                meta[i] = 'na'
        except:
            pass
    
    print set(raw_st_grid)
    print set(raw_mm_grid)
    print set(p_st_grid)
    print set(p_mm_grid)
    print meta
    
    
    exit_c_list = np.array([inv_nometa,inv_anyvaliddata,inv_nokeymeta,inv_resolution,inv_badmeasurementmethod])
    n_c_list = np.array([n_all,n_after_nometa,n_after_flagsandlod,n_after_duplicate,n_after_anyvaliddata,n_after_nokeymeta,n_after_resolution,n_after_badmeasurementmethod])
    print 'exit counts = ', exit_c_list
    print 'n obs counts = ', n_c_list

    unknown_list = [unknown_mm_list,unknown_mm_refs_list,unknown_local_tz_list]

    return c,full_data,p_st_grid,p_mm_grid,data_valid,meta,exit_c_list,n_c_list,unknown_list,'na',big_n_dup_array
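
Finally, the local-timezone lookup that appears in both site_iter_process variants special-cases negative offsets because timedelta.seconds is always non-negative. A sketch of the same lookup using the signed timedelta.total_seconds() instead (utc_offset_hours is a hypothetical helper; tz_root is assumed to be the tzwhere-style object used above):

import datetime
import pytz

def utc_offset_hours(tz_root, lat, lon):
    # standard-time offset, probed at a fixed reference date as in the code above
    try:
        local_tz_name = tz_root.tzNameAt(lat, lon, forceTZ=True)
        offset = pytz.timezone(local_tz_name).utcoffset(datetime.datetime(2000, 1, 1))
        # total_seconds() is signed, so west-of-UTC sites need no special case
        return int(offset.total_seconds() / 3600)
    except:
        return 'na'

# illustrative: utc_offset_hours(tz_root, 40.7, -74.0) -> -5 (New York, EST)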