def site_iter_process(valid_refs,c):
    """Read the CAPMON ozone files of one site and place the values on the hourly grid.

    The site is ``valid_refs[c]``. Files are globbed from the CAPMON
    directory of the module-level ``species``, restricted to the years in
    ``year_array`` and naturally sorted. Each file is a CSV made of several
    tables introduced by ``*TABLE NAME`` rows; the 'Site information' table
    supplies latitude, longitude, altitude and land use, and the
    'OZONE_HOURLY' table supplies the data rows between ``*TABLE BEGINS``
    and ``*TABLE ENDS``.

    Relies on module-level names defined elsewhere in the file: ``species``,
    ``year_array``, ``n_hours``, ``n_days``, ``start_year``, ``end_year``,
    ``data_resolution``, ``grid_dates`` and the ``modules`` helper package.

    Parameters
    ----------
    valid_refs : sequence
        Site reference codes.
    c : int
        Index into ``valid_refs`` of the site to process.

    Returns
    -------
    tuple
        ``(c, full_data, data_valid, lat, lon, alt, raw_class_name,
        anthrome_class_name, mm, st, file_res)``.

    Raises
    ------
    ValueError
        If no 'Site information' values were found in any file of the site.
    """
    data_valid = True

    site_ref = valid_refs[c]
    print('Current Ref is =  %s' % (site_ref,))

    s_files = glob.glob('/work/home/db876/observations/surface/%s/CAPMON/ozon_smpls_%s*'%(species,site_ref))
    #keep only files whose path contains one of the requested years (year-major order)
    site_files = [f for y in year_array for f in s_files if str(y) in f]
    site_files = modules.natsorted(site_files)

    yymmdd = []
    hhmm = []
    #accumulate in a plain list and convert once; np.append per row is quadratic
    vals = []

    latitude = None
    longitude = None
    altitude = None
    raw_class_name = None

    #create max possible o3 grid
    full_data = np.empty(n_hours)
    full_data[:] = -99999

    for site_file in site_files:

        meta_start = -99999
        in_ozone_table = False
        reading_data = False

        with open(site_file, 'rb') as f:
            print(site_file)
            for count, row in enumerate(csv.reader(f,delimiter=',')):
                #blank rows carry nothing but still count towards the metadata row offsets
                if not row:
                    continue
                tag = row[0]

                #break out of loop at bottom of the data table
                if reading_data and (tag == '*TABLE ENDS'):
                    break

                #get metadata: column names sit 2 rows below the table name, values 6 rows below those
                if (tag == '*TABLE NAME') and (len(row) > 1) and (row[1] == 'Site information'):
                    meta_start = count+2
                if count == meta_start:
                    lat_i = row.index('Latitude: decimal degrees')
                    lon_i = row.index('Longitude: decimal degrees')
                    try:
                        alt_i = row.index('Ground elevation: above mean sea level')
                    except ValueError:
                        #some files label the altitude column differently
                        alt_i = row.index('Ground altitude')
                    class_i = row.index('Site land use')

                if count == (meta_start+6):
                    latitude = row[lat_i]
                    longitude = row[lon_i]
                    altitude = row[alt_i]
                    raw_class_name = row[class_i]

                #get data
                if reading_data:
                    #read dates, times, and vals; strip the separator characters from both
                    date_str = row[8]
                    time_str = row[9]
                    yymmdd.append(date_str[:4]+date_str[5:7]+date_str[8:])
                    hhmm.append(time_str[:2]+time_str[3:])
                    #only values flagged 'V0' are kept, everything else is marked missing
                    if row[13] == 'V0':
                        vals.append(np.float64(row[12]))
                    else:
                        vals.append(-99999)

                #data rows start on the row after '*TABLE BEGINS' of the ozone table
                if (tag == '*TABLE NAME') and (len(row) > 1) and (row[1] == 'OZONE_HOURLY'):
                    in_ozone_table = True

                if in_ozone_table and (tag == '*TABLE BEGINS'):
                    reading_data = True

    if latitude is None:
        raise ValueError('No site information found in CAPMON files for ref %s' % (site_ref,))

    #convert all invalids to -99999
    vals = np.array(vals,dtype=np.float64)
    vals[vals < 0] = -99999

    #put o3 vals into full grid
    date_con = np.array(yymmdd).astype(int)
    time_con = np.array(hhmm).astype(int)

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')

    #make sure no data is past end year
    index_test = indices < len(full_data)
    indices = indices[index_test]
    vals = vals[index_test]
    full_data[indices] = vals

    #get metadata
    lat = np.float64(latitude)
    lon = np.float64(longitude)
    alt = np.float64(altitude)

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #set measurement method
    mm = 'ultraviolet photometry'

    #set site file resolution
    file_res = 'H'

    #set sampling as average
    st = 'average'

    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
def site_iter_process(valid_refs,c):
    """Select one site's records from the pre-loaded arrays and grid them hourly.

    The site is ``valid_refs[c]``. Its records are picked out of the
    module-level arrays ``yyyymmdd``, ``hhmm``, ``vals``, ``all_mm`` and
    ``units`` with a boolean mask on ``all_refs``, converted to ppb, written
    onto a grid of ``n_hours`` values (missing = -99999) and passed through
    ``modules.quality_check_nr``. Site metadata comes from the ``meta_*``
    lists.

    Other module-level names used: ``species``, ``n_days``, ``start_year``,
    ``end_year``, ``data_resolution``, ``grid_dates``, ``stats``, ``modules``.

    Parameters
    ----------
    valid_refs : sequence
        Site reference codes.
    c : int
        Index into ``valid_refs`` of the site to process.

    Returns
    -------
    tuple
        ``(c, full_data, data_valid, lat, lon, alt, raw_class_name,
        anthrome_class_name, mm, st, file_res)``.

    Raises
    ------
    ValueError
        If an O3/NO/NO2 record has a unit other than ppm or ppb, or if the
        site's most common measurement method is not recognised. (These
        cases previously aborted through a deliberate ``1+'a'`` TypeError.)
    """
    site_ref = valid_refs[c]
    data_valid = True
    print('ref =  %s' % (site_ref,))
    site_test = all_refs == site_ref

    #boolean-mask indexing yields copies, so the in-place edits below do not touch the global arrays
    site_yyyymmdd = yyyymmdd[site_test]
    site_hhmm = hhmm[site_test]
    site_vals = vals[site_test]
    site_mm = all_mm[site_test]
    site_units = units[site_test]

    #convert from ppm to ppb
    if species in ('O3','NO','NO2'):
        is_ppm = site_units == 'Parts per million'
        is_ppb = site_units == 'Parts per billion'
        unknown = np.logical_not(np.logical_or(is_ppm,is_ppb))
        if np.any(unknown):
            bad_unit = site_units[unknown][0]
            print(bad_unit)
            raise ValueError('Unknown unit %r for ref %s' % (bad_unit,site_ref))
        site_vals[is_ppm] = site_vals[is_ppm]*1.e3

    if species == 'ISOP':
        #check for data below limit of detection (only for ISOP) as other species have LOD check by line in file.
        #LOD for ISOP is 0.01 ppbv; the test is taken on the values as read, before unit conversion
        lod_test = site_vals < 0.01

        #convert from ppbC / ppmC to ppb; Isoprene has 5 Carbons
        #078 is Parts per billion Carbon, 101 is Parts per million Carbon, 008 (Parts per billion) needs no change
        is_ppbc = site_units == '078'
        is_ppmc = site_units == '101'
        site_vals[is_ppbc] = site_vals[is_ppbc]/5.
        site_vals[is_ppmc] = (site_vals[is_ppmc]/5.)*1.e3

        site_vals[lod_test] = -99999

    #put vals into full grid
    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)

    #create max possible grid
    full_data = np.empty(n_hours)
    full_data[:] = -99999

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)

    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    site_vals = np.array(site_vals)

    #if date goes past where it should finish, omit it.
    inv_i = indices < len(full_data)
    indices = indices[inv_i]
    site_vals = site_vals[inv_i]

    full_data[indices] = site_vals

    #get site meta
    meta_index = meta_refs.index(site_ref)
    lat = np.float64(meta_lats[meta_index])
    lon = np.float64(meta_lons[meta_index])
    alt = np.float64(meta_alts[meta_index])
    raw_class_name = meta_class[meta_index]

    #standard method name -> raw (upper-cased) method strings that map onto it
    #NOTE(review): 'spectrosocopy' and the trailing space in 'lyshkow colorimetric ' are kept
    #exactly as they were; confirm whether other code matches on these strings before correcting them
    method_groups = (
        ('ultraviolet photometry', (
            'INSTRUMENTAL-ULTRAVIOLETABSORPTION',
            'INSTRUMENTAL-ULTRAVIOLET2BMODEL202',
            'INSTRUMENTAL-UVPHOTOMETRIC',
            'INSTRUMENTAL-ULTRAVIOLETRADIATIONABSORBTN',
            'INSTRUMENTAL-ULTRAVIOLET',
            'INSTRUMENTAL-ULTRAVIOLETPHOTOMETRY',
            'INSTRUMENTAL-UVABSORPTIONPHOTOMETRY/UV2BMODEL202AND205',
            'INSTRUMENTAL-ECOTECHSERINUS10',
        )),
        ('chemiluminescence', (
            'INSTRUMENTAL-CHEMILUMINESCENCE',
            'INSTRUMENTAL-GASPHASECHEMILUMINESCENCE',
            'INSTRUMENTAL-CHEMILUMINESCENCEAPIMODEL265EANDT265',
            'LOWLEVELNOXINSTRUMENTAL-TECO42SCHEMILUMINESCENCE',
            'INSTRUMENTAL-GAS-PHASECHEMILUMINESCENCE',
            'INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPIT200UPPHOTOLYTIC',
            'INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPI200EU/501',
            'INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9841T',
            'TELEDYNE-APIMODEL200EUPORT200UP-PHOTOLYTIC-CHEMILUMINESCENCE',
            'INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-TL,42I-TL',
            'INSTRUMENTAL-CHEMILUMINESCENCERHODAMINEBDYE',
            'INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-Y,42I-Y',
            'INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9843',
        )),
        ('differential optical absorption spectrosocopy', (
            'INSTRUMENTAL-OPENPATHO3ANALYZER',
            'INSTRUMENTAL-OPENPATHNOANALYZER',
        )),
        ('cavity attenuated phase shift spectroscopy', (
            'TELEDYNEMODELT500U-CAVITYATTENUATEDPHASESHIFTSPECTROSCOPY',
        )),
        ('griess saltzman colorimetric', (
            'INSTRUMENTAL-COLORIMETRIC-GRIESS-SALTZMAN',
            'INSTRUMENTAL-COLORIMETRIC',
        )),
        ('lyshkow colorimetric ', (
            'INSTRUMENTAL-COLORIMETRIC-LYSHKOW(MOD)',
        )),
        ('coulometry', (
            'INSTRUMENTAL-COULOMETRIC',
        )),
    )
    method_lookup = {}
    for standard_name, raw_names in method_groups:
        for raw_name in raw_names:
            method_lookup[raw_name] = standard_name

    #get measurement method, take mode of big methods array
    site_mm = stats.mode(site_mm)[0][0]
    raw_method = site_mm.upper()
    if raw_method not in method_lookup:
        print(raw_method)
        raise ValueError('Unknown measurement method %r for ref %s' % (raw_method,site_ref))
    mm = method_lookup[raw_method]

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #set site file resolution
    file_res = 'H'

    #set sampling as average
    st = 'average'

    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
def site_iter_process(valid_refs,c):
    """Read the GAW '.dat' files of one site and place the values on the hourly grid.

    The site is ``valid_refs[c]`` and its file resolution ('hr', 'da' or
    'mo') is ``data_resolutions[c]``. For every file the data columns are
    read with ``np.loadtxt`` (lines starting with 'C' are treated as
    comments), the header rows 11-13 and 20-23 supply latitude, longitude,
    altitude, unit, measurement method, sampling type and time zone, the
    concentrations are converted to ppb, and for hourly files the time
    stamps are shifted to UTC. All files are then concatenated, restricted
    to the requested years, expanded to hourly values for daily/monthly
    files, written onto a grid of ``n_hours`` values (missing = -99999) and
    passed through ``modules.quality_check_nr``.

    Relies on module-level names defined elsewhere in the file: ``species``,
    ``data_resolutions``, ``start_year``, ``end_year``, ``mol_mass``,
    ``n_hours``, ``n_days``, ``output_res_dates_strings``,
    ``data_resolution``, ``grid_dates``, ``stats``, ``pd``, ``modules``.

    Parameters
    ----------
    valid_refs : sequence
        Site reference codes.
    c : int
        Index into ``valid_refs`` (and ``data_resolutions``) of the site.

    Returns
    -------
    tuple or None
        ``(c, full_data, data_valid, lat, lon, alt, raw_class_name,
        anthrome_class_name, mm, st, file_res)``; ``None`` when no file is
        left to read for the site (unchanged from the previous behaviour).

    Raises
    ------
    ValueError
        For an unknown unit or sampling type, a header without unit or time
        zone rows, or an empty time zone for a site that has no default
        offset. (The unit and sampling-type cases previously aborted through
        a deliberate ``1+'a'`` TypeError.)
    """
    all_lat = []
    all_lon = []
    all_alt = []
    all_st = []
    all_mm = []

    site_ref = valid_refs[c]
    data_valid = True

    print(site_ref)
    file_res = data_resolutions[c]
    print(file_res)

    #read files for each valid site
    s_files = sorted(glob.glob('/work/home/db876/observations/surface/%s/GAW/%s**.%s**.dat'%(species,site_ref.lower(),file_res)))
    print(s_files)

    if file_res == 'hr':
        #order hourly files by the text following '.hr', which starts with the year
        site_files = sorted(s_files, key = lambda x: x.split(".hr")[1])

        #limit site files before and after year limit
        delete_inds = []
        for i in range(len(site_files)):
            year = int(site_files[i].split(".hr")[1][:4])
            if (year < int(start_year)) or (year > int(end_year)):
                delete_inds.append(i)
        site_files = np.delete(site_files,delete_inds)
        print(site_files)
    else:
        site_files = sorted(s_files)

    #mass concentration units -> (assumed temperature in degC, extra factor taking the unit to ug/m3)
    mass_units = {
        'ug/m3': (20,1.), 'ugN/m3': (20,1.),
        'ug/m3-20C': (20,1.), 'ugN/m3-20C': (20,1.),
        'ug/m3-25C': (25,1.), 'ugN/m3-25C': (25,1.), 'ug/m3at25C': (25,1.),
        'mg/m3-20C': (20,1e3), 'mgN/m3-20C': (20,1e3),
        'mg/m3-25C': (25,1e3), 'mgN/m3-25C': (25,1e3),
    }

    #UTC offsets (hours) used for sites whose files give no time zone
    default_utc_offsets = {}
    for offset, refs in (
            (-5, ['plm']),
            (0, ['kos','edm','vdl','nwr']),
            (1, ['jfj','kps','rig','pay','glh','cmn','zep','dig','hhe','ktb','stp','ivn','jcz','kam','lzp','snz','zbl','kmw','don','mhn','nia','roq','spm']),
            (2, ['rcv','aht','oul','uto','vir','fdt','sem','stn']),
            (3, ['dak']),
            (4, ['shp']),
            (5, ['isk']),
            (8, ['hkg']),
            (10, ['cgo'])):
        for ref in refs:
            default_utc_offsets[ref] = offset

    big_list = None
    for f in site_files:
        print(f)
        read = np.loadtxt(f,dtype="S10,S5,f8",comments='C',usecols=(0,1,4),unpack =True)
        read = np.array(read)

        dates = read[0,:]
        times = read[1,:]
        conc = np.array(read[2,:]).astype(float)

        #change all vals < 0 to NaN
        conc[conc < 0] = np.nan

        #header metadata sits on fixed rows; each value follows a fixed-width label once spaces are removed
        unit = None
        tz = None
        with open(f) as header_file:
            for row_count, row in enumerate(csv.reader(header_file)):
                if row_count > 23:
                    #nothing below the time zone row is used
                    break
                text = " ".join(row).replace(" ", "")
                # get lat
                if row_count == 11:
                    all_lat.append(float(text[12:]))
                # get lon
                elif row_count == 12:
                    all_lon.append(float(text[13:]))
                # get altitude
                elif row_count == 13:
                    all_alt.append(float(text[12:]))
                # get units
                elif row_count == 20:
                    unit = text[19:]
                # get measurement method
                elif row_count == 21:
                    all_mm.append(text[21:])
                # get sampling type
                elif row_count == 22:
                    all_st.append(text[16:])
                # get time zone
                elif row_count == 23:
                    tz = text[12:]

        if (unit is None) or (tz is None):
            raise ValueError('Header of %s has no unit/time zone rows' % (f,))

        # test if units are in ppb for each file - if not convert
        if unit in ('ppb','ppbv'):
            pass
        elif unit in mass_units:
            temp_c, to_ug = mass_units[unit]
            print('converting units, temp = %sdegC' % (temp_c,))
            #calculate conversion factor from ug/m3 assuming the given temperature and 1 atm
            #(20 degC is the default for GAW site O3 instruments)
            #R/MW*(TEMP0C(K)+TEMP(degC))/(P(hPa)/10)
            conv_fact = 8.3144/mol_mass*(273.15+temp_c)/(1013.25/10)
            conc = (conv_fact*conc)*to_ug
        elif unit in ('ppm','ppmv'):
            conc = conc*1.e3
        elif unit in ('ppt','pptv'):
            conc = conc/1.e3
        else:
            print('Unknown Unit')
            print(unit)
            raise ValueError('Unknown unit %r in %s' % (unit,f))

        #work out the offset of the file times from UTC, in hours
        if tz == 'UTC':
            tz = 0
        elif tz == '':
            site_key = site_ref.lower()
            if site_key not in default_utc_offsets:
                raise ValueError('No time zone in %s and no default offset for site %s' % (f,site_ref))
            tz = default_utc_offsets[site_key]
        else:
            offset_text = tz.replace('LocaltimeUTC', '')
            offset_text = offset_text.replace('OtherUTC', '')
            offset_text = offset_text.replace('Localtime', '')
            if ':' in offset_text:
                #hours:minutes -> decimal hours, the minutes taking the sign of the hours
                hours_text, minutes_text = offset_text.rsplit(':',1)
                try:
                    hours = float(hours_text)
                    minutes = int(minutes_text)/60.
                    if hours_text.startswith('-'):
                        tz = hours - minutes
                    else:
                        tz = hours + minutes
                except ValueError:
                    #not a plain hh:mm offset, fall back to reading it as a decimal number
                    tz = float(offset_text.replace(':', '.'))
            else:
                tz = float(offset_text)

        #check tz is whole number else mark all of the file's data as missing
        if (tz % 1) != 0:
            print('File Invalid, timezone is not a whole number.')
            conc[:] = -99999

        #process dates from date, time to days since start year
        dates = [s.replace('-', '') for s in dates]
        times = [s.replace(':', '') for s in times]

        if file_res == 'hr':
            #some times go from 0100 to 2400, assume this is when sites report ave for hour previous. Thus all times should have hour minused
            #only the time strings of days containing a '2400' stamp are shifted, the date strings are left alone
            for i in range(len(times)):
                if times[i] == '2400':
                    current_date = dates[i]
                    same_day = np.array(dates) == current_date
                    day_inds = [ind for ind, is_same in enumerate(same_day) if is_same]
                    for ind in day_inds:
                        current_time = times[ind]
                        if current_time == '2400':
                            current_time = '0000'
                        date_datetime = datetime.datetime(int(current_date[0:4]),int(current_date[4:6]),int(current_date[6:]),int(current_time[:2]),int(current_time[2:]))
                        date_datetime = date_datetime - datetime.timedelta(hours = 1)
                        times[ind] = date_datetime.strftime("%H%M")

            #adjust dates and times if tz is not equal to 0
            if tz != 0:
                #subtracting the signed offset moves local time to UTC for both signs of tz
                utc_shift = datetime.timedelta(hours = int(tz))
                for i in range(len(dates)):
                    dt = datetime.datetime(int(dates[i][:4]),int(dates[i][4:6]),int(dates[i][6:]),int(times[i][:2]),int(times[i][2:]))
                    dt = dt - utc_shift
                    dates[i] = dt.strftime("%Y%m%d")
                    times[i] = dt.strftime("%H%M")

        #append this file's dates, times and values (rows 0, 1, 2) to those of the previous files
        data = [dates,times,conc]
        if big_list is None:
            big_list = np.array(data)
        else:
            big_list = np.hstack((big_list,data))

    #NOTE(review): with no file to read nothing is returned, as before; callers must cope with None
    if big_list is None:
        return None

    #get dates and times
    date_con = big_list[0,:]
    time_con = big_list[1,:]

    #get vals
    vals = np.array(big_list[2,:]).astype(float)

    #delete big list
    del big_list

    #if dates outside what asked for exclude
    first_date_val = int('%s0101'%(start_year))
    last_date_val = int('%s1231'%(end_year))

    date_ints = np.array(date_con).astype(int)
    test_valid = (date_ints >= first_date_val) & (date_ints <= last_date_val)
    date_con = date_con[test_valid]
    time_con = time_con[test_valid]
    vals = vals[test_valid]

    #Check if any neighbouring times are duplicate, if so delete all but first
    is_repeat = (date_con[1:] == date_con[:-1]) & (time_con[1:] == time_con[:-1])
    del_list = np.where(is_repeat)[0]+1
    if len(del_list) > 0:
        print('Deleting duplicate timepoints')
        print('%s %s' % (date_con[del_list],time_con[del_list]))
        date_con = np.delete(date_con,del_list)
        time_con = np.delete(time_con,del_list)
        vals = np.delete(vals,del_list)

    #if file resolution is daily or monthly then replicate times after point, to fill hourly data array.
    if file_res == 'da':
        count = 0
        for _ in range(len(date_con)):
            current_hh = int(time_con[count][:2])
            current_mm = int(time_con[count][2:])
            s = datetime.datetime(year = start_year, month = 1, day = 1, hour = current_hh, minute = current_mm)
            e = datetime.datetime(year = start_year, month = 1, day = 2, hour = current_hh, minute = current_mm)
            #the 23 hourly stamps following the reported one
            day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1]

            date_con = np.insert(date_con,count+1,[date_con[count]]*23)
            time_con = np.insert(time_con,count+1,day_hours)
            vals = np.insert(vals,count+1,[vals[count]]*23)

            count += 24

    if file_res == 'mo':
        count = 0
        for _ in range(len(date_con)):
            current_year = int(date_con[count][:4])
            current_month = int(date_con[count][4:6])

            next_month = current_month+1
            if next_month > 12:
                next_month = 1
                next_year = current_year+1
            else:
                next_year = current_year

            s = datetime.datetime(year = current_year, month = current_month, day = 1, hour = 1, minute = 0)
            e = datetime.datetime(year = next_year, month = next_month, day = 1, hour = 0, minute = 0)

            #hourly stamps from 01:00 on the 1st up to 23:00 on the last day of the month
            month_hours = pd.date_range(s,e,freq='H')
            day_date = [d.strftime('%Y%m%d') for d in month_hours][:-1]
            day_hour = [d.strftime('%H%M') for d in month_hours][:-1]
            date_con = np.insert(date_con,count+1,day_date)
            time_con = np.insert(time_con,count+1,day_hour)
            vals = np.insert(vals,count+1,[vals[count]]*len(day_date))
            count += (len(day_date)+1)

    date_con = np.array(date_con).astype(int)
    time_con = np.array(time_con).astype(int)

    #create max possible grid
    o3_data = np.empty(n_hours)
    o3_data[:] = -99999

    #delete dates,times and var outside date range
    val_test = (date_con >= int(output_res_dates_strings[0])) & (date_con <= int(output_res_dates_strings[-1]))
    date_con = date_con[val_test]
    time_con = time_con[val_test]
    vals = vals[val_test]

    print(date_con)

    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    syn_grid_time = np.arange(0,n_days,1./24)
    syn_grid_time = np.round(syn_grid_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')

    #make sure no data is past the end of the grid
    in_grid = indices < len(o3_data)
    o3_data[indices[in_grid]] = vals[in_grid]

    #convert all Nans back to -99999
    o3_data[np.isnan(o3_data)] = -99999

    #get mode of metadata
    lat = np.float64(stats.mode(all_lat)[0][0])
    lon = np.float64(stats.mode(all_lon)[0][0])
    alt = np.float64(stats.mode(all_alt)[0][0])
    st = stats.mode(all_st)[0][0]
    mm = stats.mode(all_mm)[0][0]

    #standardise sampling type
    if st in ('continuous','continuous(carbondioxide),remotespectroscopicmethod(methaneandsurfaceozone)'):
        st = 'average'
    elif st not in ('flask','filter'):
        print(st)
        raise ValueError('Unknown sampling type %r for ref %s' % (st,site_ref))

    #standardise measurement method; header values have had all spaces removed
    method_names = {
        'Lightabsorptionanalysis(UV)': 'ultraviolet photometry',
        'CavityRingdownSpectroscopy': 'cavity ringdown spectroscopy',
        'NDIR': 'non-dispersive infrared spectroscopy',
        'GasChromatography(FID)': 'gas chromatography flame ionisation detection',
        'GasChromatography(RGD)': 'gas chromatography reduction gas detection',
        'Chemiluminescence': 'chemiluminescence',
        'Spectrophotometry': 'spectrophotometry',
        'spectrophotometry,naphthyl-ethylenediaminedihydrochloridemethod': 'spectrophotometry',
    }
    #method assumed per species when the header leaves it blank
    default_methods = {
        'O3': 'ultraviolet photometry',
        'CO': 'non-dispersive infrared spectroscopy',
        'NO2': 'chemiluminescence',
        'NO': 'chemiluminescence',
        'ISOP': 'gas chromatography flame ionisation detection',
    }
    if mm in method_names:
        mm = method_names[mm]
    elif mm == '':
        mm = default_methods.get(species,mm)
    #any other method string is passed through unchanged

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(o3_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #convert file res to standard format
    file_res = {'hr': 'H', 'da': 'D', 'mo': 'M'}.get(file_res,file_res)

    #no raw class so set as na
    raw_class_name = 'na'

    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
def _eanet_row_datetime(row,file_res,year_index,month_index,day_index,hour_index):
    """Return the datetime of one data row.

    Day and hour are read from the row only when the resolution provides them
    ('H' has both, 'D' has the day only); otherwise they default to 1.
    """
    dd = row[day_index] if file_res in ('H','D') else 1
    hh = row[hour_index] if file_res == 'H' else 1
    return datetime.datetime(int(row[year_index]),int(row[month_index]),int(dd),int(hh))


def site_iter_process(valid_refs,c):
    """Read all yearly EANET csv files of one site into an hourly series.

    valid_refs -- sequence of site references
    c          -- index into valid_refs of the site to process

    Relies on names defined elsewhere in the file (met_refs, met_species,
    met_instruments, met_tz, met_lats, met_lons, met_alts, met_class, species,
    year_array, output_res, data_resolution, grid_dates, start_year, end_year,
    valid_refs_rev, insensitive_glob, modules).

    Returns the tuple
        (c, full_data, data_valid, lat, lon, alt, raw_class_name,
         anthrome_class_name, mm, st, file_res)
    where mm is the site instrument for the species. If a file is coarser than
    output_res, or the site's files do not share one resolution, the metadata
    slots are -999/'na' and data_valid is False.

    Raises ValueError if a file reports the species in units other than ppb.
    """
    missing = -99999

    ref = valid_refs[c]
    print(ref)

    #get site instrument for species
    met_i = met_refs.index(ref)
    print(len(met_refs))
    print(len(met_species))
    site_species = list(met_species[met_i])
    site_instruments = list(met_instruments[met_i])
    print(site_species)
    print(site_instruments)
    #NOTE: mm must keep the instrument until the final return; month values
    #read from the files use their own local names
    mm = site_instruments[site_species.index(species)]

    site_resolutions = []
    data_valid = True
    #fallback so the final return is defined even when no file is opened
    file_res = 'na'

    #thresholds used to classify the spacing between the first two data rows
    hour_delt = datetime.timedelta(hours=1)
    day_delt = datetime.timedelta(hours=24)
    week_delt = datetime.timedelta(hours=24*7)

    #match the two digit year against the file name only, the directory part
    #of the path contains digits of its own
    #NOTE(review): digits inside the site ref can still match a year - confirm file naming
    s_files = insensitive_glob('/work/home/db876/observations/surface/%s/EANET/*%s.csv'%(species,ref))
    site_files = []
    for y in year_array:
        yy = str(y)[-2:]
        for f in s_files:
            if yy in f.split('/')[-1]:
                site_files.append(f)

    site_files = modules.natsorted(site_files)

    if len(site_files) == 0:
        print('No files for ref.\n')

    #hourly values are collected in a plain list and converted to an array once,
    #np.append copies the whole array on every call
    vals = []

    for y in year_array:
        print('Processing Year %s'%y)
        yy = str(y)[-2:]
        hours_in_year = (datetime.date(y+1, 1, 1) - datetime.date(y, 1, 1)).days*24

        year_file = None
        for candidate in site_files:
            if yy in candidate.split('/')[-1]:
                year_file = candidate
                break

        if year_file is None:
            #fill in data for missing year
            print('ndays missing =  %s' % (hours_in_year//24))
            vals.extend([missing]*hours_in_year)
            continue

        print(year_file)

        #first pass: header rows and the first two data rows give the column
        #positions, the units and the time resolution
        valid = True
        hour_index = None
        day_index = None
        with open(year_file, 'rb') as f:
            reader = csv.reader(f,delimiter=',')
            for counter,row in enumerate(reader):
                if counter == 0:
                    all_units = row

                elif counter == 1:
                    #initial guess from the columns present
                    file_res = 'H'
                    try:
                        hour_index = row.index('Hour')
                    except ValueError:
                        file_res = 'D'
                    try:
                        day_index = row.index('Day')
                    except ValueError:
                        file_res = 'M'
                    month_index = row.index('Month')
                    year_index = row.index('Year')

                    try:
                        spec_index = row.index(species.upper())
                        units = all_units[spec_index]
                    except (ValueError, IndexError):
                        valid = False
                        break

                    #make sure each year units are ppb
                    if units != 'ppb':
                        print('Units not ppb!')
                        raise ValueError('Units not ppb in %s: %s'%(year_file,units))

                elif counter == 2:
                    start_datetime = _eanet_row_datetime(row,file_res,year_index,month_index,day_index,hour_index)

                elif counter == 3:
                    present_datetime = _eanet_row_datetime(row,file_res,year_index,month_index,day_index,hour_index)
                    time_delt = present_datetime-start_datetime
                    print(time_delt)

                    if time_delt < day_delt:
                        print('Hourly Data')
                        file_res = 'H'
                        site_resolutions.append(file_res)

                    elif (time_delt > hour_delt) and (time_delt < week_delt):
                        print('Daily Data')
                        file_res = 'D'
                        site_resolutions.append(file_res)

                    elif time_delt > week_delt:
                        print('Monthly Data')
                        file_res = 'M'
                        site_resolutions.append(file_res)

                    #limit files by timeres return if not suitable for output res
                    if output_res == 'H':
                        if file_res in ('D','M'):
                            print('File resolution has to be Minimum Hourly. Skipping')
                            data_valid = False
                            return c,np.array(vals,dtype=np.float64),data_valid,-999,-999,-999,'na','na','na','na','na'
                    elif output_res == 'D':
                        if file_res == 'M':
                            print('File resolution has to be Minimum Daily. Skipping')
                            data_valid = False
                            return c,np.array(vals,dtype=np.float64),data_valid,-999,-999,-999,'na','na','na','na','na'

                    #nothing after the second data row is needed in this pass
                    break

        #READ IN DATA
        if valid:
            with open(year_file, 'rb') as f:
                reader = csv.reader(f,delimiter=',')
                for counter,row in enumerate(reader):
                    #first two rows are header
                    if counter < 2:
                        continue

                    #unparseable or absent values become the missing value
                    try:
                        val = np.float64(row[spec_index])
                    except (ValueError, IndexError):
                        val = missing

                    #daily and monthly values are repeated for every hour they cover
                    if file_res == 'H':
                        vals.append(val)
                    elif file_res == 'D':
                        vals.extend([val]*24)
                    elif file_res == 'M':
                        month_days = monthrange(int(row[year_index]), int(row[month_index]))[1]
                        vals.extend([val]*(month_days*24))
        else:
            print('Species is not in file header. Skipping Year')
            print('ndays missing =  %s' % (hours_in_year//24))
            vals.extend([missing]*hours_in_year)

    vals = np.array(vals,dtype=np.float64)

    valid_refs_rev.append(ref)

    tz = np.float64(met_tz[met_i])
    lat = np.float64(met_lats[met_i])
    lon = np.float64(met_lons[met_i])
    alt = np.float64(met_alts[met_i])
    raw_class_name = met_class[met_i]

    #check tz is whole number else skip site
    if (tz % 1) != 0:
        print('Timezone is not even. Skipping')
        data_valid = False

    tz = int(tz)
    #correct time to UTC, keeping the series length unchanged
    if tz < 0:
        #drop the last |tz| values and pad the start with -99999's
        vals = np.insert(vals[:tz],0,[missing]*abs(tz))
    elif tz > 0:
        #drop the first tz values and pad the end with -99999's
        vals = np.append(vals[tz:],[missing]*tz)

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(vals,data_valid,data_resolution,np.float64(alt),grid_dates,start_year,end_year)

    #all files of a site must share one resolution, otherwise skip the site
    if len(set(site_resolutions)) > 1:
        print('Not all files for site have same resolution. Skipping.')
        data_valid = False
        return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na'

    #set sampling as average
    st = 'average'

    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
def site_iter_process(valid_refs,c):
    """Read all yearly EMEP files of one site onto the hourly output grid.

    valid_refs -- sequence of site references
    c          -- index into valid_refs of the site to process

    Relies on names defined elsewhere in the file (species, year_array,
    n_hours, n_days, output_res, mol_mass, start_year, end_year,
    data_resolution, grid_dates, date_process, modules).

    Returns the tuple
        (c, full_data, data_valid, lat, lon, alt, raw_class_name,
         anthrome_class_name, mm, st, file_res)
    If the site has no files for the requested years, or a file is coarser
    than output_res, the same 11-tuple shape is returned with data_valid False
    and -999/'na' in the metadata slots.

    Raises ValueError for a statistic other than arithmetic mean, an unknown
    unit, or an unknown instrument type.
    """
    missing = -99999

    #read files site at a time
    site_ref = valid_refs[c]

    all_latitudes = []
    all_longitudes = []
    all_altitudes = []
    all_mm = []

    print('Current Ref is =  %s' % (site_ref,))

    #keep the files whose name carries one of the wanted years in characters 8-11
    s_files = glob.glob('/work/home/db876/observations/surface/%s/EMEP/%s*'%(species,site_ref))
    site_files = [f for y in year_array for f in s_files if str(y) in f.split('/')[-1][8:12]]
    site_files = modules.natsorted(site_files)

    data_valid = True

    #collected in plain lists and converted once, np.append copies on every call
    yyyymmdd = []
    hhmm = []
    vals = []
    flags = []

    #create max possible o3 grid
    full_data = np.empty(n_hours)
    full_data[:] = missing

    if len(site_files) == 0:
        print('No valid files for site\n')
        #same shape as every other exit so callers can unpack the result
        return c,full_data,False,-999,-999,-999,'na','na','na','na','na'

    for y in year_array:

        print('Processing Year %s'%y)
        year_file = None
        for candidate in site_files:
            if str(y) in candidate.split('/')[-1][8:12]:
                year_file = candidate
                break

        if year_file is None:
            #nothing to read, the grid already holds -99999 for this year
            ndays_missing = (datetime.date(y+1, 1, 1) - datetime.date(y, 1, 1)).days
            print('ndays missing =  %s' % ndays_missing)
            continue

        if not data_valid:
            continue

        start_read = False
        with open(year_file, 'rb') as f:
            reader = csv.reader(f,delimiter=' ')
            print(year_file)
            for row in reader:
                #drop empty fields left by repeated spaces, and lone commas
                row = [a for a in row if (a != '') and (a != ',')]
                if not row:
                    continue

                if start_read:
                    #data row: calc dates, times, and take vals
                    time_since_start = np.float64(row[0])
                    days_since_start = math.trunc(time_since_start)
                    remainder = time_since_start - days_since_start
                    hour = np.round(remainder*24)
                    calc_datetime = start_datetime + datetime.timedelta(days = days_since_start,hours = hour)
                    calc_yyyymmdd = calc_datetime.strftime("%Y%m%d")
                    calc_hhmm = calc_datetime.strftime("%H%M")

                    line_val = np.float64(row[2])
                    line_flag = np.float64(row[3])

                    #convert units by line (only if value is >= than 0)
                    if line_val >= 0:
                        if unit.lower() not in ('ppb','ppbv'):
                            if unit in ('ug/m3','ug_N/m3'):
                                #conversion factor uses 20 degC (273.15+20) and 1013.25 hPa
                                conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                                line_val = conv_fact*line_val
                            elif unit in ('ppm','ppmv'):
                                line_val = line_val*1e3
                            elif unit in ('ppt','pptv'):
                                line_val = line_val/1e3
                            else:
                                print('Unknown Unit')
                                data_valid = False
                                raise ValueError('Unknown unit in %s: %s'%(year_file,unit))

                    if file_res == '1h':
                        yyyymmdd.append(calc_yyyymmdd)
                        hhmm.append(calc_hhmm)
                        vals.append(line_val)
                        flags.append(line_flag)

                    elif file_res in ('1d','1mo'):
                        #first hour of the period, then repeat the value for the remaining hours
                        yyyymmdd.append(calc_yyyymmdd)
                        hhmm.append('0000')
                        vals.append(line_val)
                        flags.append(line_flag)

                        if file_res == '1d':
                            n_period_hours = 24
                        else:
                            n_period_hours = 24*monthrange(int(calc_yyyymmdd[:4]), int(calc_yyyymmdd[4:6]))[1]

                        for j in range(1,n_period_hours):
                            fill_datetime = start_datetime + datetime.timedelta(days = days_since_start,hours = j)
                            yyyymmdd.append(fill_datetime.strftime("%Y%m%d"))
                            hhmm.append(fill_datetime.strftime("%H%M"))
                            vals.append(line_val)
                            flags.append(line_flag)
                    continue

                key = row[0]

                #get start date of file, times are counted from 1st Jan of that year
                if key == 'Startdate:':
                    start_datetime = datetime.datetime(int(row[1][:4]),1,1,0,0)

                #get unit
                elif key == 'Unit:':
                    if len(row) > 2:
                        unit = row[1]+'_'+row[2]
                    else:
                        unit = row[1]

                #get resolution
                elif key == 'Resolution':
                    if row[1] == 'code:':
                        file_res = row[2]
                        print('Resolution = %s'%file_res)
                        if output_res == 'H':
                            if file_res in ('1d','1mo'):
                                print('File resolution has to be Minimum Hourly. Skipping')
                                data_valid = False
                                return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na'
                        elif output_res == 'D':
                            if file_res == '1mo':
                                print('File resolution has to be Minimum Daily. Skipping')
                                data_valid = False
                                return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na'

                #get latitude, longitude and altitude
                elif key == 'Station':
                    if row[1] == 'latitude:':
                        all_latitudes.append(row[2])
                    elif row[1] == 'longitude:':
                        all_longitudes.append(row[2])
                    elif row[1] == 'altitude:':
                        #last character is dropped (presumably a unit suffix - TODO confirm)
                        all_altitudes.append(row[2][:-1])

                #get stats method, only arithmetic mean is accepted
                elif key == 'Statistics:':
                    if ''.join(row[1:3]) != 'arithmeticmean':
                        print('Not Arithmetic Mean!')
                        print(row[1] if len(row) > 1 else '')
                        raise ValueError('Statistics is not arithmetic mean in %s'%year_file)

                #get instrument method
                elif key == 'Instrument':
                    if row[1] == 'type:':
                        mm_list = row[2:]
                        if len(mm_list) > 1:
                            site_mm = ' '.join(mm_list).strip()
                        else:
                            site_mm = mm_list[0]
                        all_mm.append(site_mm)

                #data rows begin on the line after the column header
                elif key == 'starttime':
                    start_read = True

    vals = np.array(vals,dtype=np.float64)
    flags = np.array(flags,dtype=np.float64)

    if len(vals) > 0:
        #convert all invalids by flags to -99999
        vals[flags != 0] = missing

        #any values less than zero are -99999
        vals[vals < 0] = missing

        #put vals into full grid
        date_con = np.array(yyyymmdd).astype(int)
        time_con = np.array(hhmm).astype(int)

        #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
        converted_time = date_process(date_con,time_con,start_year)
        converted_time = np.round(converted_time,decimals=5)
        syn_grid_time = np.arange(0,n_days,1./24)
        syn_grid_time = np.round(syn_grid_time,decimals=5)
        indices = np.searchsorted(syn_grid_time, converted_time, side='left')

        #make sure no data is past end year
        index_test = indices < len(full_data)
        full_data[indices[index_test]] = vals[index_test]

    #get mode of metadata
    lat = np.float64(stats.mode(all_latitudes)[0][0])
    lon = np.float64(stats.mode(all_longitudes)[0][0])
    alt = np.float64(stats.mode(all_altitudes)[0][0])
    mm = stats.mode(all_mm)[0][0]

    #get measurement method
    if mm in ('uv_abs','chemiluminesc','uv_fluoresc'):
        if species == 'O3':
            mm = 'ultraviolet photometry'
        if species in ('NO','NO2','CO'):
            mm = 'chemiluminescence'

    elif mm in ('ndir','infrared_absorption'):
        mm = 'non-dispersive infrared spectroscopy'

    elif mm == 'GC-HgO':
        mm = 'gas chromatography reduction gas detection'

    elif mm == 'tracegas_monitor':
        mm = 'cavity attenuated phase shift spectroscopy'

    elif mm in ('filter_1pack','filter_2pack','filter_3pack'):
        if species == 'NO2':
            mm = 'griess saltzman colorimetric'
        elif species == 'CO':
            mm = 'ion chromatography'

    elif mm == 'steel_canister':
        mm = 'gas chromatography flame ionisation detection'

    elif mm == 'online_gc':
        mm = 'online gas chromatography'

    elif mm in ('glass_sinter','abs_solution','filter_abs_solution','abs_tube','continuous_colorimetric'):
        mm = 'griess saltzman colorimetric'

    elif mm == 'NaJ_solution':
        mm = 'flame ionisation detection'

    elif mm == 'doas':
        mm = 'differential optical absorption spectrosocopy'

    elif mm == 'diffusion_tube':
        mm = 'diffusive sampler'

    elif mm in ('NA',''):
        #no instrument given, fall back to a default per species
        if species == 'O3':
            mm = 'ultraviolet photometry'
        if species == 'CO':
            mm = 'non-dispersive infrared spectroscopy'
        if species == 'NO2':
            mm = 'chemiluminescence'
        if species == 'NO':
            mm = 'chemiluminescence'
        if species == 'ISOP':
            mm = 'gas chromatography flame ionisation detection'

    else:
        print(mm)
        raise ValueError('Unknown instrument type: %s'%mm)

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #convert file res to standard format
    if file_res == '1h':
        file_res = 'H'
    elif file_res == '1d':
        file_res = 'D'
    elif file_res == '1mo':
        file_res = 'M'

    #no raw class so set as na
    raw_class_name = 'na'

    #set sampling as average
    st = 'average'

    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
def site_iter_process(valid_refs,c):
#process data
#for i in range(n_refs):

    data_valid = True
    
    site_data = data[c]
    site_meta = site_data[0]    
    file_res = resolutions[c]    
    
    #get data and metadata
    
    latitudes= [site_meta['LATITUDE']]
    longitudes = [site_meta['LONGITUDE']]
    altitudes = [site_meta['ALTITUDE']]
    land_use_classes = [site_meta['LAND_USE']]
    station_classes = [site_meta['STATION CATEGORY']]
    all_mm = [site_meta['MEASUREMENT METHOD']]
    
    
    if (file_res == 'hr') or (file_res == 'da'):
        var = np.array(site_data[1].values.tolist())
    elif file_res == 'mo':
        all_var = np.array(site_data[1].values.tolist())
        var = all_var[:,1]
        end_times = all_var[:,0]
        end_date_con = [d[:4]+d[5:7]+d[8:10] for d in end_times]
        end_time_con = [d[11:13]+d[14:] for d in end_times]
        
    times = site_data[1].index
    print times
    date_con = [d.strftime('%Y%m%d') for d in times]
    time_con = [d.strftime('%H%M') for d in times]
    
    #get ref
    site_ref = valid_refs[c]
    site_group = group_codes[c]
    
    print 'ref == %s'%(site_ref) 
    print 'res = ',file_res
    
    
    #if file resolution is daily or monthly then replicate times after point, to fill hourly data array.
    count=0
    if file_res == 'da':
        file_hours = len(date_con)
        for i in range(file_hours):
            current_hh = int(time_con[count][:2])
            current_mm = int(time_con[count][2:])
            s = datetime.datetime(year = start_year, month = 1, day = 1, hour = current_hh, minute = current_mm)
            e = datetime.datetime(year = start_year, month = 1, day = 2, hour = current_hh, minute = current_mm)
            day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1]

            date_con = np.insert(date_con,count+1,[date_con[count]]*23)
            time_con = np.insert(time_con,count+1,day_hours)
            var = np.insert(var,count+1,[var[count]]*23)
       
            count +=24

    
    if file_res == 'mo':
        file_hours = len(date_con)
    
        for i in range(file_hours):
            current_year = int(date_con[count][:4])
            current_month = int(date_con[count][4:6])
            current_day = int(date_con[count][6:])
            current_hour = int(time_con[count][:2])
        
            next_year = int(end_date_con[i][:4])
            next_month = int(end_date_con[i][4:6])
            next_day = int(end_date_con[i][6:])
            next_hour = int(end_time_con[i][:2])
        
            s = datetime.datetime(year = current_year, month = current_month, day = current_day, hour = current_hour, minute = 0)
            e = datetime.datetime(year = next_year, month = next_month, day = next_day, hour = next_hour, minute = 0)
        
            day_date = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][:-1]
            day_hour = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][:-1]
            date_con = np.insert(date_con,count+1,day_date)
            time_con = np.insert(time_con,count+1,day_hour)
            var = np.insert(var,count+1,[var[count]]*len(day_date))
            count += (len(day_date)+1)

    date_con = np.array(date_con).astype(int)
    time_con = np.array(time_con).astype(int)
    
    #find matching times between actual times and grid of times, return big array of indices of matched indices in grid
    converted_time = modules.date_process(date_con,time_con,start_year)
    converted_time = np.round(converted_time,decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    
    full_data = np.empty(len(grid_dates))
    full_data[:] = -99999
 
    full_data[indices] = var
    
    #convert nans to -99999's
    nan_inds = np.isnan(full_data)
    full_data[nan_inds] = -99999
    
    #get mode of metadata
    lat =  np.float64(stats.mode(latitudes)[0][0]) 
    lon =  np.float64(stats.mode(longitudes)[0][0])  
    alt = np.float64(stats.mode(altitudes)[0][0]) 
    land_use_class = stats.mode(land_use_classes)[0][0]
    raw_class_name = stats.mode(station_classes)[0][0]
    mm = stats.mode(all_mm)[0][0]
        
    
    #get measurement method
    if (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O331M UV Ozone Analyzer') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9800') or (mm == 'Ultraviolet (UV) photometryThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryMCV 48-AUV') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400A UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 48 CO analyser') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400E UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 300 O3 analyser') \
    or (mm == 'Ultraviolet (UV) photometry342 M') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9812 O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 350E O3 analyser') or (mm == 'Ultraviolet (UV) photometryENVIRONMENT 1003 AH') or (mm == 'Ultraviolet (UV) photometryC.S.I. 3.100') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003 O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810B O3 analyser') or (mm == 'Ultraviolet (UV) photometrytoo generic') or (mm == 'Ultraviolet (UV) photometryThermo 49 CPS Ozone Primary Standard') \
    or (mm == 'Ultraviolet (UV) photometryDASIBI') or (mm == 'UV fluorescencetoo generic') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003-PC O3 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryThermo model 49i O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-PC O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-RS O3 analyser') or (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O341M UV Ozone Analyzer') or (mm == 'Ultraviolet (UV) photometryISEO Argopol-SAM-XAIR')  \
    or (mm == 'Ultraviolet (UV) photometryEnvironnement S.A. Model O342M UV Ozone Analyze') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 370 O3 analyser') or (mm == 'spectrophotometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008-AH O3 analyser') or (mm == 'UV fluorescenceThermo 49c' ) or (mm == 'Ultraviolet (UV) photometryPHILIPS K50110/00 UV Photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 8810 O3 analyser') or (mm == 'Ultraviolet (UV) photometryPHILIPS K50094 API 400') or (mm == 'Ultraviolet (UV) photometryORION') or (mm == 'Ultraviolet (UV) photometryThermo model 49w O3 analyser') \
    or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810 O3 analyser') or (mm == 'Ultraviolet (UV) photometryCOLUMBIA SCIENTIFIC IC 3100') or (mm == 'Ultraviolet (UV) photometry2008A') or (mm == 'Ultraviolet (UV) photometryThermo model 43s SO2 analyser') or (mm == 'Ultraviolet (UV) photometryMLU') or (mm == 'Ultraviolet (UV) photometryThermo model 49 O3 analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 1108 O3 analyser') or (mm == 'Ultraviolet (UV) photometryAMIBRACK') or (mm == 'Ultraviolet (UV) photometryThermo model 49c O3 analyser') or (mm == 'UV fluorescenceUNKNOWN') or (mm == 'Ultraviolet (UV) photometryTeledyne API 400 UV photometric O3 analyser') \
    or (mm == 'UV fluorescenceTeledyne API 400 UV photometric O3 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9830 CO analyser') or (mm == 'Ultraviolet (UV) photometryDASIBI 5014') or (mm == 'Ultraviolet (UV) photometryEnvironics 300/ Environics') or (mm == 'Ultraviolet (UV) photometryANALYSIS AUTOMATION Mod. 427') or (mm == 'Ultraviolet (UV) photometryANALYSIS AUTOMATION') or (mm == 'Ultraviolet (UV) photometryDASIBI 1008 O3 analyser') or (mm == 'ultraviolet absorptionORION') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9811 O3 analyser') or (mm == 'Ultraviolet (UV) photometryENVIRONMENT 1003RS') \
    or (mm == 'UV absorption (ref)UNKNOWN') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Environnement S.A. SANOA Multigas Longpath Monitoring System') or (mm == 'Ultraviolet (UV) photometryDASIBI 1003-RS O3 analyser') or (mm == 'Ultraviolet (UV) photometryHoriba model APOA 350 O3 analyser') or (mm == 'Ultraviolet (UV) photometrySFI O342M') or (mm == 'UV fluorescenceMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryDANI ENVIRONMENT 1003 AH') or (mm == 'Ultraviolet (UV) photometryS-5014') or (mm == 'Ultraviolet (UV) photometryThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryUNKNOWN') \
    or (mm == 'Ultraviolet (UV) photometryHoriba model APNA 360 NOx analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs undetermined') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'UV fluorescenceThermo model 42 NO/Nox analyser') or (mm == 'Ultraviolet (UV) photometryContiflo') or (mm == 'Ultraviolet (UV) photometryTeledyne API undertermined') or (mm == 'UV fluorescenceThermo model 43a SO2 analyser') or (mm == 'UV fluorescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm == 'UV fluorescenceThermo model 43c SO2 analyser') \
    or (mm =='Ultraviolet (UV) photometryTeledyne API undertermined') or (mm =='UV fluorescenceUNKNOWN') or (mm =='UV fluorescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm =='Ultraviolet (UV) photometryUNKNOWN') or (mm == 'Ultraviolet (UV) photometryThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryMonitor Labs model 9810 O3 analyser') \
    or (mm == 'ChemiluminescenceTeledyne API undertermined') or (mm == 'ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360 NOx analyser') or (mm == 'ChemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC31M NO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 14B chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841 NOx analyser') or (mm == 'ChemiluminescenceENVIRONMENT ZC 32M') or (mm == 'ChemiluminescenceHoriba model APNA 300 NOx analyser') or (mm == 'chemiluminescenceENVIRONNEMENT AC 30M') \
    or (mm == 'ChemiluminescenceThermo model 42i NO/Nox analyser') or (mm == 'ChemiluminescenceTeledyne API 400 UV photometric O3 analyser') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION') or (mm == 'ChemiluminescenceMonitor Labs model 8941A NOx analyser') or (mm == 'ChemiluminescenceTeledyne API undertermined') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC32M NO2 Analyzer') or (mm == 'ChemiluminescenceTeledyne API 200E chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceHoriba model APHA 360E hydrocarbons analyser') or (mm == 'ChemiluminescenceMELOY S1600') or (mm == 'ChemiluminescenceECO PHYSICS CLD 700') or (mm == 'ChemiluminescenceORION') \
    or (mm == 'ChemiluminescenceTECAN CLD 502') or (mm == 'ChemiluminescenceMonitor Labs model 9850 SO2 analyser') or (mm == 'ChemiluminescenceECO PHYSICS CLD 700 AL') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC30M NO2 Analyzer') or (mm == 'ChemiluminescenceMCV 30-QL') or (mm == 'ChemiluminescenceAMBIRACK') or (mm == 'ChemiluminescenceTeledyne API 100A UV Fluorescent SO2 Analyser') or (mm == 'ChemiluminescenceS-5012') or (mm == 'ChemiluminescenceAirpointer') or (mm == 'ChemiluminescenceThermo model 42c NO/Nox analyser') or (mm == 'ChemiluminescenceThermo model 42i-TL (Trace level Nox)') or (mm == 'ChemiluminescenceMonitor Labs model 9841T NOx analyser') \
    or (mm == 'ChemiluminescenceThermo model 42 NO/Nox analyser') or (mm == 'ChemiluminescenceMonitor Labs model 8841 NOx analyser') or (mm == 'ChemiluminescenceColumbia Scientific Industries Models 1600') or (mm == 'chemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION Mod. 447') or (mm == 'ChemiluminescenceSFI AC32M') or (mm == 'ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm == 'Chemiluminescenceserinus 40 Nox') or (mm == 'ChemiluminescenceThermo model 42s NO/Nox analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360 NOx analyser') or (mm == 'ChemiluminescenceThermo model 42C-TL (Trace level Nox)') \
    or (mm == 'ChemiluminescenceTeledyne API 200 chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 8440 NOx analyser') or (mm == 'ChemiluminescencePHILIPS K50034 API 200A') or (mm == 'ChemiluminescenceENVIRONMENT') or (mm == 'ChemiluminescenceMonitor Labs model 8840 NOx analyser') or (mm == 'chemiluminescenceHORIBA APNA 370') or (mm == 'ChemiluminescenceMonitor Labs undetermined') or (mm == 'ChemiluminescencePHILIPS 42') or (mm == 'ChemiluminescencePHILIPS K50109/00 Gas Filter Correlation CO analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm == 'ChemiluminescenceThermo model 43 SO2 analyser') \
    or (mm == 'ChemiluminescenceHoriba model APNA 350 NOx analyser') or (mm == 'ChemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceTHERMO ELECTRON INSTRUMENTS') or (mm == 'ChemiluminescenceLAP 884') or (mm == 'ChemiluminescenceMonitor Labs model 9841A NOx analyser') or (mm == 'ChemiluminescenceHoriba model APNA 370 NOx analyser') or (mm == 'ChemiluminescenceDASIBI 2108 NOx analyser') or (mm == 'ChemiluminescenceThermo model 14B/E chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AF22M SO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 42w NO/Nox analyser') or (mm == 'ChemiluminescenceHoriba model APNA 360E NOx analyser') \
    or (mm == 'Chemiluminescencetoo generic') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm == 'ChemiluminescencePHILIPS K50235/00 NO-NOx-NO2 analyser') or (mm == 'ChemiluminescenceEnvironnement S.A. Model AC31M NO2 Analyzer') or (mm == 'ChemiluminescenceThermo model 14B chemiluminescence NO-NO2-Nox') or (mm == 'ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'ChemiluminescenceMonitor Labs model 9841 NOx analyser') or (mm =='ChemiluminescenceENVIRONMENT ZC 32M') or (mm =='ChemiluminescenceBENDIX') or (mm =='ChemiluminescenceThermo model 42i NO/Nox analyser') \
    or (mm =='ChemiluminescenceTeledyne API 400 UV photometric O3 analyser') or (mm =='Ultraviolet (UV) photometryHoriba model APNA 360 NOx analyser') or (mm =='ChemiluminescenceThermo model 48 CO analyser') or (mm =='ChemiluminescenceMonitor Labs model 8941A NOx analyser') or (mm =='ChemiluminescenceTeledyne API undertermined') or (mm =='ChemiluminescenceEnvironnement S.A. Model AC32M NO2 Analyzer') or (mm =='ChemiluminescenceTeledyne API 200E chemiluminescent NOx analyser') or (mm =='ChemiluminescenceHoriba model APHA 360E hydrocarbons analyser') or (mm =='ChemiluminescenceECO PHYSICS CLD 700') or (mm =='ChemiluminescenceORION') \
    or (mm =='ChemiluminescenceTECAN CLD 502') or (mm =='ChemiluminescenceMonitor Labs model 9850 SO2 analyser') or (mm =='ChemiluminescenceECO PHYSICS CLD 700 AL') or (mm =='ChemiluminescenceEnvironnement S.A. Model AC30M NO2 Analyzer') or (mm =='ChemiluminescenceMCV 30-QL') or (mm =='ChemiluminescenceBendix/Combustion Engineering Model 8101-C Oxides of Nitrogen Analyze') or (mm =='ChemiluminescenceTeledyne API 100A UV Fluorescent SO2 Analyser') or (mm =='ChemiluminescenceS-5012') or (mm =='ChemiluminescenceHoriba model APNA 300E NOx analyser') or (mm =='ChemiluminescenceThermo model 42c NO/Nox analyser') \
    or (mm =='ChemiluminescenceMonitor Labs model 8440 NOx analyser') or (mm =='ChemiluminescenceThermo model 42i-TL (Trace level Nox)') or (mm =='ChemiluminescenceThermo model 42 NO/Nox analyser') or (mm =='ChemiluminescenceMonitor Labs model 8841 NOx analyser') or (mm =='ChemiluminescenceColumbia Scientific Industries Models 1600') or (mm =='chemiluminescenceUNKNOWN') or (mm == 'ChemiluminescenceANALYSIS AUTOMATION Mod. 447') or (mm =='ChemiluminescenceAirpointer') or (mm =='ChemiluminescenceHoriba model APNA 350E NOx analyser') or (mm =='ChemiluminescenceThermo model 42s NO/Nox analyser') or (mm =='ChemiluminescenceHoriba model APNA 360 NOx analyser') \
    or (mm =='ChemiluminescenceTeledyne API 200 chemiluminescent NOx analyser') or (mm =='ChemiluminescencePHILIPS K50034 API 200A') or (mm =='ChemiluminescenceENVIRONMENT') or (mm =='ChemiluminescenceMonitor Labs model 8840 NOx analyser') or (mm =='Beta ray attenuationTeledyne API 200A chemiluminescent NOx analyser') or (mm =='ChemiluminescenceMonitor Labs undetermined') or (mm =='ChemiluminescencePHILIPS K50102 NO') or (mm =='Chemiluminescencetoo generic') or (mm =='ChemiluminescenceThermo model 42C-TL (Trace level Nox)') or (mm =='ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm =='ChemiluminescenceTHERMO ENVIRONMENTAL INSTRUMENTS') \
    or (mm =='ChemiluminescenceHoriba model APNA 350 NOx analyser') or (mm =='ChemiluminescenceUNKNOWN') or (mm =='ChemiluminescenceTHERMO ELECTRON INSTRUMENTS') or (mm =='ChemiluminescenceLAP 884') or (mm =='ChemiluminescenceMonitor Labs model 9841A NOx analyser') or (mm =='ChemiluminescenceHoriba model APNA 370 NOx analyser') or (mm =='ChemiluminescenceDASIBI 2108 NOx analyser') or (mm =='ChemiluminescenceThermo model 14B/E chemiluminescence NO-NO2-Nox') or (mm =='ChemiluminescenceThermo model 42w NO/Nox analyser') or (mm =='ChemiluminescenceHoriba model APNA 360E NOx analyser') or (mm =='ChemiluminescenceEC9843') or (mm =='ChemiluminescencePHILIPS K50109/00 Gas Filter Correlation CO analyser') \
    or (mm =='ChemiluminescenceEnvironnement S.A. Model AF21M SO2 Analyzer') or (mm =='ChemiluminescencePHILIPS K50235/00 NO-NOx-NO2 analyser') or (mm =='ChemiluminescenceTeledyne API 200A chemiluminescent NOx analyser') or (mm =='ChemiluminescenceEnvironnement S.A. Model CO12M CO Analyzer') or (mm =='ChemiluminescenceMonitor Labs model 9841B NOx analyser') or (mm =='ChemiluminescenceUNKNOWN') or (mm =='Chemiluminescencetoo generic') or (mm =='Beta ray attenuationMLU') or (mm =='Beta ray attenuationORION') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser') or (mm == 'UV fluorescenceHoriba model APNA 360 NOx analyser') \
    or (mm == 'UV fluorescenceUNKNOWN') or (mm == 'UV fluorescenceThermo model 43 SO2 analyser') or (mm == 'Ultraviolet (UV) photometryTeledyne API 200A chemiluminescent NOx analyser'):
        
        if species == 'O3':
            mm = 'ultraviolet photometry'
        elif (species == 'NO') or (species == 'NO2') or (species == 'CO'):
            mm = 'chemiluminescence'
        else:
            1+'a'
    
    if (mm =='Non-dispersive infrared spectroscopy (NDIR)Meloy Model SA 700 Fluorescence Sulfur Dioxide Analyze') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9830B CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 8831 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ORION') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 200A chemiluminescent NOx analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ANALYSIS AUTOMATION') or (mm =='Non-dispersive infrared spectroscopy (NDIR)THERMO ELECTRON INSTRUMENTS') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 43a SO2 analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 8830 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)CO ANALAYZER') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO12M CO Analyzer') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48i CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)too generic') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS K50093 API 300A') or (mm =='Non-dispersive infrared spectroscopy (NDIR)MLU') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 300 CO analyser') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)MLU 300') or (mm =='Non-dispersive infrared spectroscopy (NDIR)UNKNOWN') or (mm =='Non-dispersive infrared spectroscopy (NDIR)ENVIRONMENT') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300 gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 49 O3 analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48w CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Maihak Unor 6N') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360E CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs undetermined') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300E gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 100 UV Fluorescent SO2 Analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO10M CO Analyzer') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 350 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)FUJI ZRC') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API undertermined') or (mm =='Non-dispersive infrared spectroscopy (NDIR)S-5006') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 350E CO analyser') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 48c CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Thermo model 42 NO/Nox analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)SFI CO12M') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360CE CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS 48') or (mm =='Non-dispersive infrared spectroscopy (NDIR)DASIBI 3008 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Teledyne API 300A gas filter correlation CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 370 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Environnement S.A. Model CO11M CO Analyzer') \
    or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 360 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9841A NOx analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)AAL 407') or (mm =='Non-dispersive infrared spectroscopy (NDIR)AMBIRACK') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Monitor Labs model 9830 CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)Horiba model APMA 300E CO analyser') or (mm =='Non-dispersive infrared spectroscopy (NDIR)PHILIPS K50109/00 Gas Filter Correlation CO analyser') or (mm =='UNKNOWNTeledyne API 300 gas filter correlation CO analyser') or (mm =='UNKNOWNHoriba model APMA 350 CO analyser') \
    or (mm =='Infrared gas filter correlationTHERMO ELECTRON INSTRUMENTS 48c') or (mm =='Infrared gas filter correlationHoriba model APMA 360 CO analyser') or (mm =='infrared absorptionUNKNOWN') or (mm =='Infrared gas filter correlationUNKNOWN') or (mm =='Infrared gas filter correlationTeledyne API 300E gas filter correlation CO analyser'):
        mm = 'non-dispersive infrared spectroscopy'
    
    if (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Opsis AR500 Open path monitor') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)UNKNOWN') or (mm ==  'Ultraviolet (UV) photometryDOAS') or (mm == 'Differential Optical Absorption Spectroscopy (DOAS)Environnement S.A. SANOA Multigas Longpath Monitoring System'): 
        mm = 'differential optical absorption spectrosocopy'
    
    if (mm == 'flame photometryThermo model 48 CO analyser') or (mm == 'flame photometryTeledyne API 300 gas filter correlation CO analyser'):
        mm = 'flame photometry'
    
    if (mm == 'Gas Chromatography (ref)UNKNOWN') or (mm == 'chromatographyUNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GUNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GEnvironnement VOC71M') or (mm == 'chromatographyMonitor Labs model 8440 NOx analyser') or (mm == 'Gas chromatography (GC) + flame ionisation (GC-FID)UNKNOWN') or (mm == 'Gas chromatography followed by flame ionization detection (GAIRMOZONE') or (mm =='Gas chromatography followed by flame ionization detection (GVarian Chrompack') or (mm =='chromatographyChrompack BTX CP7001 Monitor') or (mm =='Gas chromotography (GC)UNKNOWN'):
        mm = 'gas chromatography flame ionisation detection'    
    
    if (mm == "Griess-Saltzman reactionLipinski's aspirator") or (mm == 'Griess-Saltzman reaction101') or (mm == 'Griess-Saltzman reactionUNKNOWN') or (mm == "UNKNOWNLipinski's aspirator") or (mm == 'Griess-Saltzman reactionBUBBLER 24 H') or (mm == "Griess-Saltzman reactionLipinski's aspirator AGT24") or (mm == 'Griess-Saltzman reactionfilter pack') or (mm == 'NEDA Griess-Yloswayaspirator') or (mm == 'colorimetryUNKNOWN'):
        mm = 'griess saltzman colorimetric'
    
    if (mm == 'SpectrophotometrySequential Air Sampler, Type SS2000. NaI-impregnated glass sinters') or (mm == 'SpectrophotometryGlass tubes') or (mm == 'Spectrophotometryglass_sinter')  or (mm =='Spectrophotometryfilter pack') or (mm == 'SpectrophotometryUNKNOWN') or (mm == 'Spectrophotometryphotocolorimeter') or (mm == "SpectrophotometryLipinski's aspirator") or (mm == 'SpectrophotometryBUBBLER 24 H') or (mm == 'SpectrophotometryIMPREGNATED FILTER') or (mm == 'Spectrophotometryglass filter') or (mm == 'spectrophotometryUNKNOWN'):
        mm = 'spectrophotometry'
    
    if (mm == 'SpectrometryBUBBLER 24 H') or (mm == 'Atomic absorption spectrometry (AAS)UNKNOWN'):
        mm = 'spectrometry'
    
    if (mm == 'Ion chromatographyIMPREGNATED FILTER'):
        mm = 'ion chromatography'
    
    if (mm == 'diffusive samplerUNKNOWN') or (mm == 'UNKNOWNSEQUENTIAL SAMPLER') or (mm == 'TGS-ANSAFILTER'):
        mm = 'diffusive sampler'
        
    if (mm == 'Flame ionization detection (FID)Chrompack CP9000'):
        mm = 'flame ionisation detection'  
    
    if (mm == 'coulometryUNKNOWN'):
        mm = 'coulometry'
    
    if (mm == 'Gas chromatography + mass spectrometry (GC-MS)AF 20 M') or (mm == 'GAS CHROMATOGRAPHY - MASS SPECTROMETRYUNKNOWN') or (mm == 'Gas chromatography + mass spectrometry (GC-MS)UNKNOWN') or (mm == 'Gas chromatography + mass spectrometry GC-MS after solvent oMarkes Thermal Desorber + Agilent gas Chromatograph Mass Spectrometer'):
        mm = 'gas chromatography mass spectrometry'
    
    if (mm == 'Gas chromatography with photo ionization detectorSYNTECH SPECTRAS GC 955 series undetermined') or (mm == 'Gas chromatography with photo ionization detectorUNKNOWN'):
        mm = 'gas chromatography photo ionization detection'
    
    #if measurement type is unknown then set default measurement method for species
    try:
        if (np.isnan(mm) == True):
            if species == 'O3':
                mm = 'ultraviolet photometry'
            elif (species == 'NO') or (species == 'NO2'):
                mm = 'chemiluminescence'
            elif species == 'CO':
                mm = 'non-dispersive infrared spectroscopy'
            elif species == 'ISOP':
                mm == 'gas chromatography flame ionisation detection'
    except:
        if (mm == 'UNKNOWNUNKNOWN'):
            if species == 'O3':
                mm = 'ultraviolet photometry'
            elif (species == 'NO') or (species == 'NO2'):
                mm = 'chemiluminescence'
            elif species == 'CO':
                mm = 'non-dispersive infrared spectroscopy'
            elif species == 'ISOP':
                mm == 'gas chromatography flame ionisation detection'
            

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #convert file res to standard format
    if file_res == 'hr':
        file_res = 'H'
    elif file_res == 'da':
        file_res = 'D'
    elif file_res == 'mo':
        file_res = 'M'
        
    #set sampling as average
    st = 'average'
    
    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res
def site_iter_process(valid_refs,c):
    """Build the full time-series grid and metadata for a single site.

    Besides its arguments the function reads these module-level names:
    the observation arrays (all_refs, yyyymmdd, hhmm, vals), the metadata
    sequences (meta_refs, meta_tz, meta_lats, meta_lons, meta_alts,
    meta_class) and the run settings (species, n_hours, n_days, start_year,
    end_year, grid_dates, data_resolution, output_res).

    Parameters:
        valid_refs: sequence of site references.
        c: index into valid_refs of the site to process.

    Returns:
        The tuple (c, full_data, data_valid, lat, lon, alt, raw_class_name,
        anthrome_class_name, mm, st, file_res). If the site's file
        resolution is too coarse for output_res the tuple is instead
        (c, full_data, False, -999, -999, -999, 'na', 'na', 'na', 'na', 'na').
    """
    site_ref = valid_refs[c]

    data_valid = True
    #single pre-formatted argument: prints the same text as the old print statement
    print('ref =  %s' % (site_ref,))

    #pick out this site's rows from the module-level observation arrays
    #NOTE(review): boolean-mask indexing assumes all_refs, yyyymmdd, hhmm and vals are equal-length numpy arrays - confirm
    site_test = all_refs == site_ref

    site_yyyymmdd = yyyymmdd[site_test]
    site_hhmm = hhmm[site_test]
    site_vals = np.float64(vals[site_test])

    #convert all invalids (negative values) to -99999
    site_vals[site_vals < 0] = -99999

    date_con = np.array(site_yyyymmdd).astype(int)
    time_con = np.array(site_hhmm).astype(int)

    #create max possible grid, filled with the missing-data marker
    full_data = np.empty(n_hours)
    full_data[:] = -99999

    #find matching times between actual times and the hourly grid of times, then put vals into the grid
    #both sides are rounded to 5 decimals so that equal times compare equal
    #NOTE(review): presumably date_process returns fractional days since the start of start_year,
    #matching the 1/24-day grid below - confirm. A time that is not on the grid lands on the next slot,
    #and a time beyond the grid gives an index of len(grid), which raises IndexError.
    converted_time = np.round(modules.date_process(date_con,time_con,start_year),decimals=5)
    syn_grid_time = np.round(np.arange(0,n_days,1./24),decimals=5)
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = site_vals

    #look up the site metadata
    meta_index = meta_refs.index(site_ref)
    tz = float(meta_tz[meta_index])
    lat = np.float64(meta_lats[meta_index])
    lon = np.float64(meta_lons[meta_index])
    alt = np.float64(meta_alts[meta_index])
    raw_class_name = meta_class[meta_index]

    #check tz is whole number else flag site as invalid (processing still continues)
    if (tz % 1) != 0:
        data_valid = False
        print('Timezone is not a whole number. Skipping.')

    #correct timezone to UTC by shifting the series by tz grid steps
    #NOTE(review): presumably tz is the local offset from UTC in hours - confirm
    tz = int(tz)
    if tz != 0:
        pad = np.empty(abs(tz))
        pad[:] = -99999
        if tz < 0:
            #put -99999's at start and get rid of values at end
            full_data = np.concatenate((pad, full_data[:tz]))
        else:
            #get rid of values at start and append -99999's at end
            full_data = np.concatenate((full_data[tz:], pad))

    #if species is CO then convert units from ppmv to ppbv (missing values left untouched)
    if species == 'CO':
        valid_inds = full_data != -99999
        full_data[valid_inds] = full_data[valid_inds]*1e3

    #do data quality checks
    full_data,data_valid = modules.quality_check_nr(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

    #set sampling type and site file resolution for the species
    if species in ('O3', 'CO', 'NO', 'NO2'):
        st = 'average'
        file_res = 'H'
    elif species == 'ISOP':
        st = 'flask'
        file_res = 'D'

    #check file res is ok for output res
    if output_res == 'H':
        if file_res in ('D', 'M'):
            print('File resolution has to be Minimum Hourly. Skipping')
            data_valid = False
            return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na'
    elif output_res == 'D':
        if file_res == 'M':
            print('File resolution has to be Minimum Daily. Skipping')
            data_valid = False
            return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na'

    #set default measurement method for the species
    if species == 'O3':
        mm = 'ultraviolet photometry'
    elif (species == 'NO') or (species == 'NO2'):
        mm = 'chemiluminescence'
    elif species == 'CO':
        #spelled 'spectroscopy' (not 'spectrometry') to match the CO label assigned elsewhere in this file
        mm = 'non-dispersive infrared spectroscopy'
    elif species == 'ISOP':
        mm = 'gas chromatography flame ionisation detection'

    anthrome_class_name = 'na'

    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res