def Output_files(New_combined, myBaseforResults, Site_ID, versionID):
    """Write per-year, tab-delimited input files for the REddyProc (MPI-BGC)
    online gap-filling/partitioning tool.

    New_combined     : pandas DataFrame of half-hourly tower data, indexed by
                       timestamp; RH and VPD columns are (re)computed in place.
    myBaseforResults : base results directory; a "REddyProc" subdirectory is
                       created beneath it if needed.
    Site_ID, versionID : strings used to build the output file names.
    """
    # One output file per calendar year.
    New_combined_grouped = New_combined.groupby([lambda x: x.year])
    for year_index in New_combined_grouped:
        print year_index[0]
        print "Starting output for EddyProc MPI online tool"
        # Check for place to put results - does it exist? If not create
        if not os.path.isdir(myBaseforResults):
            os.mkdir(myBaseforResults)
        # Then subdirectories
        if not os.path.isdir(myBaseforResults + "/REddyProc"):
            os.mkdir(myBaseforResults + "/REddyProc")
        mypathforResults = myBaseforResults + "/REddyProc/"
        # Calculate RH_Con from absolute humidity and air temperature.
        New_combined['RH_Con'] = metfuncs.RHfromabsolutehumidity(
            New_combined['Ah_Con'], New_combined['Ta_Con'])
        # Convert VPD in kPa to hPa.
        # We need to update VPD for input here so also need e and es.
        # Calculate vapour pressure from absolute humidity and temperature:
        #   Ah - absolute humidity, g/m3
        #   Ta - air temperature, C
        # NOTE(review): the '/ 10' binds only to the vapourpressure() term, not to
        # the whole (es - e) difference - confirm the intended unit conversion
        # against the units returned by metfuncs.es/vapourpressure.
        New_combined['VPD_hPa_Con'] = (metfuncs.es(
            New_combined['Ta_Con'])) - (metfuncs.vapourpressure(
                New_combined['Ah_Con'], New_combined['Ta_Con'])) / 10
        REddyProc_DF = New_combined[[
            'Fc', 'Fe', 'Fh', 'Fg', 'Ta_Con', 'Ts_Con', 'RH_Con',
            'VPD_hPa_Con', 'ustar'
        ]]
        # The date/time components are separated into columns (julian day, decimal
        # hour, ...).  Never use an hour of 24 (use 0 instead): hour '0' is the
        # first hour of the day, so a day transition must be
        # (day, 23 --> day+1, 0) or the data set is not chronological.
        #REddyProc_DF['DT1','DT2','DT3','DT4','DT5','DT5']=REddyProc_DF.index.timetuple()
        REddyProc_DF['DTcopy'] = REddyProc_DF.index
        REddyProc_DF['Day'] = REddyProc_DF['DTcopy'].apply(
            lambda x: int(x.strftime('%j')))
        REddyProc_DF['Year'] = REddyProc_DF['DTcopy'].apply(
            lambda x: int(x.strftime('%Y')))
        # Decimal hour of day (e.g. 13.5 for 13:30).
        REddyProc_DF['Hour'] = REddyProc_DF['DTcopy'].apply(
            lambda x: float(x.strftime('%H')) + (float(x.strftime('%M')) / 60))
        # Select current year only
        REddyProc_DF = REddyProc_DF[REddyProc_DF['Year'] == year_index[0]]
        # Calculate some things for plots
        n_datapoints = len(REddyProc_DF)
        startdate = REddyProc_DF.index[0]
        enddate = REddyProc_DF.index[n_datapoints - 1]
        print n_datapoints, startdate, enddate
        # header_names is kept for reference only; the header actually written is
        # the newline1/newline2 strings below.
        header_names = [
            'Year', 'Day', 'Hour', 'NEE', 'LE', 'H', 'Rg', 'Tair', 'Tsoil',
            'rH', 'VPD', 'Ustar'
        ]
        columns_out = [
            'Year', 'Day', 'Hour', 'Fc', 'Fe', 'Fh', 'Fg', 'Ta_Con', 'Ts_Con',
            'RH_Con', 'VPD_hPa_Con', 'ustar'
        ]
        newline1 = 'Year \t DoY \t Hour \t NEE \t LE \t H \t Rg \t Tair \t Tsoil \t rH \t VPD \t Ustar'
        newline2 = " -- \t -- \t -- \t umolm-2s-1 \t Wm-2 \t Wm-2 \t Wm-2 \t degC \t degC \t % \t hPa \t ms-1"
        #newline1='Year,Day,Hour,NEE,LE,H,Rg,Tair,Tsoil,rH,VPD,Ustar'
        #newline2="--,--,--,umolm-2s-1,Wm-2,Wm-2,Wm-2,degC,degC,%,hPa,ms-1"
        output_temp_filename = mypathforResults + '/REddyProc_temp_' + Site_ID + '_' + str(
            year_index[0]) + '_' + versionID + '.txt'
        output_filename = mypathforResults + '/REddyProc_' + Site_ID + '_' + str(
            year_index[0]) + '_' + versionID + '.txt'
        # 'cols' is the legacy pandas keyword for selecting output columns
        # (renamed 'columns' in later pandas versions).
        REddyProc_DF.to_csv(output_temp_filename,
                            sep='\t',
                            na_rep='-9999',
                            float_format='%.3f',
                            cols=columns_out,
                            header=False,
                            index=False,
                            index_label=None,
                            mode='w')
        # Prepend the column-name and units header lines, then copy the data rows.
        with open(output_temp_filename) as infile:
            with open(output_filename, "w") as outfile:
                outfile.write(newline1 + "\n")
                outfile.write(newline2 + "\n")
                for i, line in enumerate(infile):
                    outfile.write(line)
        os.remove(output_temp_filename)
    #####################
    # Finish up
    ######################
    print "FINISHED writing out files for use in EddyProc MPI online tool "
# NOTE(review): this is a fragment of a larger ERA-Interim ingest routine -
# names such as Ta_erai_tts, erai_file, site_lat_index, site_lon_index,
# erai_time_3hr, erai_time_tts, ds_erai, mf and qcutils are defined upstream
# of this excerpt.
# Store the tower-time-step air temperature series with QC flags and metadata.
flag = numpy.zeros(len(Ta_erai_tts), dtype=numpy.int32)
attr = qcutils.MakeAttributeDictionary(long_name="Air temperature", units="C")
qcutils.CreateSeries(ds_erai, "Ta", Ta_erai_tts, Flag=flag, Attr=attr)
# Interpolate the 3 hourly dew point temperature to the tower time step
# and convert to Ah, RH and q
# NOTE: ERA-I variables are dimensioned [time,latitude,longitude]
Td_3d = erai_file.variables["d2m"][:, :, :]
Td_erai_3hr = Td_3d[:, site_lat_index, site_lon_index] - 273.15  # K -> C
# get the spline interpolation function (k=1 -> piecewise-linear interpolation)
s = InterpolatedUnivariateSpline(erai_time_3hr, Td_erai_3hr, k=1)
# get the dew point temperature at the tower time step
Td_erai_tts = s(erai_time_tts)
# get the relative humidity: saturation vapour pressure at Ta, and actual
# vapour pressure as the saturation vapour pressure at the dew point
es_erai_tts = mf.es(Ta_erai_tts)
e_erai_tts = mf.es(Td_erai_tts)
VPD_erai_tts = es_erai_tts - e_erai_tts
flag = numpy.zeros(len(VPD_erai_tts), dtype=numpy.int32)
attr = qcutils.MakeAttributeDictionary(
    long_name="Vapour pressure deficit", units="kPa")
qcutils.CreateSeries(ds_erai, "VPD", VPD_erai_tts, Flag=flag, Attr=attr)
# RH (%) = 100 * e / es
RH_erai_tts = float(100) * e_erai_tts / es_erai_tts
flag = numpy.zeros(len(RH_erai_tts), dtype=numpy.int32)
attr = qcutils.MakeAttributeDictionary(long_name="Relative humidity",
                                       units="percent")
qcutils.CreateSeries(ds_erai, "RH", RH_erai_tts, Flag=flag, Attr=attr)
def Output_files(New_combined,myBaseforResults,Site_ID,versionID,Ws_variable_name):
    """Write per-year, daily-aggregated CSV files for NASA (SMAP).

    New_combined     : pandas DataFrame of half-hourly tower data indexed by
                       timestamp; RH/VPD columns are (re)computed in place.
    myBaseforResults : base results directory; a "NASA_out" subdirectory is
                       created beneath it if needed.
    Site_ID, versionID : strings used to build the output file names.
    Ws_variable_name : "Ws_CSAT" selects the Ws_CSAT_Con wind-speed column,
                       anything else selects Ws_Con.
    """
    #Do any calculations on the whole dataset before grouping
    #Calculate RH_con
    New_combined['RH_Con']=metfuncs.RHfromabsolutehumidity(New_combined['Ah_Con'],New_combined['Ta_Con'])
    #We need to update VPD for input here so also need e and es.
    # Calculate vapour pressure from absolute humidity and temperature:
    #   Ah - absolute humidity, g/m3
    #   Ta - air temperature, C
    New_combined['VPD_kPa_Con']=(metfuncs.es(New_combined['Ta_Con']))-(metfuncs.vapourpressure(New_combined['Ah_Con'],New_combined['Ta_Con']))
    #Do timestamp operations
    #Make a copy of timestamp to the df
    #Take mean first (equal to mid day) then convert to DOY, day, month and year
    New_combined['DTcopy']=New_combined.index
    New_combined['Year']=New_combined['DTcopy'].apply(lambda x: int(x.strftime('%Y')))
    #Group DF by year - one output file per calendar year.
    New_combined_grouped=New_combined.groupby([lambda x: x.year])
    for year_index in New_combined_grouped:
        print year_index[0]
        print "Starting output for NASA"
        #Check for place to put results - does it exist? If not create
        if not os.path.isdir(myBaseforResults):
            os.mkdir(myBaseforResults)
        #Then subdirectories
        if not os.path.isdir(myBaseforResults+"/NASA_out"):
            os.mkdir(myBaseforResults+"/NASA_out")
        mypathforResults=myBaseforResults+"/NASA_out/"
        #Subset the DF to make it easier
        #WD removed here as its not required for NASA and for some sites SD variable names are not standard
        if Ws_variable_name=="Ws_CSAT":
            REddyProc_DF=New_combined[['DTcopy','Year','Ah_Con','Cc','eta','Fa','Fc_ustar','GPP_Con','Fre_Con','Fe_Con','Fg_Con','Fh_Con','Fld_Con','Flu_Con','Fm','Fn_Con','Fsd_Con','Fsu_Con','ps_Con','Precip_Con','Sws_Con','Ta_Con','Ts_Con','ustar','Ws_CSAT_Con','RH_Con','VPD_kPa_Con','Ah_Con_QCFlag','Fc_Con_QCFlag','Fe_Con_QCFlag','Fg_Con_QCFlag','Fh_Con_QCFlag','Fld_Con_QCFlag','Flu_Con_QCFlag','Fn_Con_QCFlag','Fsd_Con_QCFlag','Fsu_Con_QCFlag','ps_Con_QCFlag','Precip_Con_QCFlag','Sws_Con_QCFlag','Ta_Con_QCFlag','Ts_Con_QCFlag']]
        else:
            REddyProc_DF=New_combined[['DTcopy','Year','Ah_Con','Cc','eta','Fa','Fc_ustar','GPP_Con','Fre_Con','Fe_Con','Fg_Con','Fh_Con','Fld_Con','Flu_Con','Fm','Fn_Con','Fsd_Con','Fsu_Con','ps_Con','Precip_Con','Sws_Con','Ta_Con','Ts_Con','ustar','Ws_Con','RH_Con','VPD_kPa_Con','Ah_Con_QCFlag','Fc_Con_QCFlag','Fe_Con_QCFlag','Fg_Con_QCFlag','Fh_Con_QCFlag','Fld_Con_QCFlag','Flu_Con_QCFlag','Fn_Con_QCFlag','Fsd_Con_QCFlag','Fsu_Con_QCFlag','ps_Con_QCFlag','Precip_Con_QCFlag','Sws_Con_QCFlag','Ta_Con_QCFlag','Ts_Con_QCFlag']]
        #Select current year only
        REddyProc_DF=REddyProc_DF[REddyProc_DF['Year']==year_index[0]]
        #Calculate some things for plots
        n_datapoints=len(REddyProc_DF)
        startdate= REddyProc_DF.index[0]
        enddate= REddyProc_DF.index[n_datapoints-1]
        print n_datapoints,startdate,enddate
        #Calculate the DAILY means/sums from the half hourly data
        tempDF_mean=REddyProc_DF.groupby(lambda x : x.dayofyear).mean().add_suffix('_mean')
        tempDF_sum=REddyProc_DF.groupby(lambda x : x.dayofyear).sum().add_suffix('_sum')
        tempDF=tempDF_mean.join(tempDF_sum,how='left')
        #Add QC counts to the means DF.
        #Good QC value (not gap filled) is 1.  Select values ==1, count per day
        #and divide by 48 (48 half-hour periods per day) -> fraction of good data;
        #days with no good values become NaN after the join, so fill with 0.
        tempDF['Rn_qc']=REddyProc_DF['Fn_Con_QCFlag'][REddyProc_DF['Fn_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['Rn_qc'].fillna(value=0,inplace=True)
        tempDF['Rs_qc']=REddyProc_DF['Fsd_Con_QCFlag'][REddyProc_DF['Fsd_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['Rs_qc'].fillna(value=0,inplace=True)
        tempDF['Ta_qc']=REddyProc_DF['Ta_Con_QCFlag'][REddyProc_DF['Ta_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['Ta_qc'].fillna(value=0,inplace=True)
        #VPD quality is taken from the absolute-humidity flag it is derived from.
        tempDF['VPD_qc']=REddyProc_DF['Ah_Con_QCFlag'][REddyProc_DF['Ah_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['VPD_qc'].fillna(value=0,inplace=True)
        tempDF['Ts_qc']=REddyProc_DF['Ts_Con_QCFlag'][REddyProc_DF['Ts_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['Ts_qc'].fillna(value=0,inplace=True)
        #NEE, GPP and Reco quality all derive from the Fc flag.
        tempDF['NEE_qc']=REddyProc_DF['Fc_Con_QCFlag'][REddyProc_DF['Fc_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['NEE_qc'].fillna(value=0,inplace=True)
        tempDF['GPP_qc']=REddyProc_DF['Fc_Con_QCFlag'][REddyProc_DF['Fc_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['GPP_qc'].fillna(value=0,inplace=True)
        tempDF['Reco_qc']=REddyProc_DF['Fc_Con_QCFlag'][REddyProc_DF['Fc_Con_QCFlag']==1.].groupby(lambda x : x.dayofyear).count()/48
        tempDF['Reco_qc'].fillna(value=0,inplace=True)
        #add a site label to columns
        tempDF['Site_ID']=Site_ID
        #Date parts for each day, taken from the first timestamp of that day.
        tempDF['DTmean']=REddyProc_DF['DTcopy'].groupby(lambda x : x.dayofyear).min()
        tempDF['Day']=tempDF['DTmean'].apply(lambda x: int(x.strftime('%d')))
        tempDF['Month']=tempDF['DTmean'].apply(lambda x: int(x.strftime('%m')))
        tempDF['Year']=tempDF['DTmean'].apply(lambda x: int(x.strftime('%Y')))
        # Jan the 1st is day 1
        #tempDF['DOY'] = (tempDF['DTmean'] - dt.datetime(year_index[0], 1, 1))
        tempDF['DOY'] = tempDF['DTmean'].apply(lambda x: int(x.strftime('%j')))
        #Do conversions for Carbon variables (convert from umol to g C for NASA):
        # umol m-2 s-1 * 86400 s/day / 1e6 umol/mol * 12 g C/mol -> gC m-2 day-1
        tempDF['Fc_ustar_mean']=tempDF['Fc_ustar_mean']*60*60*24/1000000*12
        tempDF['GPP_Con_mean']=tempDF['GPP_Con_mean']*60*60*24/1000000*12
        tempDF['Fre_Con_mean']=tempDF['Fre_Con_mean']*60*60*24/1000000*12
        #Do conversions for Radiation variables (convert Wm-2 to MJ m-2 day-1)
        tempDF['Fsd_Con_mean']=tempDF['Fsd_Con_mean']*60*60*24/1000000
        tempDF['Fn_Con_mean']=tempDF['Fn_Con_mean'] *60*60*24/1000000
        #Column-name line and units line written ahead of the data.
        newline2="ID, Year, Mo, Day, DOY, Rn_f, Rn_qc, Rs_f, Rs_qc, Ta, Ta_qc, VPD, VPD_qc, Ts_f, Ts_qc, PREC, SWC, NEE, NEE_qc, GPP, GPP_qc, Reco, Reco_qc, PRESS, SNOWD"
        newline3="-, -, -, -, -, MJ m-2 day-1, -, MJ m-2 day-1, -, oC, -, kPa, -, oC, -, mm day-1, m3/m3, gC m-2 day-1, -, gC m-2 day-1, -, gC m-2 day-1, -, MPa day-1, mm"
        columns_out = ['Site_ID','Year','Month','Day','DOY', 'Fn_Con_mean','Rn_qc','Fsd_Con_mean','Rs_qc', 'Ta_Con_mean', 'Ta_qc', 'VPD_kPa_Con_mean', 'VPD_qc', 'Ts_Con_mean', 'Ts_qc', 'Precip_Con_sum', 'Sws_Con_mean', 'Fc_ustar_mean', 'NEE_qc', 'GPP_Con_mean', 'GPP_qc','Fre_Con_mean', 'Reco_qc', 'ps_Con_mean']
        output_temp_filename=mypathforResults+'/NASA_SMAP_temp_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.csv'
        output_filename=mypathforResults+'/NASA_SMAP_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.csv'
        tempDF[columns_out].to_csv(output_temp_filename, na_rep='-9999', float_format='%.3f', header=False, index=False, index_label=None, mode='w')
        #Now prepend the header lines, then copy the data rows.
        with open(output_temp_filename) as infile:
            with open(output_filename,"w") as outfile:
                #outfile.write(newline1+"\n")
                outfile.write(newline2+"\n")
                outfile.write(newline3+"\n")
                for i,line in enumerate(infile):
                    outfile.write(line)
        os.remove(output_temp_filename)
    #####################
    # Finish up
    ######################
    print "FINISHED writing out files for use for NASA "
def Output_files(New_combined,myBaseforResults,Site_ID,versionID):
    """Write per-year CSV files of half-hourly data for the WAVES project
    (variant using the Sws_5cma / Sws_50cma soil-moisture columns).

    New_combined     : pandas DataFrame of half-hourly tower data indexed by
                       timestamp; RH and VPD columns are (re)computed in place.
    myBaseforResults : base results directory; a "WAVES" subdirectory is
                       created beneath it if needed.
    Site_ID, versionID : strings used to build the output file names.
    """
    #One output file per calendar year.
    New_combined_grouped=New_combined.groupby([lambda x: x.year])
    for year_index in New_combined_grouped:
        print year_index[0]
        print "Starting output for WAVES"
        #Check for place to put results - does it exist? If not create
        if not os.path.isdir(myBaseforResults):
            os.mkdir(myBaseforResults)
        #Then subdirectories
        if not os.path.isdir(myBaseforResults+"/WAVES"):
            os.mkdir(myBaseforResults+"/WAVES")
        mypathforResults=myBaseforResults+"/WAVES/"
        #Calculate RH_con
        New_combined['RH_Con']=metfuncs.RHfromabsolutehumidity(New_combined['Ah_Con'],New_combined['Ta_Con'])
        #Convert VPD in kPa to hPa.
        #We need to update VPD for input here so also need e and es.
        # Calculate vapour pressure from absolute humidity and temperature:
        #   Ah - absolute humidity, g/m3
        #   Ta - air temperature, C
        #NOTE(review): the '/10' binds only to the vapourpressure() term, not to
        #the whole (es - e) difference - confirm intended unit conversion.
        New_combined['VPD_hPa_Con']=(metfuncs.es(New_combined['Ta_Con']))-(metfuncs.vapourpressure(New_combined['Ah_Con'],New_combined['Ta_Con']))/10
        REddyProc_DF=New_combined[['Ah_Con','Cc','eta','Fa','Fc_Con','Fe_Con','Fg_Con','Fh_Con','Fld_Con','Flu_Con','Fm','Fn_Con','Fsd_Con','Fsu_Con','ps_Con','Precip_Con','Sws_Con','Sws_5cma','Sws_50cma','Ta_Con','theta','Ts_Con','ustar','Ws_CSAT_Con','Wd_CSAT','RH_Con','VPD_hPa_Con']]
        #Date/time components separated into columns (julian day, year, decimal
        #hour).  Never use an hour of 24 (use 0): hour '0' is the first hour of
        #the day, so a day transition must be (day, 23 --> day+1, 0) to keep the
        #data set chronological.
        #REddyProc_DF['DT1','DT2','DT3','DT4','DT5','DT5']=REddyProc_DF.index.timetuple()
        REddyProc_DF['DTcopy']=REddyProc_DF.index
        REddyProc_DF['Day']=REddyProc_DF['DTcopy'].apply(lambda x: int(x.strftime('%j')))
        REddyProc_DF['Year']=REddyProc_DF['DTcopy'].apply(lambda x: int(x.strftime('%Y')))
        #Decimal hour of day (e.g. 13.5 for 13:30).
        REddyProc_DF['Hour']=REddyProc_DF['DTcopy'].apply(lambda x: float(x.strftime('%H'))+(float(x.strftime('%M'))/60))
        #Select current year only
        REddyProc_DF=REddyProc_DF[REddyProc_DF['Year']==year_index[0]]
        #Calculate some things for plots
        n_datapoints=len(REddyProc_DF)
        startdate= REddyProc_DF.index[0]
        enddate= REddyProc_DF.index[n_datapoints-1]
        print n_datapoints,startdate,enddate
        #(A long human-readable column-description header, newline1, was
        # commented out here and is not written to the file.)
        #Units line and column-name line written ahead of the data.
        newline2="DSN,g/m3,mg/m3,deg,W/m2,umol/m2/s,W/m2,W/m2,W/m2,W/m2,W/m2,kg/m/s2,W/m2,W/m2,W/m2,kPa,mm,frac,frac,frac,C,deg,C,m/s,m/s,deg,hPa,frac"
        newline3="TIMESTAMP,Ah_Con,Cc,eta,Fa,Fc_Con,Fe_Con,Fg_Con,Fh_Con,Fld_Con,Flu_Con,Fm,Fn_Con,Fsd_Con,Fsu_Con,ps_Con,Precip_Con,Sws_Con,Sws_5cm,Sws_50cm,Ta_Con,theta,Ts_Con,ustar,Ws_CSAT_Con,Wd_CSAT,RH_Con,VPD_hPa_Con"
        columns_out = ['DTcopy','Ah_Con','Cc','eta','Fa','Fc_Con','Fe_Con','Fg_Con','Fh_Con','Fld_Con','Flu_Con','Fm','Fn_Con','Fsd_Con','Fsu_Con','ps_Con','Precip_Con','Sws_Con','Sws_5cma','Sws_50cma','Ta_Con','theta','Ts_Con','ustar','Ws_CSAT_Con','Wd_CSAT','RH_Con','VPD_hPa_Con']
        output_temp_filename=mypathforResults+'/WAVES_temp_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.csv'
        output_filename=mypathforResults+'/WAVES_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.csv'
        #'cols' is the legacy pandas keyword for selecting output columns.
        REddyProc_DF.to_csv(output_temp_filename, na_rep='-9999', float_format='%.3f', cols=columns_out, header=False, index=False, index_label=None, mode='w')
        #Now prepend the units and column-name lines, then copy the data rows.
        with open(output_temp_filename) as infile:
            with open(output_filename,"w") as outfile:
                #outfile.write(newline1+"\n")
                outfile.write(newline2+"\n")
                outfile.write(newline3+"\n")
                for i,line in enumerate(infile):
                    outfile.write(line)
        os.remove(output_temp_filename)
    #####################
    # Finish up
    ######################
    print "FINISHED writing out files for use for WAVES "
def Output_files(New_combined,myBaseforResults,Site_ID,versionID): New_combined_grouped=New_combined.groupby([lambda x: x.year]) for year_index in New_combined_grouped: print year_index[0] print "Starting output for WAVES" #Check for place to put results - does it exist? If not create if not os.path.isdir(myBaseforResults): os.mkdir(myBaseforResults) #Then subdirectories if not os.path.isdir(myBaseforResults+"/WAVES"): os.mkdir(myBaseforResults+"/WAVES") mypathforResults=myBaseforResults+"/WAVES/" #Calculate RH_con New_combined['RH_Con']=metfuncs.RHfromabsolutehumidity(New_combined['Ah_Con'],New_combined['Ta_Con']) #Convert VPD in kPa to hPa. #We need to update VPD for input here so also need e and es # Calculate vapour pressure from absolute humidity and temperature # Ah - absolute humidity, g/m3 # Ta - air temperature, C New_combined['VPD_hPa_Con']=(metfuncs.es(New_combined['Ta_Con']))-(metfuncs.vapourpressure(New_combined['Ah_Con'],New_combined['Ta_Con']))/10 REddyProc_DF=New_combined[['Ah_Con','Cc','eta','Fa','Fc_Con','Fe_Con','Fg_Con','Fh_Con','Fld_Con','Flu_Con','Fm','Fn_Con','Fsd_Con','Fsu_Con','ps_Con','Precip_Con','Sws_Con','Sws_05','Sws_50','Ta_Con','theta','Ts_Con','ustar','Ws_CSAT_Con','Wd_CSAT','RH_Con','VPD_hPa_Con']] #The date/time components are separated into columns. E.g. first column: julian day, second column: decimal hour. #Possible date formats are indicated in the input form. Never use an hour of 24 with the time #format 'year', 'month', 'day', 'hour', 'minute' (use 0 instead). Hour '0' is interpreted as first hour of the day, #i.e. when you have transition from one to another it must be like (day, 23 --> day+1, 0) not like (day, 23 --> day, 0), #because then the data set is not chronological (this misunderstanding happened before). 
#REddyProc_DF['DT1','DT2','DT3','DT4','DT5','DT5']=REddyProc_DF.index.timetuple() REddyProc_DF['DTcopy']=REddyProc_DF.index REddyProc_DF['Day']=REddyProc_DF['DTcopy'].apply(lambda x: int(x.strftime('%j'))) REddyProc_DF['Year']=REddyProc_DF['DTcopy'].apply(lambda x: int(x.strftime('%Y'))) REddyProc_DF['Hour']=REddyProc_DF['DTcopy'].apply(lambda x: float(x.strftime('%H'))+(float(x.strftime('%M'))/60)) #Select current year of yaer only REddyProc_DF=REddyProc_DF[REddyProc_DF['Year']==year_index[0]] #Calculate some things for plots n_datapoints=len(REddyProc_DF) startdate= REddyProc_DF.index[0] enddate= REddyProc_DF.index[n_datapoints-1] print n_datapoints,startdate,enddate #newline1="TIMESTAMP,Merged from Ah_HMP_23m Ah_7500_Av Ah_HMP_2m,CO2 concentration average,Merged from Cc_7500_Av converted to umol/mol,Horizontal rotation angle,Available energy using Fn Fg,CO2 flux rotated to natural wind coordinates WPL corrected Fc converted to umol/m2/s,Latent heat flux rotated to natural wind coordinates WPL corrected Fe,Element-wise average of series Fg_8cma Fg_8cmb Fg_8cmc Fg_8cmd Soil heat flux corrected for storage,Sensible heat flux rotated to natural wind coordinates Fh rotated and converted from virtual heat flux,Down-welling long wave,Up-welling long wave,Momentum flux rotated to natural wind coordinates,Merged from Fn_KZ Fn_NR Down-welling short wave Up-welling short wave,Air pressure standard deviation,Element-wise average of series Sws_10cma Sws_10cmb,Soil water fraction sensor 2a,Soil water fraction sensor 3a,Merged from Ta_HMP_23m Ta_CSAT Ta_HMP_2m,Vertical rotation angle,Element-wise average of series Ts_8cma,Friction velocity rotated to natural wind coordinates,ustar filtered for low turbulence conditions (<0.25),Wind speed,Wind direction" newline2="DSN,g/m3,mg/m3,deg,W/m2,umol/m2/s,W/m2,W/m2,W/m2,W/m2,W/m2,kg/m/s2,W/m2,W/m2,W/m2,kPa,mm,frac,frac,frac,C,deg,C,m/s,m/s,deg,hPa,frac" newline3= 
"TIMESTAMP,Ah_Con,Cc,eta,Fa,Fc_Con,Fe_Con,Fg_Con,Fh_Con,Fld_Con,Flu_Con,Fm,Fn_Con,Fsd_Con,Fsu_Con,ps_Con,Precip_Con,Sws_Con,Sws_5cm,Sws_50cm,Ta_Con,theta,Ts_Con,ustar,Ws_CSAT_Con,Wd_CSAT,RH_Con,VPD_hPa_Con" columns_out = ['DTcopy','Ah_Con','Cc','eta','Fa','Fc_Con','Fe_Con','Fg_Con','Fh_Con','Fld_Con','Flu_Con','Fm','Fn_Con','Fsd_Con','Fsu_Con','ps_Con','Precip_Con','Sws_Con','Sws_05','Sws_50','Ta_Con','theta','Ts_Con','ustar','Ws_CSAT_Con','Wd_CSAT','RH_Con','VPD_hPa_Con'] output_temp_filename=mypathforResults+'/WAVES_temp_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.csv' output_filename=mypathforResults+'/WAVES_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.csv' REddyProc_DF.to_csv(output_temp_filename, na_rep='-9999', float_format='%.3f', cols=columns_out, header=False, index=False, index_label=None, mode='w') #Now add another line with units #Open txt file with open(output_temp_filename) as infile: with open(output_filename,"w") as outfile: for i,line in enumerate(infile): if i==0: #outfile.write(newline1+"\n") outfile.write(newline2+"\n") outfile.write(newline3+"\n") else: outfile.write(line) os.remove(output_temp_filename) ##################### # Finish up ###################### print "FINISHED writing out files for use for WAVES "
def Output_files(New_combined,myBaseforResults,Site_ID,versionID): New_combined_grouped=New_combined.groupby([lambda x: x.year]) for year_index in New_combined_grouped: print year_index[0] print "Starting output for EddyProc MPI online tool" #Check for place to put results - does it exist? If not create if not os.path.isdir(myBaseforResults): os.mkdir(myBaseforResults) #Then subdirectories if not os.path.isdir(myBaseforResults+"/REddyProc"): os.mkdir(myBaseforResults+"/REddyProc") mypathforResults=myBaseforResults+"/REddyProc/" #Calculate RH_con New_combined['RH_Con']=metfuncs.RHfromabsolutehumidity(New_combined['Ah_Con'],New_combined['Ta_Con']) #Convert VPD in kPa to hPa. #We need to update VPD for input here so also need e and es # Calculate vapour pressure from absolute humidity and temperature # Ah - absolute humidity, g/m3 # Ta - air temperature, C New_combined['VPD_hPa_Con']=(metfuncs.es(New_combined['Ta_Con']))-(metfuncs.vapourpressure(New_combined['Ah_Con'],New_combined['Ta_Con']))/10 REddyProc_DF=New_combined[['Fc','Fe','Fh','Fg','Ta_Con','Ts_Con','RH_Con','VPD_hPa_Con','ustar']] #The date/time components are separated into columns. E.g. first column: julian day, second column: decimal hour. #Possible date formats are indicated in the input form. Never use an hour of 24 with the time #format 'year', 'month', 'day', 'hour', 'minute' (use 0 instead). Hour '0' is interpreted as first hour of the day, #i.e. when you have transition from one to another it must be like (day, 23 --> day+1, 0) not like (day, 23 --> day, 0), #because then the data set is not chronological (this misunderstanding happened before). 
#REddyProc_DF['DT1','DT2','DT3','DT4','DT5','DT5']=REddyProc_DF.index.timetuple() REddyProc_DF['DTcopy']=REddyProc_DF.index REddyProc_DF['Day']=REddyProc_DF['DTcopy'].apply(lambda x: int(x.strftime('%j'))) REddyProc_DF['Year']=REddyProc_DF['DTcopy'].apply(lambda x: int(x.strftime('%Y'))) REddyProc_DF['Hour']=REddyProc_DF['DTcopy'].apply(lambda x: float(x.strftime('%H'))+(float(x.strftime('%M'))/60)) #Select current year of yaer only REddyProc_DF=REddyProc_DF[REddyProc_DF['Year']==year_index[0]] #Calculate some things for plots n_datapoints=len(REddyProc_DF) startdate= REddyProc_DF.index[0] enddate= REddyProc_DF.index[n_datapoints-1] print n_datapoints,startdate,enddate header_names = ['Year','Day','Hour','NEE','LE','H' ,'Rg','Tair' ,'Tsoil' ,'rH', 'VPD', 'Ustar'] columns_out = ['Year','Day','Hour','Fc', 'Fe','Fh','Fg','Ta_Con','Ts_Con','RH_Con','VPD_hPa_Con','ustar'] newline1='Year \t DoY \t Hour \t NEE \t LE \t H \t Rg \t Tair \t Tsoil \t rH \t VPD \t Ustar' newline2=" -- \t -- \t -- \t umolm-2s-1 \t Wm-2 \t Wm-2 \t Wm-2 \t degC \t degC \t % \t hPa \t ms-1" #newline1='Year,Day,Hour,NEE,LE,H,Rg,Tair,Tsoil,rH,VPD,Ustar' #newline2="--,--,--,umolm-2s-1,Wm-2,Wm-2,Wm-2,degC,degC,%,hPa,ms-1" output_temp_filename=mypathforResults+'/REddyProc_temp_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.txt' output_filename=mypathforResults+'/REddyProc_'+Site_ID+'_'+str(year_index[0])+'_'+versionID +'.txt' REddyProc_DF.to_csv(output_temp_filename, sep='\t', na_rep='-9999', float_format='%.3f', cols=columns_out, header=False, index=False, index_label=None, mode='w') #Now add another line with units #Open txt file with open(output_temp_filename) as infile: with open(output_filename,"w") as outfile: for i,line in enumerate(infile): if i==0: outfile.write(newline1+"\n") outfile.write(newline2+"\n") else: outfile.write(line) os.remove(output_temp_filename) ##################### # Finish up ###################### print "FINISHED writing out files for use in EddyProc MPI online tool "
def ANN_gapfill_func(myBaseforResults,New_combined,Site_ID,list_in,list_out,iterations,index_str,is_this_all,ANN_label_all,ANN_label,frequency,Use_Fc_Storage): ########################################################################################################### ## START MAIN CODE ########################################################################################################### if 'Fc' in list_out: units="umol.m-2.s-1" elif ('Fe' or 'Fh' or 'Fg') in list_out: units="W.m-2" else: units=" " ###### User-set IO file locations ###### print "Starting ANN gap filling" #Check for place to put results - does it exist? If not create if not os.path.isdir(myBaseforResults): os.mkdir(myBaseforResults) #Then subdirectories if not os.path.isdir(myBaseforResults+"/ANN"): os.mkdir(myBaseforResults+"/ANN") mypathforResults=myBaseforResults+"/ANN" #We need to update VPD for input here so also need e and es # Calculate vapour pressure from absolute humidity and temperature # Ah - absolute humidity, g/m3 # Ta - air temperature, C New_combined['VPD_Con']=(metfuncs.es(New_combined['Ta_Con']))-(metfuncs.vapourpressure(New_combined['Ah_Con'],New_combined['Ta_Con'])) number_of_inputs=len(list_in) number_of_outputs=len(list_out) #startdate=dt.date(2008,7,1) #enddate=dt.date(2008,8,1) alllist=list_in + list_out xnow=New_combined[alllist] #[startdate:enddate] xnow=xnow.dropna(how='any') #Drop nans and missing values so that Good data only is used in the training xarray=np.array(xnow.dropna().reset_index(drop=True)) #Define inputs and targets for NN from DF inputs = xarray[:, :number_of_inputs] #first 2 columns lastcolums=(-1*number_of_outputs) targets = xarray[:, lastcolums:] #last column # Generate standard layered network architecture and create network #different network architectures avaiable #conec = mlgraph((number_of_inputs,24,16,number_of_outputs)) # Creates standard multilayer network architecture conec = tmlgraph((number_of_inputs,24,16,number_of_outputs)) # Creates 
multilayer network full connectivity list #conec = imlgraph((number_of_inputs,24,16,number_of_outputs)) # Creates multilayer architecture with independent outputs net = ffnet(conec) print "TRAINING NETWORK..." net.train_tnc(inputs, targets, maxfun = iterations, messages=1) #net.train_rprop(inputs, targets, maxiter=iterations) #net.train_momentum(inputs, targets, maxfun = iterations, messages=1) #net.train_genetic(inputs, targets, maxfun = iterations, messages=1) #net.train_cg(inputs, targets, maxfun = iterations, messages=1) #net.train_bfgs(inputs, targets, maxfun = iterations, messages=1) # Test network print "TESTING NETWORK..." output, regression = net.test(inputs, targets, iprint = 0) print "R-squared: %s " %str(regression[0][2]) #print "max. absolute error: %s " %str(abs( array(output).reshape( len(output) ) - array(targets) ).max()) output, regress = net.test(inputs, targets) #Create array for results. Then loop through elements on the original data to predict the ANN value predicted=np.empty((len(xarray),number_of_outputs)) observed=np.empty((len(xarray),number_of_outputs)) for index,rowdata in enumerate(xarray): predicted[index]=net([rowdata[0:number_of_inputs]]) observed[index]=np.array(rowdata[-1.0*number_of_outputs : ]) #observed[index]=np.array(rowdata[(-1.0*number_of_outputs)]) ############################################ # Generate output and return new variables ############################################ #Create a new variable called '_NN' for index, item in enumerate(list_out): ANN_label=str(item+"_NN") ANN_label_all=str(item+"_NN_all") if is_this_all == True: New_combined[ANN_label_all]=net.call(New_combined[list_in])[:,index] else: New_combined[ANN_label]=net.call(New_combined[list_in])[:,index] for index, item in enumerate(list_out): ##################################################### # Plots ##################################################### #Plot time series of all 30 minute data 
mintimeseries_plot(mypathforResults,predicted,observed,regress,item, Site_ID,units,targets,output,list_out,index_str) #Plot regression of Tower versus ANN regressionANN2(mypathforResults,predicted,observed,regress,item, Site_ID,units,list_out,index_str) #Plot diurnals for every second month 6 graphs - only when enough months so all or annual if frequency=="all" or frequency=="annual" or is_this_all==True: Doplots_diurnal_monthly(mypathforResults,New_combined,item, Site_ID,units,list_out,index_str,is_this_all) #Plot diurnals for every second month 6 graphs Doplots_diurnal(mypathforResults,New_combined,item, Site_ID,units,list_out,index_str,frequency) #Plot timeseries of monthly over all periods Doplots_monthly(mypathforResults,New_combined,item, Site_ID,units,list_out,index_str,frequency) return New_combined