import datetime as dt

import pandas as pd
from hilltoppy import web_service as ws


def extract_water_use_data(dataframe, site):
    """Iterate through a measurement list, extracting water use data from
    Hilltop and compiling it into a single dataframe."""
    # Set base parameters
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'

    # Create an empty dataframe to append raw data to
    raw_data = pd.DataFrame(columns=['Measurement', 'DateTime', 'Value'])

    # Find the start date of the time series
    from_d = dataframe['FromDate'].iloc[0]

    # Iterate through the measurement list, extracting and compiling data
    for index, row in dataframe.iterrows():
        measurement = row['Measurement']
        to_d = row['ToDate']
        if from_d <= to_d:
            try:
                print("Extracting {0} data from {1} to {2}".format(
                    measurement, from_d, to_d))
                tsdata = ws.get_data(base_url, hts, site, measurement,
                                     from_date=str(from_d),
                                     to_date=str(to_d))
                tsdata2 = tsdata.reset_index().drop(columns='Site')
                raw_data = pd.concat([raw_data, tsdata2], ignore_index=True)
                # Advance the start date to prevent overlapping time series
                from_d = to_d + dt.timedelta(days=1)
            # Avoid a bare except; get_data raises when no data is returned
            except Exception:
                print('No data extracted for:', measurement)
                from_d = to_d + dt.timedelta(days=1)
        else:
            print('Skipping extraction for:', measurement)

    return raw_data
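# A minimal usage sketch for the function above, assuming the hilltoppy
# package and a reachable endpoint. The site code is the example one from the
# usage script later in this section, and renaming From/To to FromDate/ToDate
# is an assumption about the input columns the function expects.
from hilltoppy import web_service as ws

site = 'L36/1764-M1'
mtypes = ws.measurement_list('http://wateruse.ecan.govt.nz', 'WaterUse.hts',
                             site).reset_index()
mtypes = mtypes.rename(columns={'From': 'FromDate', 'To': 'ToDate'})

raw_data = extract_water_use_data(
    mtypes[['Measurement', 'FromDate', 'ToDate']], site)
print(raw_data.head())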
def test_get_data1():
    tsdata1 = get_data(base_url, hts, site, measurement,
                       from_date=from_date, to_date=to_date)

    assert len(tsdata1) > 80
def test_get_data3(data):
    tsdata3 = get_data(data['base_url'], data['hts'], data['site'],
                       'WQ Sample', from_date=data['from_date'],
                       to_date=data['to_date'])

    assert len(tsdata3) > 800
def test_get_data1(data):
    tsdata1 = get_data(data['base_url'], data['hts'], data['site'],
                       data['measurement'], from_date=data['from_date'],
                       to_date=data['to_date'])

    assert len(tsdata1) > 80
def test_get_data3():
    tsdata3 = get_data(base_url, hts, site, 'WQ Sample',
                       from_date=from_date, to_date=to_date)

    assert len(tsdata3) > 800
def extract_water_use_data(site, measurement, from_d, to_d):
    """Extract water use data from Hilltop and compile it into a dataframe."""
    # Set base parameters
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'

    tsdata = ws.get_data(base_url, hts, site, measurement,
                         from_date=str(from_d), to_date=str(to_d))
    tsdata2 = tsdata.reset_index().drop(columns='Site')

    return tsdata2
def test_get_data2(data):
    tsdata2, extra2 = get_data(data['base_url'], data['hts'], data['site'],
                               data['measurement'],
                               from_date=data['from_date'],
                               to_date=data['to_date'], parameters=True)

    assert (len(tsdata2) > 80) & (len(extra2) > 300)
def get_volume_data(site, from_date, to_date):
    """Extract compliance volume data from Hilltop for a given date range."""
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'
    measurement = 'Compliance Volume'

    tsdata = ws.get_data(base_url, hts, site, measurement, from_date, to_date)
    vol_data = tsdata.reset_index().drop(columns='Site').drop(
        columns='Measurement')

    return vol_data
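# A minimal usage sketch for the wrapper above; the site code is illustrative
# and the date range is arbitrary.
vols = get_volume_data('L36/1764-M1', '2017-01-01', '2017-01-31')
print(vols.head())  # DateTime/Value rows for the requested period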
def test_get_data2():
    tsdata2, extra2 = get_data(base_url, hts, site, measurement,
                               from_date=from_date, to_date=to_date,
                               parameters=True)

    assert (len(tsdata2) > 80) & (len(extra2) > 300)
def test_site_mtypes(hts):
    sites = site_list(base_url, hts)
    site1 = sites.iloc[2].SiteName
    mtype_df1 = measurement_list(base_url, hts, site1).reset_index().iloc[0]
    tsdata1 = get_data(base_url, hts, site1, mtype_df1.Measurement,
                       from_date=str(mtype_df1.From),
                       to_date=str(mtype_df1.From))

    assert len(tsdata1) == 1
def test_get_data4():
    tsdata4, extra4 = get_data(base_url, hts, site, measurement,
                               from_date=from_date, to_date=to_date,
                               parameters=True, dtl_method=dtl_method)

    assert (len(tsdata4) > 80) & (len(extra4) > 300) & \
        (tsdata4.Value.dtype.name == 'float32')
def test_get_data4(data):
    tsdata4, extra4 = get_data(data['base_url'], data['hts'], data['site'],
                               data['measurement'],
                               from_date=data['from_date'],
                               to_date=data['to_date'], parameters=True,
                               dtl_method=data['dtl_method'])

    # np.issubdtype is the correct numeric check; comparing a dtype to the
    # abstract np.number type with == never matches
    assert (len(tsdata4) > 80) & (len(extra4) > 300) & \
        np.issubdtype(tsdata4.Value.dtype, np.number)
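# The fixture-style tests above assume a pytest fixture named `data` that
# supplies connection parameters. A minimal conftest.py sketch; the site,
# measurement, dates, and dtl_method are the example values from the usage
# script later in this section, while the base_url and hts values are
# placeholders (the WQ endpoint is not given in this section).
import pytest


@pytest.fixture
def data():
    return {
        'base_url': 'http://wateruse.ecan.govt.nz',  # placeholder endpoint
        'hts': 'WaterUse.hts',                       # placeholder hts file
        'site': 'SQ31045',
        'measurement': 'Total Phosphorus',
        'from_date': '1983-11-22 10:50',
        'to_date': '2018-04-13 14:05',
        'dtl_method': 'trend',
    }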
def get_volume_data(base_url, hts, site, measurement, from_date, to_date):
    """Extract compliance volume data from Hilltop for a given date range and
    sum the reading counts for each day."""
    tsdata = ws.get_data(base_url, hts, site, measurement, from_date, to_date)
    dfdata = tsdata.reset_index().drop(columns='Site').drop(
        columns='Measurement')
    dfdata['Date'] = dfdata['DateTime'].dt.date
    daily_counts = dfdata.groupby(['Date'])['Value'].agg(
        ['count']).rename(columns={'count': 'Readings'})
    # Convert the date index to Timestamps so it matches the date_range index
    # below; otherwise reindex finds no matches and fills everything with 0
    daily_counts.index = pd.to_datetime(daily_counts.index)
    idx = pd.date_range(from_date, to_date)
    daily_counts2 = daily_counts.reindex(idx, fill_value=0)

    return daily_counts2
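# A usage sketch for the daily-count variant above, assuming the water use
# endpoint used elsewhere in this section; the site code is illustrative.
daily = get_volume_data('http://wateruse.ecan.govt.nz', 'WaterUse.hts',
                        'L36/1764-M1', 'Compliance Volume',
                        '2017-01-01', '2017-01-31')
print(daily.head())  # one 'Readings' count per calendar day, 0 where no data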
def extract_water_use_data(site, measurement, from_d, to_d):
    """Extract water use data from Hilltop for a specified site, measurement
    type and date range."""
    print("Processing {} data".format(measurement))

    # Set base parameters
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'

    # Extract data
    tsdata = ws.get_data(base_url, hts, site, measurement,
                         from_date=str(from_d), to_date=str(to_d))
    tsdata2 = tsdata.reset_index().drop(columns='Site')

    return tsdata2
def ecan_ts_data(server, database, site_ts_summ, from_date, to_date,
                 dtl_method=None):
    """Return time series data from the SQL server when DatasetTypeID is
    below 10000, otherwise from the Hilltop web service."""
    dataset1 = site_ts_summ.DatasetTypeID.iloc[0]
    sites1 = site_ts_summ.ExtSiteID.unique().tolist()

    if dataset1 < 10000:
        ts1 = mssql.rd_sql(server, database, ts_table,
                           ['ExtSiteID', 'DateTime', 'Value'],
                           where_in={'DatasetTypeID': [dataset1],
                                     'ExtSiteID': sites1},
                           from_date=from_date, to_date=to_date,
                           date_col='DateTime')
    else:
        ts_list = []
        mtype = site_ts_summ.MeasurementType.iloc[0]
        for s in sites1:
            ts0 = ws.get_data(base_url, hts, s, mtype, from_date, to_date,
                              dtl_method=dtl_method)
            ts_list.append(ts0)
        ts1 = pd.concat(ts_list).reset_index().drop('Measurement', axis=1)
        ts1.rename(columns={'Site': 'ExtSiteID'}, inplace=True)

    return ts1
mtypes = []
for s in sites['Site'].values:
    mtypes.append(ws.measurement_list(ecan_base_url, hts_name, s, mtype))
mtypes_df = pd.concat(mtypes).reset_index().drop('DataType', axis=1)
mtypes_df['n_days'] = (mtypes_df['To'] - mtypes_df['From']).dt.days
mtypes_df1 = mtypes_df[mtypes_df['n_days'] >= min_n_days].copy()

sites1 = pd.merge(sites, mtypes_df1, on='Site')

ts_data = []
for index, row in sites1.iterrows():
    ts_data.append(
        ws.get_data(ecan_base_url, hts_name, row['Site'],
                    row['Measurement']))
ts_data_df = pd.concat(ts_data)
ts_data_df.index = ts_data_df.index.droplevel('Measurement')
ts_data_df2 = ts_data_df['Value'].unstack(0)

#################################################
### Process station data

period_data1 = ts_data_df2[from_date:to_date].copy()

missing_days = period_data1.isnull().sum()
good_ones = missing_days[
    missing_days <= min_missing_days].reset_index()['Site']

good_sites1 = period_data1.loc[:, period_data1.columns.isin(good_ones)]
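# The site-filtering step above (unstack sites to columns, then drop sites
# with too many missing days) can be checked on synthetic data; a minimal
# sketch with made-up values and min_missing_days = 2.
import numpy as np
import pandas as pd

rng = pd.date_range('2017-01-01', periods=10)
demo = pd.DataFrame({'A': np.arange(10.0), 'B': np.arange(10.0)}, index=rng)
demo.loc[rng[2:6], 'B'] = np.nan  # site B has four missing days

missing_days = demo.isnull().sum()
good_ones = missing_days[missing_days <= 2].index
good_sites = demo.loc[:, demo.columns.isin(good_ones)]
print(good_sites.columns.tolist())  # ['A']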
def calcTLI(x):
    print(sitename)

    # ## TP #####
    measurement = 'Total Phosphorus'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                   to_date=enddate, dtl_method='half').reset_index()
    dates_TP = wq1['DateTime']

    ############# remove this when database fixed
    TP1 = wq1['Value']
    TP_1 = pd.to_numeric(TP1, errors='coerce')
    TP_values = TP_1.astype(float).fillna(0.002).values
    TP = numpy.zeros(len(TP_values))

    ###### remove this when database fixed
    for i in range(0, len(TP_values)):
        if ((sitename == 'Sumner') or (sitename == 'Coleridge')) and (TP_values[i] > 0.055):
            TP[i] = 2.0
        elif (sitename == 'Benmore_Haldon') and (TP_values[i] > 0.055):
            TP[i] = 4.0
        elif (sitename == 'Marion') and (TP_values[i] > 0.3):
            TP[i] = 13.0
        # For Lake Benmore non-detects are treated as dl, not half dl
        elif ((sitename == 'Benmore_Dam') or (sitename == 'Benmore_Ahuriri')
              or (sitename == 'Benmore_Haldon')) and (TP_values[i] == 0.002):
            TP[i] = 4.0
        else:
            TP[i] = 1000.0 * TP_values[i]
    raw_data = {'DateTP': dates_TP, 'TP': TP}
    ################ remove to here

    ### put back in:
    # TP_1 = 1000.0*pd.to_numeric(TP1, errors='coerce')
    #### make new dataframe
    # raw_data = {'Date': dates_TP,'TP': TP_values}

    df = pd.DataFrame(raw_data, columns=['DateTP', 'TP'])

    ##### drop Benmore Boat data
    if sitename == 'Benmore_Haldon':
        # df['Date'] = pd.to_datetime(df['Date'])
        # df['Date'] = df['Date'].apply(lambda x: x.date())
        # print(df.DateTP)
        # date_list = pd.to_datetime('2018-10-26').date()
        # df.drop(pd.to_datetime('2018-10-26'))
        # date_list = [datetime(2018, 10, 26), datetime(2018, 11, 20),
        #              datetime(2018, 12, 19), datetime(2019, 1, 21),
        #              datetime(2019, 2, 12), datetime(2019, 3, 18),
        #              datetime(2019, 4, 12)]
        # df = df.drop(df.Date[date_list])
        ## 21-1-19 and 12-4-19 not pushed through yet
        df = df.drop([df.index[60], df.index[61], df.index[62],
                      df.index[65], df.index[67]])
        print(df)
        # https://stackoverflow.com/questions/35372499/how-can-i-delete-rows-for-a-particular-date-in-a-pandas-dataframe
        # https://thispointer.com/python-pandas-how-to-drop-rows-in-dataframe-by-index-labels/

    # ### TN ###
    measurement = 'Total Nitrogen'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                   to_date=enddate, dtl_method='half').reset_index()
    dates_TN = wq1['DateTime']
    TN1 = wq1['Value']
    TN_1 = pd.to_numeric(TN1, errors='coerce')
    TN_values = TN_1.astype(float).fillna(0.005).values

    ############# remove this when database fixed
    TN = numpy.zeros(len(TN_values))
    for i in range(0, len(TN_values)):
        if (sitename == 'Marion') and (TN_values[i] > 1.3):
            TN[i] = 350.0
        else:
            TN[i] = 1000.0 * TN_values[i]
    raw_data2 = {'DateTN': dates_TN, 'TN': TN}
    ############### remove to here

    ############# put this back in
    # TN_values = 1000.0*pd.to_numeric(TN1, errors='coerce')
    # ## make data frame with Date, TN
    # raw_data2 = {'Date': dates_TN,'TN': TN_values}

    df2 = pd.DataFrame(raw_data2, columns=['DateTN', 'TN'])

    ##### drop Benmore Boat data
    if sitename == 'Benmore_Haldon':
        # print(df2.DateTN)
        ## 21-1-19 and 12-4-19 not pushed through yet
        df2 = df2.drop([df2.index[60], df2.index[61], df2.index[62],
                        df2.index[65], df2.index[67]])
        print(df2)

    ############## chla ###
    measurement = 'Chlorophyll a (planktonic)'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                   to_date=enddate, dtl_method='half').reset_index()
    dates_chla = wq1['DateTime']
    chla1 = wq1['Value']
    chla_1 = pd.to_numeric(chla1, errors='coerce')
    chla_values2 = chla_1.astype(float).fillna(0.1).values

    chla = numpy.zeros(len(chla_values2))
    for i in range(0, len(chla_values2)):
        ############# remove this when database fixed
        if (sitename == 'Marion') and (chla_values2[i] > 50.0):
            chla[i] = 2.7
        ############### remove to here and change elif to if
        # elif chla_values2[i] < 0.19:
        #     chla[i] = 1000.0*chla_values2[i]
        # remove next two lines when 2011 March/April fixed
        elif chla_values2[i] > 150:
            chla[i] = chla_values2[i] / 1000.0
        else:
            chla[i] = chla_values2[i]
    raw_data3 = {'DateChla': dates_chla, 'chla': chla}
    df3 = pd.DataFrame(raw_data3, columns=['DateChla', 'chla'])

    ##### drop Benmore Boat data
    if sitename == 'Benmore_Haldon':
        # print(df3.DateChla)
        ## 21-1-19 and 12-4-19 not pushed through yet
        df3 = df3.drop([df3.index[62], df3.index[63], df3.index[64],
                        df3.index[67], df3.index[69]])
        print(df3)

    df.set_index(['DateTP'])
    df2.set_index(['DateTN'])
    df3.set_index(['DateChla'])

    sq = site
    print(sq)

    ### For Lake Benmore non-detects are treated as dl, not half dl
    if ((sitename == 'Benmore_Dam') or (sitename == 'Benmore_Ahuriri')
            or (sitename == 'Benmore_Haldon')):
        measurement = 'Total Phosphorus'
        wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                       to_date=enddate).reset_index()
        dates_TP = wq1['DateTime']
        TP1 = wq1['Value']
        TP_1 = pd.to_numeric(TP1, errors='coerce')
        TP_values = TP_1.astype(float).fillna(0.004).values
        TP = numpy.zeros(len(TP_values))
        for i in range(0, len(TP_values)):
            if (sitename == 'Benmore_Haldon') and (TP_values[i] > 0.055):
                TP[i] = 4.0
            elif TP_values[i] == 0.002:
                TP[i] = 4.0
            else:
                TP[i] = 1000.0 * TP_values[i]
        raw_data = {'DateTP': dates_TP, 'TP': TP}
        df = pd.DataFrame(raw_data, columns=['DateTP', 'TP'])
        df.set_index(['DateTP'])

    ###################################################################
    ## output csv with date, chla, TN, TP, Turbidity for timetrends
    ## https://pypi.org/project/pymannkendall/
    # ### Turbidity ###
    # measurement = 'Turbidity'
    # wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
    #                to_date=enddate).reset_index()
    # dates_Turbidity = wq1['DateTime']
    # Turbidity_values1 = wq1['Value']
    # Turb_1 = pd.to_numeric(Turbidity_values1, errors='coerce')
    # Turbidity_values = Turb_1.astype(float).fillna(0.1).values
    # raw_data4 = {'DateT': dates_Turbidity, 'Turbidity': Turbidity_values}
    # df4 = pd.DataFrame(raw_data4, columns=['DateT', 'Turbidity'])
    # ##### drop Benmore Boat data
    # if sitename == 'Benmore_Haldon':
    #     print(df4.DateT)
    #     ## 21-1-19 and 12-4-19 not pushed through yet
    #     df4 = df4.drop([df4.index[59], df4.index[60], df4.index[61],
    #                     df4.index[64], df4.index[66]])
    #     print(df4)
    # df4.set_index(['DateT'])
    # ### Output: build dataframe with all
    # dfcombined = pd.concat([df, df2, df3, df4], axis=1, join_axes=[df.index])
    # # Output to csv
    # dfcombined.to_csv(str(datapath_out)+'TT_'+sitename+'.csv')

    ###############################
    # Medians and NPS bands

    # TN
    new_df = df2.set_index('DateTN').copy()
    new_df.index = pd.to_datetime(new_df.index)
    ## annual medians
    TN_mean1 = new_df.resample('A-JUN').median()  # annual median for hydro year
    TN_mean = TN_mean1.TN
    Years_TN = pd.DatetimeIndex(TN_mean.index).year

    ######### Polymictic lakes
    if ((sitename == 'Emma') or (sitename == 'Emily')
            or (sitename == 'Georgina') or (sitename == 'MaoriFront')
            or (sitename == 'MaoriBack') or (sitename == 'Denny')
            or (sitename == 'McGregor') or (sitename == 'Middleton')
            or (sitename == 'Kellands_shore') or (sitename == 'Kellands_mid')):
        TN_bands = numpy.zeros(len(TN_mean))
        for i in range(0, len(TN_mean)):
            if TN_mean[i] <= 300.0:
                TN_bands[i] = 1.0
            elif 300.0 < TN_mean[i] <= 500.0:
                TN_bands[i] = 2.0
            elif 500.0 < TN_mean[i] <= 800.0:
                TN_bands[i] = 3.0
            elif TN_mean[i] > 800.0:
                TN_bands[i] = 4.0
            else:
                TN_bands[i] = 100.0
        # print(TN_bands)
    else:
        TN_bands = numpy.zeros(len(TN_mean))
        for i in range(0, len(TN_mean)):
            if TN_mean[i] <= 160.0:
                TN_bands[i] = 1.0
            elif 160.0 < TN_mean[i] <= 350.0:
                TN_bands[i] = 2.0
            elif 350.0 < TN_mean[i] <= 750.0:
                TN_bands[i] = 3.0
            elif TN_mean[i] > 750.0:
                TN_bands[i] = 4.0
            else:
                TN_bands[i] = 100.0
        # print(TN_bands)

    # Letter-band variant kept for reference:
    # TN_bands = []
    # for i in range(0, len(TN_mean)):
    #     if TN_mean[i] < 160.1:
    #         TN_bands[i] = 'A'
    #     elif 160.1 < TN_mean[i] < 350.1:
    #         TN_bands[i] = 'B'
    #     elif 350.1 < TN_mean[i] < 750.1:
    #         TN_bands[i] = 'C'
    #     elif TN_mean[i] > 750.1:
    #         TN_bands[i] = 'D'
    #     else:
    #         TN_bands[i] = 'NA'
    # print(TN_bands)
    # raw_data5 = {'Year': Years_TN, 'TNmean': TN_mean, 'TNBand': TN_bands}
    # df_Mean_TN = pd.DataFrame(raw_data5, columns=['Year', 'TNmean', 'TNBand'])
    # print(df_Mean_TN)

    # TP
    new_df = df.set_index('DateTP').copy()
    new_df.index = pd.to_datetime(new_df.index)
    ## annual medians
    TP_mean1 = new_df.resample('A-JUN').median()
    TP_mean = TP_mean1.TP
    Years_TP = pd.DatetimeIndex(TP_mean.index).year
    TP_bands = numpy.zeros(len(TP_mean))
    for i in range(0, len(TP_mean)):
        if TP_mean[i] <= 10.0:
            TP_bands[i] = 1.0
        elif 10.0 < TP_mean[i] <= 20.0:
            TP_bands[i] = 2.0
        elif 20.0 < TP_mean[i] <= 50.0:
            TP_bands[i] = 3.0
        elif TP_mean[i] > 50.0:
            TP_bands[i] = 4.0
        else:
            TP_bands[i] = 100.0
    # print(TP_bands)
    # raw_data6 = {'Year': Years_TP, 'TPmean': TP_mean}
    # df_Mean_TP = pd.DataFrame(raw_data6, columns=['Year', 'TPmean'])

    # Chla
    new_df = df3.set_index('DateChla').copy()
    new_df.index = pd.to_datetime(new_df.index)
    ## annual medians
    chla_mean1 = new_df.resample('A-JUN').median()
    chla_mean = chla_mean1.chla
    Years_chla = pd.DatetimeIndex(chla_mean.index).year
    chla_bands = numpy.zeros(len(chla_mean))
    for i in range(0, len(chla_mean)):
        if chla_mean[i] <= 2.0:
            chla_bands[i] = 1.0
        elif 2.0 < chla_mean[i] <= 5.0:
            chla_bands[i] = 2.0
        elif 5.0 < chla_mean[i] <= 12.0:
            chla_bands[i] = 3.0
        elif chla_mean[i] > 12.0:
            chla_bands[i] = 4.0
        else:
            chla_bands[i] = 100.0
    # print(chla_bands)

    ## annual max
    chla_max1 = new_df.resample('A-JUN').max()
    chla_max = chla_max1.chla
    chla_max_bands = numpy.zeros(len(chla_max))
    for i in range(0, len(chla_max)):
        if chla_max[i] <= 10.0:
            chla_max_bands[i] = 1.0
        elif 10.0 < chla_max[i] <= 25.0:
            chla_max_bands[i] = 2.0
        elif 25.0 < chla_max[i] <= 60.0:
            chla_max_bands[i] = 3.0
        elif chla_max[i] > 60.0:
            chla_max_bands[i] = 4.0
        else:
            chla_max_bands[i] = 100.0
    # print(chla_max_bands)
    # raw_data7 = {'Year': Years_chla, 'chla_mean': chla_mean}
    # df_Mean_chla = pd.DataFrame(raw_data7, columns=['Year', 'chla_mean'])

    raw_data8 = {'Year': Years_TN, 'TNmean': TN_mean, 'TNBand': TN_bands,
                 'TPmean': TP_mean, 'TPBand': TP_bands,
                 'chlamean': chla_mean, 'chlaBand': chla_bands,
                 'chlamax': chla_max, 'chlaMaxBand': chla_max_bands}
    df_NPS = pd.DataFrame(raw_data8,
                          columns=['Year', 'TNmean', 'TNBand', 'TPmean',
                                   'TPBand', 'chlamean', 'chlaBand',
                                   'chlamax', 'chlaMaxBand'])
    print(df_NPS)

    ## Output
    # ### build dataframe with all
    # dfcombined = pd.concat([df_TLI, df_chla, df_TP, df_TN], axis=1,
    #                        join_axes=[df_TLI.index])
    # Output to csv
    df_NPS.to_csv(str(datapath_out) + 'NPS_' + sitename + '.csv')
site = 'SQ31045'
measurement = 'Total Phosphorus'
from_date = '1983-11-22 10:50'
to_date = '2018-04-13 14:05'
dtl_method = 'trend'

##########################################
### Examples

## Get site list
sites = site_list(base_url, hts)

## Get the measurement types for a specific site
mtype_df1 = measurement_list(base_url, hts, site)

## Get the water quality parameter data (only applies to WQ data)
mtype_df2 = wq_sample_parameter_list(base_url, hts, site)

## Get the time series data for a specific site and measurement type
tsdata1 = get_data(base_url, hts, site, measurement, from_date=from_date,
                   to_date=to_date)

## Get extra WQ time series data (only applies to WQ data)
tsdata2, extra2 = get_data(base_url, hts, site, measurement,
                           from_date=from_date, to_date=to_date,
                           parameters=True)

## Get WQ sample data (only applies to WQ data)
tsdata3 = get_data(base_url, hts, site, 'WQ Sample', from_date=from_date,
                   to_date=to_date)

## Convert values under the detection limit to numeric values (only applies to WQ data)
tsdata4, extra4 = get_data(base_url, hts, site, measurement,
                           from_date=from_date, to_date=to_date,
                           parameters=True, dtl_method=dtl_method)
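# The two dtl_method values used in this section ('half' in the TLI script,
# 'trend' here) can be requested side by side to compare how hilltoppy's
# get_data converts censored values; a minimal sketch using only the calls
# shown above.
ts_half = get_data(base_url, hts, site, measurement, from_date=from_date,
                   to_date=to_date, dtl_method='half')
ts_trend = get_data(base_url, hts, site, measurement, from_date=from_date,
                    to_date=to_date, dtl_method='trend')

# Count how many values differ between the two conversions
print((ts_half['Value'] != ts_trend['Value']).sum())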
for site in site_measurement_df.index.unique(0).tolist():
    # Loop through each measurement listed at the site
    for measurement in site_measurement_df.loc[site].index.tolist():
        # Record site measurement to which sample count data is to be appended
        counts = [site, measurement]

        # Aluminium, Total results cannot be extracted with python - skip
        # See BY20/0150 Measurement Parameters or try to view table in Manager
        if measurement == 'Aluminium, Total':
            counts += ['Unknown'] * 6
            site_measurement_counts.append(counts)
            continue

        # Call the data for the specified site and measurement
        try:
            data = ws.get_data(base_url, hts, site, measurement,
                               from_date='1001-01-01', to_date='9999-01-01')
        # Some measurements have no data (ie. BX23/0035 - Benzo[a]anthracene)
        except ValueError:
            data = pd.DataFrame()

        # Check if data exists for site and measurement
        if data.empty:
            counts += [None] * 6
        else:
            # Format data to filter by date
            data_all = data.reset_index(level=2)

            # Record the min, max, and count of dates with samples
            counts += [
                min(data_all['DateTime'].tolist()),
                max(data_all['DateTime'].tolist()),
def calcTLI(x):
    print(sitename)

    # ## TP #####
    measurement = 'Total Phosphorus'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                   to_date=enddate, dtl_method='half').reset_index()
    dates_TP = wq1['DateTime']

    ############# remove this when database fixed
    TP1 = wq1['Value']
    TP_1 = pd.to_numeric(TP1, errors='coerce')
    TP_values = TP_1.astype(float).fillna(0.002).values
    TP = numpy.zeros(len(TP_values))

    ###### remove this when database fixed
    for i in range(0, len(TP_values)):
        if ((sitename == 'Sumner') or (sitename == 'Coleridge')) and (TP_values[i] > 0.055):
            TP[i] = 2.0
        elif (sitename == 'Benmore_Haldon') and (TP_values[i] > 0.055):
            TP[i] = 4.0
        elif (sitename == 'Marion') and (TP_values[i] > 0.3):
            TP[i] = 13.0
        # For Lake Benmore non-detects are treated as dl, not half dl
        elif ((sitename == 'Benmore_Dam') or (sitename == 'Benmore_Ahuriri')
              or (sitename == 'Benmore_Haldon')) and (TP_values[i] == 0.002):
            TP[i] = 4.0
        else:
            TP[i] = 1000.0 * TP_values[i]
    raw_data = {'Date': dates_TP, 'TP': TP}
    ################ remove to here

    ### put back in:
    # TP_1 = 1000.0*pd.to_numeric(TP1, errors='coerce')
    #### make new dataframe
    # raw_data = {'Date': dates_TP,'TP': TP_values}

    df = pd.DataFrame(raw_data, columns=['Date', 'TP'])

    ##### drop Benmore Boat data
    if sitename == 'Benmore_Haldon':
        # df['Date'] = pd.to_datetime(df['Date'])
        # df['Date'] = df['Date'].apply(lambda x: x.date())
        print(df.Date)
        # date_list = pd.to_datetime('2018-10-26').date()
        # df.drop(pd.to_datetime('2018-10-26'))
        # date_list = [datetime(2018, 10, 26), datetime(2018, 11, 20),
        #              datetime(2018, 12, 19), datetime(2019, 1, 21),
        #              datetime(2019, 2, 12), datetime(2019, 3, 18),
        #              datetime(2019, 4, 12)]
        # df = df.drop(df.Date[date_list])
        ## 21-1-19 and 12-4-19 not pushed through yet
        df = df.drop([df.index[60], df.index[61], df.index[62],
                      df.index[65], df.index[67]])
        print(df)
        # https://stackoverflow.com/questions/35372499/how-can-i-delete-rows-for-a-particular-date-in-a-pandas-dataframe
        # https://thispointer.com/python-pandas-how-to-drop-rows-in-dataframe-by-index-labels/

    # ### TN ###
    measurement = 'Total Nitrogen'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                   to_date=enddate, dtl_method='half').reset_index()
    dates_TN = wq1['DateTime']
    TN1 = wq1['Value']
    TN_1 = pd.to_numeric(TN1, errors='coerce')
    TN_values = TN_1.astype(float).fillna(0.005).values

    ############# remove this when database fixed
    TN = numpy.zeros(len(TN_values))
    for i in range(0, len(TN_values)):
        if (sitename == 'Marion') and (TN_values[i] > 1.3):
            TN[i] = 350.0
        else:
            TN[i] = 1000.0 * TN_values[i]
    raw_data2 = {'Date': dates_TN, 'TN': TN}
    ############### remove to here

    ############# put this back in
    # TN_values = 1000.0*pd.to_numeric(TN1, errors='coerce')
    # ## make data frame with Date, TN
    # raw_data2 = {'Date': dates_TN,'TN': TN_values}

    df2 = pd.DataFrame(raw_data2, columns=['Date', 'TN'])

    ##### drop Benmore Boat data
    if sitename == 'Benmore_Haldon':
        print(df2.Date)
        ## 21-1-19 and 12-4-19 not pushed through yet
        df2 = df2.drop([df2.index[60], df2.index[61], df2.index[62],
                        df2.index[65], df2.index[67]])
        print(df2)

    ############## chla ###
    measurement = 'Chlorophyll a (planktonic)'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                   to_date=enddate, dtl_method='half').reset_index()
    dates_chla = wq1['DateTime']
    chla1 = wq1['Value']
    chla_1 = pd.to_numeric(chla1, errors='coerce')
    chla_values2 = chla_1.astype(float).fillna(0.1).values

    chla = numpy.zeros(len(chla_values2))
    for i in range(0, len(chla_values2)):
        ############# remove this when database fixed
        if (sitename == 'Marion') and (chla_values2[i] > 50.0):
            chla[i] = 2.7
        ############### remove to here and change elif to if
        # elif chla_values2[i] < 0.19:
        #     chla[i] = 1000.0*chla_values2[i]
        # remove next two lines when 2011 March/April fixed
        elif chla_values2[i] > 150:
            chla[i] = chla_values2[i] / 1000.0
        else:
            chla[i] = chla_values2[i]
    raw_data3 = {'Date': dates_chla, 'chla': chla}
    df3 = pd.DataFrame(raw_data3, columns=['Date', 'chla'])

    ##### drop Benmore Boat data
    if sitename == 'Benmore_Haldon':
        print(df3.Date)
        ## 21-1-19 and 12-4-19 not pushed through yet
        df3 = df3.drop([df3.index[62], df3.index[63], df3.index[64],
                        df3.index[67], df3.index[69]])
        print(df3)

    ### Turbidity ###
    measurement = 'Turbidity'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate,
                   to_date=enddate).reset_index()
    dates_Turbidity = wq1['DateTime']
    Turbidity_values1 = wq1['Value']
    Turb_1 = pd.to_numeric(Turbidity_values1, errors='coerce')
    Turbidity_values = Turb_1.astype(float).fillna(0.1).values
    raw_data4 = {'Date': dates_Turbidity, 'Turbidity': Turbidity_values}
    df4 = pd.DataFrame(raw_data4, columns=['Date', 'Turbidity'])

    ##### drop Benmore Boat data
    if sitename == 'Benmore_Haldon':
        print(df4.Date)
        ## 21-1-19 and 12-4-19 not pushed through yet
        df4 = df4.drop([df4.index[59], df4.index[60], df4.index[61],
                        df4.index[64], df4.index[66]])
        print(df4)

    df.set_index(['Date'])
    df2.set_index(['Date'])
    df3.set_index(['Date'])
    df4.set_index(['Date'])

    ######### Limits
    if (sitename == 'Sumner') or (sitename == 'Coleridge'):
        TLI_limit = 2.0
        y_lim = 3.5
        TP_limit = 4.0
        TN_limit = 73.0
        chla_limit = 0.82
    elif ((sitename == 'Emma') or (sitename == 'Emily')
          or (sitename == 'Georgina') or (sitename == 'MaoriFront')
          or (sitename == 'MaoriBack')):
        TLI_limit = 4.0
        y_lim = 6.0
        TP_limit = 20.0
        TN_limit = 340.0
        chla_limit = 5.0
    elif sitename == 'Denny':
        TLI_limit = 3.0
        y_lim = 7.0
        TP_limit = 9.0
        TN_limit = 160.0
        chla_limit = 2.0
    # PC5
    elif ((sitename == 'Ohau') or (sitename == 'Pukaki')
          or (sitename == 'Tekapo')):
        TLI_limit = 1.7
        y_lim = 3.5
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif sitename == 'Benmore_Haldon':
        TLI_limit = 2.7
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif sitename == 'Benmore_Dam':
        TLI_limit = 2.7
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif sitename == 'Benmore_Ahuriri':
        TLI_limit = 2.9
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 5.0
    elif sitename == 'Aviemore':
        TLI_limit = 2.0
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif sitename == 'McGregor':
        TLI_limit = 3.2
        y_lim = 5.0
        TP_limit = 20.0
        TN_limit = 350.0
        chla_limit = 2.0
    elif sitename == 'Middleton':
        TLI_limit = 3.6
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif sitename == 'Alexandrina':
        TLI_limit = 3.1
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 350.0
        chla_limit = 2.0
    elif sitename == 'Opuha':
        TLI_limit = 4.0
        y_lim = 5.0
        TP_limit = 20.0
        TN_limit = 340.0
        chla_limit = 5.0
    elif (sitename == 'Kellands_shore') or (sitename == 'Kellands_mid'):
        TLI_limit = 3.2
        y_lim = 6.0
        TP_limit = 10.0
        TN_limit = 500.0
        chla_limit = 2.0
    else:
        TLI_limit = 3.0
        y_lim = 5.0
        TP_limit = 9.0
        TN_limit = 160.0
        chla_limit = 2.0

    sq = site
    print(sq)

    filename = sq + '_scatterplot'
    fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))
    ax[0, 0].plot(df.Date, df.TP, marker='.', linestyle='None')
    ax[0, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[0, 0].set_ylabel('Total Phosphorus in microg/L')
    # ax[0, 0].axhline(y=TP_limit, linewidth=2, color='#d62728', label='CLWRP limit')
    ax[0, 0].set_title(sitename1)
    ax[0, 1].plot(df2.Date, df2.TN, marker='.', linestyle='None')
    ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
    # ax[0, 1].axhline(y=TN_limit, linewidth=2, color='#d62728', label='CLWRP limit')
    ax[1, 0].plot(df3.Date, df3.chla, marker='.', linestyle='None')
    ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
    # ax[1, 0].axhline(y=chla_limit, linewidth=2, color='#d62728', label='CLWRP limit')
    ax[1, 1].plot(df4.Date, df4.Turbidity, marker='.', linestyle='None')
    ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[1, 1].set_ylabel('Turbidity in NTU')
    plt.tight_layout()
    plt.show()
    plt.savefig(str(datapath_out) + filename + '.png')
    plt.close()

    # TLI calculations (HCL)
    ### For Lake Benmore non-detects are treated as dl, not half dl
    if ((sitename == 'Benmore_Dam') or (sitename == 'Benmore_Ahuriri')
            or (sitename == 'Benmore_Haldon')):
        measurement = 'Total Phosphorus'
        wq1 = get_data(base_url, hts, site, measurement,
                       from_date='2004-01-12', to_date=enddate).reset_index()
        dates_TP = wq1['DateTime']
        TP1 = wq1['Value']
        TP_1 = pd.to_numeric(TP1, errors='coerce')
        TP_values = TP_1.astype(float).fillna(0.004).values
        TP = numpy.zeros(len(TP_values))
        for i in range(0, len(TP_values)):
            if (sitename == 'Benmore_Haldon') and (TP_values[i] > 0.055):
                TP[i] = 4.0
            elif TP_values[i] == 0.002:
                TP[i] = 4.0
            else:
                TP[i] = 1000.0 * TP_values[i]
        raw_data = {'Date': dates_TP, 'TP': TP}
        df = pd.DataFrame(raw_data, columns=['Date', 'TP'])
        df.set_index(['Date'])

    # TLI calculations (HCL)
    new_df = df2.set_index('Date').copy()
    new_df.index = pd.to_datetime(new_df.index)
    TLN_data = new_df.resample('A-JUN').mean()  # annual mean for hydro year
    TLN2 = TLN_data.resample('A-JUN').apply(
        lambda x: -3.61 + 3.01 * numpy.log10(x))
    ## annual means
    TN_mean1 = new_df.resample('A-JUN').mean()  # annual mean for hydro year
    TN_mean = TN_mean1.TN
    Years_TN = pd.DatetimeIndex(TN_mean.index).year
    raw_data5 = {'Year': Years_TN, 'TNmean': TN_mean}
    df_Mean_TN = pd.DataFrame(raw_data5, columns=['Year', 'TNmean'])

    new_df = df.set_index('Date').copy()
    new_df.index = pd.to_datetime(new_df.index)
    TLP_data = new_df.resample('A-JUN').mean()
    TLP2 = TLP_data.resample('A-JUN').apply(
        lambda x: 0.218 + 2.92 * numpy.log10(x))
    ## annual means
    TP_mean1 = new_df.resample('A-JUN').mean()
    TP_mean = TP_mean1.TP
    Years_TP = pd.DatetimeIndex(TP_mean.index).year
    raw_data6 = {'Year': Years_TP, 'TPmean': TP_mean}
    df_Mean_TP = pd.DataFrame(raw_data6, columns=['Year', 'TPmean'])

    new_df = df3.set_index('Date').copy()
    new_df.index = pd.to_datetime(new_df.index)
    TLC_data = new_df.resample('A-JUN').mean()
    TLC2 = TLC_data.resample('A-JUN').apply(
        lambda x: 2.22 + 2.54 * numpy.log10(x))
    ## annual means
    chla_mean1 = new_df.resample('A-JUN').mean()
    chla_mean = chla_mean1.chla
    Years_chla = pd.DatetimeIndex(chla_mean.index).year
    raw_data7 = {'Year': Years_chla, 'chla_mean': chla_mean}
    df_Mean_chla = pd.DataFrame(raw_data7, columns=['Year', 'chla_mean'])

    # Calculate TLI and make array with years
    TLI_data = (TLN2.TN + TLP2.TP + TLC2.chla) / 3.0
    Years = pd.DatetimeIndex(TLI_data.index).year

    # Output to csv
    TLI_data.to_csv(str(datapath_out) + 'TLI' + sitename + '.csv')
    # , header=['Year', 'TLI_score'])

    # Plot TLI bar chart with "limit" line
    n_groups = len(TLI_data)
    fig, ax = plt.subplots()
    index = numpy.arange(n_groups)
    bar_width = 0.35

    # Graph
    filename = sq + '_TLI'
    rects1 = ax.bar(index, TLI_data, bar_width, color='b',
                    # yerr=std_men, error_kw=error_config,
                    label='TLI')
    ax.set_ylim(0, y_lim)
    ax.set_xlabel('Year', fontsize=14)
    ax.set_ylabel('TLI Score', fontsize=14)
    ax.set_title(sitename1, fontsize=16)
    ax.set_xticks(index)  # + bar_width/2)
    ax.set_xticklabels(Years)
    plt.axhline(y=TLI_limit, linewidth=4, color='#d62728', label='CLWRP limit')
    ax.legend()
    fig.tight_layout()
    plt.show()
    plt.savefig(str(datapath_out) + filename + '.png')

    ### means, plan compliance
    # print(TN_mean)
    # print(TP_mean)
    # print(chla_mean)
    Years_ab = Years
    # Years_ab = ["%02d" % b for b in range(Years_abr)]
    # Years_ab = str(Years_abr).zfill(2)
    print(Years_ab)

    if sitename != 'Kellands_shore':
        filename = sq + '_scatterplot_means'
        fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))
        ax[0, 0].bar(index, TLI_data, bar_width, color='b', label='TLI')
        ax[0, 0].axhline(y=TLI_limit, linewidth=2, color='#d62728',
                         label='CLWRP limit')
        # ax[0, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 0].set_ylabel('TLI Score')
        ax[0, 0].set_xticks(index)  # + bar_width/2)
        ax[0, 0].set_xticklabels(Years_ab, rotation='vertical')
        ax[0, 0].set_title(sitename1)
        ax[0, 1].bar(index, TN_mean, bar_width, color='b',
                     label='TN annual mean')
        ax[0, 1].axhline(y=TN_limit, linewidth=2, color='#d62728')
        # , label='CLWRP limit')
        ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
        ax[0, 1].set_xticks(index)  # + bar_width/2)
        ax[0, 1].set_xticklabels(Years_ab, rotation='vertical')
        ax[1, 0].bar(index, chla_mean, bar_width, color='b',
                     label='Chla annual mean')
        ax[1, 0].axhline(y=chla_limit, linewidth=2, color='#d62728')
        # , label='CLWRP limit')
        ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
        ax[1, 0].set_xlabel('Year')
        ax[1, 0].set_xticks(index)  # + bar_width/2)
        ax[1, 0].set_xticklabels(Years_ab, rotation='vertical')
        ax[1, 1].bar(index, TP_mean, bar_width, color='b',
                     label='TP annual mean')
        ax[1, 1].axhline(y=TP_limit, linewidth=2, color='#d62728')
        # , label='CLWRP limit')
        ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 1].set_ylabel('Total Phosphorus in microg/L')
        ax[1, 1].set_xlabel('Year')
        ax[1, 1].set_xticks(index)  # + bar_width/2)
        ax[1, 1].set_xticklabels(Years_ab, rotation='vertical')
        plt.tight_layout()
        plt.show()
        plt.savefig(str(datapath_out) + filename + '.png')
        plt.close()

    #######
    # redo scatterplots for lakes with little data
    if ((sitename == 'Catherine') or (sitename == 'Denny')
            or (sitename == 'Evelyn') or (sitename == 'Henrietta')
            or (sitename == 'McGregor') or (sitename == 'Kellands_mid')
            or (sitename == 'Opuha') or (sitename == 'Emily')
            or (sitename == 'MaoriBack') or (sitename == 'MaoriFront')):
        filename = sq + '_scatterplot'
        fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))
        ax[0, 0].plot(df.Date, df.TP, marker='.', linestyle='None')
        # ax[0, 0].axhline(y=TP_limit, linewidth=4, color='#d62728', label='CLWRP limit')
        ax[0, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 0].set_ylabel('Total Phosphorus in microg/L')
        ax[0, 0].set_title(sitename1)
        ax[0, 1].plot(df2.Date, df2.TN, marker='.', linestyle='None')
        # ax[0, 1].axhline(y=TN_limit, linewidth=2, color='#d62728', label='CLWRP limit')
        ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
        ax[1, 0].plot(df3.Date, df3.chla, marker='.', linestyle='None')
        # ax[1, 0].axhline(y=chla_limit, linewidth=2, color='#d62728', label='CLWRP limit')
        ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
        ax[1, 1].plot(df4.Date, df4.Turbidity, marker='.', linestyle='None')
        ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 1].set_ylabel('Turbidity in NTU')
        plt.tight_layout()
        # fig.autofmt_xdate()
        fig.autofmt_xdate(bottom=0.2, rotation=45, ha='right')
        plt.show()
        plt.savefig(str(datapath_out) + filename + '.png')
        plt.close()

    # force chla on same date scale as other parameters
    if sitename == 'Kellands_shore':
        filename = sq + '_scatterplot'
        fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))
        ax[0, 0].plot(df.Date, df.TP, marker='.', linestyle='None')
        ax[0, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 0].set_ylabel('Total Phosphorus in microg/L')
        # ax[0, 0].axhline(y=TP_limit, linewidth=4, color='#d62728', label='CLWRP limit')
        ax[0, 0].set_title(sitename1)
        ax[0, 1].plot(df2.Date, df2.TN, marker='.', linestyle='None')
        ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
        ax[1, 0].plot(df3.Date, df3.chla, marker='.', linestyle='None')
        ax[1, 0].set_xlim('2004-01-12', '2017-07-01')
        ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
        ax[1, 1].plot(df4.Date, df4.Turbidity, marker='.', linestyle='None')
        ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 1].set_ylabel('Turbidity in NTU')
        plt.tight_layout()
        fig.autofmt_xdate()
        plt.show()
        plt.savefig(str(datapath_out) + filename + '.png')
        plt.close()

    # ########### Write all data to csv
    # df['SiteName'] = 'Lake' + sitename
    # df['SQ'] = site
    # df['Parameter'] = 'TP'
    # df.to_csv(str(datapath_out)+'TP_hcl_all_lakes.csv', mode='a',
    #           encoding='utf-8', header=False)
    # print('written csv')
    #
    # df2['SiteName'] = 'Lake' + sitename
    # df2['SQ'] = site
    # df2['Parameter'] = 'TN'
    # df2.to_csv(str(datapath_out)+'TN_hcl_all_lakes.csv', mode='a',
    #            encoding='utf-8', header=False)
    # print('written csv')
    #
    # df3['SiteName'] = 'Lake' + sitename
    # df3['SQ'] = site
    # df3['Parameter'] = 'chla'
    # df3.to_csv(str(datapath_out)+'chla_hcl_all_lakes.csv', mode='a',
    #            encoding='utf-8', header=False)
    # print('written csv')
    #
    # ### write means
    # df_Mean_chla['SiteName'] = 'Lake' + sitename
    # df_Mean_chla['SQ'] = site
    # df_Mean_chla['Parameter'] = 'chla_mean'
    # df_Mean_chla.to_csv(str(datapath_out)+'ChlaMeans_hcl_all_lakes.csv',
    #                     mode='a', encoding='utf-8', header=False)
    # print('written csv')
    #
    # df_Mean_TN['SiteName'] = 'Lake' + sitename
    # df_Mean_TN['SQ'] = site
    # df_Mean_TN['Parameter'] = 'TN_mean'
    # df_Mean_TN.to_csv(str(datapath_out)+'TNMeans_hcl_all_lakes.csv',
    #                   mode='a', encoding='utf-8', header=False)
    # print('written csv')
    #
    # df_Mean_TP['SiteName'] = 'Lake' + sitename
    # df_Mean_TP['SQ'] = site
    # df_Mean_TP['Parameter'] = 'TP_mean'
    # df_Mean_TP.to_csv(str(datapath_out)+'TPMeans_hcl_all_lakes.csv',
    #                   mode='a', encoding='utf-8', header=False)
    # print('written csv')

    a = TLI_data
    return a
### Usage

base_url = 'http://wateruse.ecan.govt.nz'
hts = 'WaterUse.hts'
site = 'L36/1764-M1'
to_date = '2017-01-01'

sites = site_list(base_url, hts)

mtype_df1 = measurement_list(base_url, hts, site)

mtypes_list = []
# Iterate over site names (site_list returns a dataframe with a SiteName column)
for s in sites.SiteName[:10]:
    mtype_df1 = measurement_list(base_url, hts, s)
    mtypes_list.append(mtype_df1)
mtypes_all = pd.concat(mtypes_list)

url = build_url(base_url, hts, 'MeasurementList', s)

mtype_df2 = wq_sample_parameter_list(base_url, hts, site)

tsdata1 = get_data(base_url, hts, site, measurement, from_date=from_date,
                   to_date=to_date)

tsdata2, extra2 = get_data(base_url, hts, site, measurement, parameters=True)

tsdata3 = get_data(base_url, hts, site, 'WQ Sample')

sample_param_list = []
for s in sites.SiteName[:10]:
    site_sample_param = wq_sample_parameter_list(base_url, hts, s)
    sample_param_list.append(site_sample_param)
sample_param_df = pd.concat(sample_param_list)
    mtype_list.append(m1)

mtypes = pd.concat(mtype_list).reset_index()
mtypes1 = mtypes[mtypes.To >= from_date]
mtypes2 = mtypes1[~mtypes1.Measurement.str.contains(
    'regularity', case=False)].sort_values('To').drop_duplicates(
        'Site', keep='last')

## Pull out the usage data and process
tsdata_list = []
for i, row in mtypes2.iterrows():
    timer = 10
    while timer > 0:
        try:
            t1 = ws.get_data(param['Input']['hilltop_base_url'],
                             param['Input']['hilltop_hts'], row['Site'],
                             row['Measurement'], str(from_date),
                             str(row['To']))
            break
        except Exception as err:
            err1 = err
            timer = timer - 1
            if timer == 0:
                raise ValueError(err1)
            else:
                print(err1)
                sleep(3)
    tsdata_list.append(t1)

tsdata1 = pd.concat(tsdata_list)
tsdata2 = util.proc_ht_use_data_ws(tsdata1)
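# The retry loop above can be factored into a reusable helper; a minimal
# sketch assuming only ws.get_data from hilltoppy. The helper name, retry
# count, and delay are illustrative, not part of the original script.
from time import sleep

from hilltoppy import web_service as ws


def get_data_with_retry(base_url, hts, site, measurement, from_date, to_date,
                        retries=10, delay=3):
    """Call ws.get_data, retrying transient failures up to `retries` times."""
    last_err = None
    for attempt in range(retries):
        try:
            return ws.get_data(base_url, hts, site, measurement,
                               str(from_date), str(to_date))
        except Exception as err:
            last_err = err
            print(err)
            sleep(delay)
    # Every attempt failed; surface the last error as the original loop does
    raise ValueError(last_err)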
def get_volume_data(base_url, hts, site, measurement, from_date, to_date):
    """Extract compliance volume data from Hilltop for a given date range."""
    tsdata = ws.get_data(base_url, hts, site, measurement, from_date, to_date)
    dfdata = tsdata.reset_index().drop(columns='Site').drop(
        columns='Measurement')

    return dfdata