Example #1
def extract_water_use_data(dataframe, site):
    """This function iterates through a measurement list, extracting water use
    data from Hilltop, and compiling it into a dataframe"""
    # Set base parameters
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'    
    # Create empty dataframe to append raw data into
    raw_data = pd.DataFrame(columns = ['Measurement','DateTime','Value'])
    # Find the start date of the time series
    from_d = dataframe['FromDate'].iloc[0]
    # Iterate through measurement list, extracting data and compiling
    for index, row in dataframe.iterrows():
        measurement = row['Measurement']
        to_d = row['ToDate']
        if from_d <= to_d:
            try:
                print("Extracting {0} data from {1} to {2}".format(measurement, from_d, to_d))
                tsdata = ws.get_data(base_url, hts, site, measurement, from_date=str(from_d), to_date=str(to_d))
                tsdata2 = tsdata.reset_index().drop(columns='Site')
                raw_data = pd.concat([raw_data, tsdata2], ignore_index=True)
                # Adjust start date to prevent overlapping time series
                from_d = to_d + dt.timedelta(days=1)
            except Exception:
                print('No data extracted for:', measurement)
                from_d = to_d + dt.timedelta(days=1)
        else:
            print('Skipping extraction for:', measurement)
    return raw_data
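A minimal driver for Example #1, shown as a sketch: the measurement-list layout (Measurement/FromDate/ToDate columns) and the hilltoppy import behind the `ws` alias are assumptions inferred from how the function reads its input, not part of the original example.

import datetime as dt

import pandas as pd
from hilltoppy import web_service as ws  # assumed source of the `ws` alias above

# Hypothetical measurement list: one row per measurement, in extraction order.
measurements = pd.DataFrame({
    'Measurement': ['Water Meter Volume', 'Compliance Volume'],  # placeholder names
    'FromDate': [dt.date(2019, 1, 1), dt.date(2019, 1, 1)],
    'ToDate': [dt.date(2019, 6, 30), dt.date(2019, 12, 31)],
})

raw = extract_water_use_data(measurements, 'L36/1764-M1')
print(raw.head())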
Example #2
def test_get_data1():
    tsdata1 = get_data(base_url,
                       hts,
                       site,
                       measurement,
                       from_date=from_date,
                       to_date=to_date)
    assert len(tsdata1) > 80
Example #3
def test_get_data3(data):
    tsdata3 = get_data(data['base_url'],
                       data['hts'],
                       data['site'],
                       'WQ Sample',
                       from_date=data['from_date'],
                       to_date=data['to_date'])
    assert len(tsdata3) > 800
Example #4
def test_get_data1(data):
    tsdata1 = get_data(data['base_url'],
                       data['hts'],
                       data['site'],
                       data['measurement'],
                       from_date=data['from_date'],
                       to_date=data['to_date'])
    assert len(tsdata1) > 80
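Examples #3, #4 (and #7 and #12 below) receive a `data` fixture. A sketch of a conftest.py that could back them, assuming the connection values shown in Examples #18 and #21; a real suite would point base_url/hts at whichever Hilltop server hosts the site.

# conftest.py -- hypothetical fixture backing the `data` argument in the tests.
import pytest

@pytest.fixture
def data():
    # Values borrowed from the usage examples further down this page.
    return {
        'base_url': 'http://wateruse.ecan.govt.nz',
        'hts': 'WaterUse.hts',
        'site': 'SQ31045',
        'measurement': 'Total Phosphorus',
        'from_date': '1983-11-22 10:50',
        'to_date': '2018-04-13 14:05',
        'dtl_method': 'trend',
    }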
Example #5
def test_get_data3():
    tsdata3 = get_data(base_url,
                       hts,
                       site,
                       'WQ Sample',
                       from_date=from_date,
                       to_date=to_date)
    assert len(tsdata3) > 800
Example #6
def extract_water_use_data(site, measurement, from_d, to_d):
    """This function extracts water use data from Hilltop, and compiles it into a dataframe"""
    # Set base parameters
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'
    tsdata = ws.get_data(base_url, hts, site, measurement, from_date=str(from_d), to_date=str(to_d))
    tsdata2 = tsdata.reset_index().drop(columns='Site')
    return tsdata2
Example #7
def test_get_data2(data):
    tsdata2, extra2 = get_data(data['base_url'],
                               data['hts'],
                               data['site'],
                               data['measurement'],
                               from_date=data['from_date'],
                               to_date=data['to_date'],
                               parameters=True)
    assert (len(tsdata2) > 80) & (len(extra2) > 300)
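Passing parameters=True makes get_data return two frames, the time series plus the associated WQ sample parameters, which is why this test (and Examples #11 and #12 below) unpacks a tuple.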
Example #8
def get_volume_data(site, from_date, to_date):
    """Extracts compliance volume data from Hilltop for a given date range, and sums the reading counts for each day"""
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'
    measurement = 'Compliance Volume'
    tsdata = ws.get_data(base_url, hts, site, measurement, from_date, to_date)
    vol_data = tsdata.reset_index().drop(columns='Site').drop(
        columns='Measurement')
    return vol_data
Example #9
def test_get_data2():
    tsdata2, extra2 = get_data(base_url,
                               hts,
                               site,
                               measurement,
                               from_date=from_date,
                               to_date=to_date,
                               parameters=True)
    assert (len(tsdata2) > 80) & (len(extra2) > 300)
Example #10
def test_site_mtypes(hts):
    sites = site_list(base_url, hts)
    site1 = sites.iloc[2].SiteName
    mtype_df1 = measurement_list(base_url, hts, site1).reset_index().iloc[0]
    tsdata1 = get_data(base_url,
                       hts,
                       site1,
                       mtype_df1.Measurement,
                       from_date=str(mtype_df1.From),
                       to_date=str(mtype_df1.From))
    assert len(tsdata1) == 1
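This test requests data with from_date equal to to_date (both set to the measurement's From timestamp), so exactly one record should come back, which is what the len(tsdata1) == 1 assertion verifies.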
Example #11
def test_get_data4():
    tsdata4, extra4 = get_data(base_url,
                               hts,
                               site,
                               measurement,
                               from_date=from_date,
                               to_date=to_date,
                               parameters=True,
                               dtl_method=dtl_method)
    assert (len(tsdata4) >
            80) & (len(extra4) > 300) & (tsdata4.Value.dtype.name == 'float32')
Example #12
def test_get_data4(data):
    tsdata4, extra4 = get_data(data['base_url'],
                               data['hts'],
                               data['site'],
                               data['measurement'],
                               from_date=data['from_date'],
                               to_date=data['to_date'],
                               parameters=True,
                               dtl_method=data['dtl_method'])
    assert (len(tsdata4) > 80) & (len(extra4) > 300) & np.issubdtype(
        tsdata4.Value.dtype, np.number)
Example #13
def get_volume_data(base_url, hts, site, measurement, from_date, to_date):
    """Extracts compliance volume data from Hilltop for a given date range, and sums the reading counts for each day"""
    tsdata = ws.get_data(base_url, hts, site, measurement, from_date, to_date)
    dfdata = tsdata.reset_index().drop(columns='Site').drop(
        columns='Measurement')
    dfdata['Date'] = dfdata['DateTime'].dt.date
    daily_counts = dfdata.groupby(['Date'])['Value'].agg(
        ['count']).rename(columns={'count': 'Readings'})
    idx = pd.date_range(from_date, to_date)
    daily_counts2 = daily_counts.reindex(idx, fill_value=0)
    return daily_counts2
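A usage sketch for the Example #13 variant above. Because missing days are reindexed with fill_value=0, days with no telemetry appear as Readings == 0; the site and dates below are placeholders.

base_url = 'http://wateruse.ecan.govt.nz'
hts = 'WaterUse.hts'
counts = get_volume_data(base_url, hts, 'L36/1764-M1', 'Compliance Volume',
                         '2019-01-01', '2019-01-31')
gap_days = counts[counts['Readings'] == 0]  # days with no readings at all
print('{} days without readings'.format(len(gap_days)))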
Example #14
def extract_water_use_data(site, measurement, from_d, to_d):
    """This function extracts water use data from Hilltop for a specified site,
    measurement type and date range"""
    print("Processing {} data".format(measurement))
    # Set base parameters
    base_url = 'http://wateruse.ecan.govt.nz'
    hts = 'WaterUse.hts'
    # Extract data
    tsdata = ws.get_data(base_url,
                         hts,
                         site,
                         measurement,
                         from_date=str(from_d),
                         to_date=str(to_d))
    tsdata2 = tsdata.reset_index().drop(columns='Site')
    return tsdata2
Example #15
def ecan_ts_data(server,
                 database,
                 site_ts_summ,
                 from_date,
                 to_date,
                 dtl_method=None):
    """

    """
    dataset1 = site_ts_summ.DatasetTypeID.iloc[0]
    sites1 = site_ts_summ.ExtSiteID.unique().tolist()

    if dataset1 < 10000:
        ts1 = mssql.rd_sql(server,
                           database,
                           ts_table, ['ExtSiteID', 'DateTime', 'Value'],
                           where_in={
                               'DatasetTypeID': [dataset1],
                               'ExtSiteID': sites1
                           },
                           from_date=from_date,
                           to_date=to_date,
                           date_col='DateTime')
    else:
        ts_list = []
        mtype = site_ts_summ.MeasurementType.iloc[0]
        for s in sites1:
            ts0 = ws.get_data(base_url,
                              hts,
                              s,
                              mtype,
                              from_date,
                              to_date,
                              dtl_method=dtl_method)
            ts_list.append(ts0)
        ts1 = pd.concat(ts_list).reset_index().drop('Measurement', axis=1)
        ts1.rename(columns={'Site': 'ExtSiteID'}, inplace=True)

    return ts1
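Example #15 branches on the dataset type: DatasetTypeID values below 10000 are read straight from the SQL store via mssql.rd_sql, while larger IDs are fetched site by site from the Hilltop web service and then reshaped (Site renamed to ExtSiteID, Measurement dropped) so both paths yield the same ExtSiteID/DateTime/Value layout. Note that ts_table, base_url and hts are module-level names not shown in the snippet.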
Example #16
mtypes = []
for s in sites['Site'].values:
    mtypes.append(ws.measurement_list(ecan_base_url, hts_name, s, mtype))

mtypes_df = pd.concat(mtypes).reset_index().drop('DataType', axis=1)

mtypes_df['n_days'] = (mtypes_df['To'] - mtypes_df['From']).dt.days

mtypes_df1 = mtypes_df[mtypes_df['n_days'] >= min_n_days].copy()

sites1 = pd.merge(sites, mtypes_df1, on='Site')

ts_data = []
for index, row in sites1.iterrows():
    ts_data.append(
        ws.get_data(ecan_base_url, hts_name, row['Site'], row['Measurement']))

ts_data_df = pd.concat(ts_data)
ts_data_df.index = ts_data_df.index.droplevel('Measurement')

ts_data_df2 = ts_data_df['Value'].unstack(0)

#################################################
### Process station data

period_data1 = ts_data_df2[from_date:to_date].copy()
missing_days = period_data1.isnull().sum()
good_ones = missing_days[
    missing_days <= min_missing_days].reset_index()['Site']

good_sites1 = period_data1.loc[:, period_data1.columns.isin(good_ones)]
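The screening step keeps only the sites whose count of missing (null) days over the from_date-to_date window is at or below min_missing_days; good_sites1 holds the per-site series that pass.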
Example #17
def calcTLI(x):
    print(sitename)
    
#    ## TP #####
    measurement = 'Total Phosphorus'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate, to_date=enddate,dtl_method='half').reset_index()    #, dtl_method='half')
    dates_TP = wq1['DateTime']

############# remove this when data base fixed
    TP1 = wq1['Value']
    TP_1 = pd.to_numeric(TP1, errors='coerce')
    TP_values = TP_1.astype(float).fillna(0.002).values
    TP = numpy.zeros(len(TP_values))
###### remove this when database fixed
    for i in range(len(TP_values)):
        if ((sitename == 'Sumner') or (sitename == 'Coleridge')) and (TP_values[i] > 0.055): 
            TP[i] = 2.0    
        elif ((sitename == 'Benmore_Haldon')) and (TP_values[i] > 0.055): 
            TP[i] = 4.0 
        elif ((sitename == 'Marion') and (TP_values[i] > 0.3)):
            TP[i] = 13.0
        # For Lake Benmore non-detects are treated as dl, not half dl
        elif (((sitename == 'Benmore_Dam') or (sitename == 'Benmore_Ahuriri') or (sitename == 'Benmore_Haldon')) and (TP_values[i] == 0.002)):
            TP[i] = 4.0   
        else:
            TP[i] = 1000.0*TP_values[i]   
  
    raw_data = {'DateTP': dates_TP,'TP': TP}
################ remove to here   
### put back in: 
#    TP_1 = 1000.0*pd.to_numeric(TP1, errors='coerce')
####   make new dataframe
#    raw_data = {'Date': dates_TP,'TP': TP_values}
    df = pd.DataFrame(raw_data, columns = ['DateTP', 'TP'])
    
    ##### drop Benmore Boat data
    if (sitename == 'Benmore_Haldon'):
#        df['Date']= pd.to_datetime(df['Date'])
#        df['Date'] = df['Date'].apply(lambda x: x.date())
#        print df.DateTP
##        date_list = ('2018-10-26')
#        date_list = pd.to_datetime('2018-10-26').date()
##        df.drop(pd.to_datetime('2018-10-26'))
##        date_list = [datetime(2018, 10, 26),
##                 datetime(2018, 11, 20),
##                 datetime(2018, 12, 19),
##                 datetime(2019, 1, 21),
##                 datetime(2019, 2, 12),
##                 datetime(2019, 3, 18),
##                 datetime(2019, 4, 12)]
#        print date_list
##        df = df.drop(df.Date[date_list])
        df = df.drop([df.index[60],df.index[61],df.index[62],df.index[65],df.index[67]])## 21-1-19 and 12-4-19 not pushed through yet
        print(df)
        #https://stackoverflow.com/questions/35372499/how-can-i-delete-rows-for-a-particular-date-in-a-pandas-dataframe
        #https://thispointer.com/python-pandas-how-to-drop-rows-in-dataframe-by-index-labels/
#       
    ### TN ###
    measurement = 'Total Nitrogen'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate, to_date=enddate, dtl_method='half').reset_index()
    dates_TN = wq1['DateTime']
    TN1 = wq1['Value']   
    TN_1 = pd.to_numeric(TN1, errors='coerce')
    TN_values = TN_1.astype(float).fillna(0.005).values

############# remove this when data base fixed
    TN = numpy.zeros(len(TN_values))

    for i in range(len(TN_values)):
        if ((sitename == 'Marion') and (TN_values[i] > 1.3)):
            TN[i] = 350.0
        else:
            TN[i] = 1000.0*TN_values[i]

    raw_data2 = {'DateTN': dates_TN,'TN': TN}
############### remove to here 
  
############# put this back in
#    TN_values = 1000.0*pd.to_numeric(TN1, errors='coerce')
#    ## make data frame with Date,TN
#    raw_data2 = {'Date': dates_TN,'TN': TN_values}
    df2 = pd.DataFrame(raw_data2, columns = ['DateTN', 'TN'])
    
        ##### drop Benmore Boat data
    if (sitename == 'Benmore_Haldon'):
#        print df2.DateTN
        df2 = df2.drop([df2.index[60],df2.index[61],df2.index[62],df2.index[65],df2.index[67]])## 21-1-19 and 12-4-19 not pushed through yet
        print(df2)

############## chla ###                     
    measurement = 'Chlorophyll a (planktonic)'
    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate, to_date=enddate, dtl_method='half').reset_index()
    dates_chla = wq1['DateTime']  
    chla1 = wq1['Value']    
    chla_1 = pd.to_numeric(chla1, errors='coerce')
    chla_values2 = chla_1.astype(float).fillna(0.1).values                    

    chla = numpy.zeros(len(chla_values2))
    for i in range(len(chla_values2)):
############# remove this when data base fixed
        if ((sitename == 'Marion') and (chla_values2[i] > 50.0)):
            chla[i] = 2.7
############### remove to here and change elif to if
#        elif (chla_values2[i] < 0.19):
#            chla[i] = 1000.0*chla_values2[i]
#            #remove next two lines when 2011 March/April fixed
        elif (chla_values2[i] > 150):
            chla[i] = chla_values2[i]/1000.0
        else:
            chla[i] = chla_values2[i]
    
    raw_data3 = {'DateChla': dates_chla,'chla': chla}
    df3 = pd.DataFrame(raw_data3, columns = ['DateChla', 'chla'])
    
    ##### drop Benmore Boat data
    if (sitename == 'Benmore_Haldon'):
#        print df3.DateChla
        df3 = df3.drop([df3.index[62],df3.index[63],df3.index[64],df3.index[67],df3.index[69]])## 21-1-19 and 12-4-19 not pushed through yet
        print(df3)

    # Note: DataFrame.set_index returns a new frame, so these three calls
    # have no effect; indexed copies are created where needed below.
    df.set_index(['DateTP'])
    df2.set_index(['DateTN'])
    df3.set_index(['DateChla'])
    
   
    sq = site
    print(sq)
    
### For Lake Benmore non-detects are treated as dl, not half dl
    if sitename in ('Benmore_Dam', 'Benmore_Ahuriri', 'Benmore_Haldon'):
        measurement = 'Total Phosphorus'
        wq1 = get_data(base_url, hts, site, measurement, from_date=startdate, to_date=enddate).reset_index()
        dates_TP = wq1['DateTime']

        TP1 = wq1['Value']
        TP_1 = pd.to_numeric(TP1, errors='coerce')
        TP_values = TP_1.astype(float).fillna(0.004).values
        TP = numpy.zeros(len(TP_values))
        for i in range(len(TP_values)):
            if ((sitename == 'Benmore_Haldon')) and (TP_values[i] > 0.055): 
                TP[i] = 4.0 
            elif (TP_values[i] == 0.002):
                TP[i] = 4.0   
            else:
                TP[i] = 1000.0*TP_values[i]         
        raw_data = {'DateTP': dates_TP,'TP': TP}
        df = pd.DataFrame(raw_data, columns = ['DateTP', 'TP'])   
        df.set_index(['DateTP'])
        
        
###################################################################
##output csv withdate, chla, Tn, TP, Turbidity for timetrends
##https://pypi.org/project/pymannkendall/
#    
#    ### Turbidity ### 
#
#    measurement = 'Turbidity'
#    wq1 = get_data(base_url, hts, site, measurement, from_date=startdate, to_date=enddate).reset_index()
#    dates_Turbidity = wq1['DateTime']
#    Turbidity_values1 = wq1['Value']
#    
#    Turb_1 = pd.to_numeric(Turbidity_values1, errors='coerce')
#    Turbidity_values = Turb_1.astype(float).fillna(0.1).values   
#    
#    raw_data4 = {'DateT': dates_Turbidity,'Turbidity': Turbidity_values}
#    df4 = pd.DataFrame(raw_data4, columns = ['DateT', 'Turbidity'])
#    
#        ##### drop Benmore Boat data
#    if (sitename == 'Benmore_Haldon'):
#        print df4.DateT
#        df4 = df4.drop([df4.index[59],df4.index[60],df4.index[61],df4.index[64],df4.index[66]])## 21-1-19 and 12-4-19 not pushed through yet
#        print df4
#
#    df4.set_index(['DateT'])
#
###Output
##### build dataframe with all
#    dfcombined= pd.concat([df, df2, df3, df4], axis=1, join_axes=[df.index])
#    # Output to csv
#    dfcombined.to_csv(str(datapath_out)+'TT_'+sitename+'.csv')
#    
    
###############################

# Medians and NPS bands
#TN
    new_df = df2.set_index('DateTN').copy()
    new_df.index = pd.to_datetime(new_df.index)

    ## annual medians
    TN_mean1 = new_df.resample('A-JUN').median() # annual median for hydro year
    TN_mean = TN_mean1.TN 
    Years_TN = pd.DatetimeIndex(TN_mean.index).year
                               
    ######### Polymictic lakes
    if sitename in ('Emma', 'Emily', 'Georgina', 'MaoriFront', 'MaoriBack',
                    'Denny', 'McGregor', 'Middleton', 'Kellands_shore',
                    'Kellands_mid'):
        
        TN_bands = numpy.zeros(len(TN_mean))
        for i in range(0,len(TN_mean)):
            if (TN_mean[i] <= 300.0):
                TN_bands[i] = 1.0
            elif (300.0 <TN_mean[i] <= 500.0):
                 TN_bands[i] = 2.0
            elif (500.0 <TN_mean[i] <= 800.0):
                 TN_bands[i] = 3.0
            elif (TN_mean[i] > 800.0):
                 TN_bands[i] = 4.0
            else:
                TN_bands[i] = 100.0    
#        print TN_bands        
    else:
        TN_bands = numpy.zeros(len(TN_mean))
        for i in range(0,len(TN_mean)):
            if (TN_mean[i] <= 160.0):
                TN_bands[i] = 1.0
            elif (160.0 <TN_mean[i] <= 350.0):
                 TN_bands[i] = 2.0
            elif (350.0 <TN_mean[i] <= 750.0):
                 TN_bands[i] = 3.0
            elif (TN_mean[i] > 750.0):
                 TN_bands[i] = 4.0
            else:
                TN_bands[i] = 100.0    
#        print TN_bands
    
#    TN_bands = []
#    for i in range(0,len(TN_mean)):
#        if (TN_mean[i] < 160.1):
#            TN_bands[i] = 'A'
#        elif (160.1 <TN_mean[i] < 350.1):
#             TN_bands[i] = 'B'
#        elif (350.1 <TN_mean[i] < 750.1):
#             TN_bands[i] = 'C'
#        elif (TN_mean[i] > 750.1):
#             TN_bands[i] = 'D'
#        else:
#            TN_bands[i] = 'NA'
#    print TN_bands
                               
#    raw_data5 = {'Year': Years_TN,'TNmean': TN_mean, 'TNBand': TN_bands}
#    df_Mean_TN= pd.DataFrame(raw_data5, columns = ['Year', 'TNmean','TNBand'])
##    print df_Mean_TN
    
    # TP    
    new_df = df.set_index('DateTP').copy()
    new_df.index = pd.to_datetime(new_df.index)
    ## annual medians
    TP_mean1 = new_df.resample('A-JUN').median() 
    TP_mean = TP_mean1.TP 
    Years_TP = pd.DatetimeIndex(TP_mean.index).year
                               
    TP_bands = numpy.zeros(len(TP_mean))
    for i in range(0,len(TP_mean)):
        if (TP_mean[i] <= 10.0):
            TP_bands[i] = 1.0
        elif (10.0 <TP_mean[i] <= 20.0):
             TP_bands[i] = 2.0
        elif (20.0 <TP_mean[i] <= 50.0):
             TP_bands[i] = 3.0
        elif (TP_mean[i] > 50.0):
             TP_bands[i] = 4.0
        else:
            TP_bands[i] = 100.0    
#    print TP_bands
                               
#    raw_data6 = {'Year': Years_TP,'TPmean': TP_mean}
#    df_Mean_TP= pd.DataFrame(raw_data6, columns = ['Year', 'TPmean'])
    
    #Chla
    new_df = df3.set_index('DateChla').copy()
    new_df.index = pd.to_datetime(new_df.index)

    ## annual medians
    chla_mean1 = new_df.resample('A-JUN').median()
    chla_mean = chla_mean1.chla 
    Years_chla = pd.DatetimeIndex(chla_mean.index).year
                                 
    chla_bands = numpy.zeros(len(chla_mean))
    for i in range(0,len(chla_mean)):
        if (chla_mean[i] <= 2.0):
            chla_bands[i] = 1.0
        elif (2.0 <chla_mean[i] <= 5.0):
             chla_bands[i] = 2.0
        elif (5.0 <chla_mean[i] <= 12.0):
             chla_bands[i] = 3.0
        elif (chla_mean[i] > 12.0):
             chla_bands[i] = 4.0
        else:
            chla_bands[i] = 100.0    
#    print chla_bands
    
    ## annual max  
    chla_max1 = new_df.resample('A-JUN').max()
    chla_max = chla_max1.chla 
                                 
    chla_max_bands = numpy.zeros(len(chla_max))
    for i in range(0,len(chla_max)):
        if (chla_max[i] <= 10.0):
            chla_max_bands[i] = 1.0
        elif (10.0 <chla_max[i] <= 25.0):
             chla_max_bands[i] = 2.0
        elif (25.0 <chla_max[i] <= 60.0):
             chla_max_bands[i] = 3.0
        elif (chla_max[i] > 60.0):
             chla_max_bands[i] = 4.0
        else:
            chla_max_bands[i] = 100.0    
#    print chla_max_bands
                      
                                 
#    raw_data7 = {'Year': Years_chla,'chla_mean': chla_mean}
#    df_Mean_chla= pd.DataFrame(raw_data7, columns = ['Year', 'chla_mean'])
    
    raw_data8 = {'Year': Years_TN,'TNmean': TN_mean, 'TNBand': TN_bands,'TPmean': TP_mean, 'TPBand': TP_bands,'chlamean': chla_mean, 'chlaBand': chla_bands,'chlamax': chla_max, 'chlaMaxBand': chla_max_bands}
    
    df_NPS= pd.DataFrame(raw_data8, columns = ['Year','TNmean','TNBand','TPmean','TPBand','chlamean','chlaBand','chlamax','chlaMaxBand'])
    print(df_NPS)
    
    ##Output
#        ### build dataframe with all
#    dfcombined= pd.concat([df_TLI, df_chla, df_TP, df_TN], axis=1, join_axes=[df_TLI.index])
    # Output to csv
    df_NPS.to_csv(str(datapath_out)+'NPS_'+sitename+'.csv')
Example #18
site = 'SQ31045'
measurement = 'Total Phosphorus'
from_date = '1983-11-22 10:50'
to_date = '2018-04-13 14:05'
dtl_method = 'trend'
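# (Assumed context, not shown in the original snippet: site_list,
# measurement_list, wq_sample_parameter_list and get_data come from
# hilltoppy's web_service module, and base_url/hts would point at the
# Hilltop server, as in Example #21.)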

##########################################
### Examples

## Get site list
sites = site_list(base_url, hts)

## Get the measurement types for a specific site
mtype_df1 = measurement_list(base_url, hts, site)

## Get the water quality parameter data (only applies to WQ data)
mtype_df2 = wq_sample_parameter_list(base_url, hts, site)

## Get the time series data for a specific site and measurement type
tsdata1 = get_data(base_url, hts, site, measurement, from_date=from_date, to_date=to_date)

## Get extra WQ time series data (only applies to WQ data)
tsdata2, extra2 = get_data(base_url, hts, site, measurement, from_date=from_date, to_date=to_date, parameters=True)

## Get WQ sample data (only applies to WQ data)
tsdata3 = get_data(base_url, hts, site, 'WQ Sample', from_date=from_date, to_date=to_date)

## Convert values under the detection limit to numeric values (only applies to WQ data)
tsdata4, extra4 = get_data(base_url, hts, site, measurement, from_date=from_date, to_date=to_date, parameters=True, dtl_method=dtl_method)
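The dtl_method argument controls how censored values below the detection limit are converted: this block passes 'trend', while Examples #17 and #20 use 'half' (the Lake Benmore comments there note that non-detects are sometimes treated as the detection limit itself rather than half of it).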

Example #19
for site in site_measurement_df.index.unique(0).tolist():
    # Loop through each measurement listed at the site
    for measurement in site_measurement_df.loc[site].index.tolist():
        # Record site measurement to which sample count data is to be appended
        counts = [site, measurement]
        # Aluminium, Total results cannot be extracted with python - skip
        # See BY20/0150 Measurement Parameters or try to view table in Manager
        if measurement == 'Aluminium, Total':
            counts += ['Unknown'] * 6
            site_measurement_counts.append(counts)
            continue
        # Call the data for the specified site and measurement
        try:
            data = ws.get_data(base_url,
                               hts,
                               site,
                               measurement,
                               from_date='1001-01-01',
                               to_date='9999-01-01')
        # Some measurements have no data (ie. BX23/0035 - Benzo[a]anthracene)
        except ValueError:
            data = pd.DataFrame()
        # Check if data exists for site and measurement
        if data.empty:
            counts += [None] * 6
        else:
            # Format data to filter by date
            data_all = data.reset_index(level=2)
            # Record the min, max, and count of dates with samples
            counts += [
                min(data_all['DateTime'].tolist()),
                max(data_all['DateTime'].tolist()),
Example #20
def calcTLI(x):
    print(sitename)

    #    ## TP #####
    measurement = 'Total Phosphorus'
    wq1 = get_data(base_url,
                   hts,
                   site,
                   measurement,
                   from_date=startdate,
                   to_date=enddate,
                   dtl_method='half').reset_index()  #, dtl_method='half')
    dates_TP = wq1['DateTime']

    ############# remove this when data base fixed
    TP1 = wq1['Value']
    TP_1 = pd.to_numeric(TP1, errors='coerce')
    TP_values = TP_1.astype(float).fillna(0.002).values
    TP = numpy.zeros(len(TP_values))
    ###### remove this when database fixed
    for i in range(0, len(TP_values)):
        if ((sitename == 'Sumner') or
            (sitename == 'Coleridge')) and (TP_values[i] > 0.055):
            TP[i] = 2.0
        elif ((sitename == 'Benmore_Haldon')) and (TP_values[i] > 0.055):
            TP[i] = 4.0
        elif ((sitename == 'Marion') and (TP_values[i] > 0.3)):
            TP[i] = 13.0
        # For Lake Benmore non-detects are treated as dl, not half dl
        elif (
            ((sitename == 'Benmore_Dam') or (sitename == 'Benmore_Ahuriri') or
             (sitename == 'Benmore_Haldon')) and (TP_values[i] == 0.002)):
            TP[i] = 4.0
        else:
            TP[i] = 1000.0 * TP_values[i]

    raw_data = {'Date': dates_TP, 'TP': TP}
    ################ remove to here
    ### put back in:
    #    TP_1 = 1000.0*pd.to_numeric(TP1, errors='coerce')
    ####   make new dataframe
    #    raw_data = {'Date': dates_TP,'TP': TP_values}
    df = pd.DataFrame(raw_data, columns=['Date', 'TP'])

    ##### drop Benmore Boat data
    if (sitename == 'Benmore_Haldon'):
        #        df['Date']= pd.to_datetime(df['Date'])
        #        df['Date'] = df['Date'].apply(lambda x: x.date())
        print(df.Date)
        ##        date_list = ('2018-10-26')
        #        date_list = pd.to_datetime('2018-10-26').date()
        ##        df.drop(pd.to_datetime('2018-10-26'))
        ##        date_list = [datetime(2018, 10, 26),
        ##                 datetime(2018, 11, 20),
        ##                 datetime(2018, 12, 19),
        ##                 datetime(2019, 1, 21),
        ##                 datetime(2019, 2, 12),
        ##                 datetime(2019, 3, 18),
        ##                 datetime(2019, 4, 12)]
        #        print date_list
        ##        df = df.drop(df.Date[date_list])
        df = df.drop([
            df.index[60], df.index[61], df.index[62], df.index[65],
            df.index[67]
        ])  ## 21-1-19 and 12-4-19 not pushed through yet
        print(df)
        #https://stackoverflow.com/questions/35372499/how-can-i-delete-rows-for-a-particular-date-in-a-pandas-dataframe
        #https://thispointer.com/python-pandas-how-to-drop-rows-in-dataframe-by-index-labels/
#
### TN ###
    measurement = 'Total Nitrogen'
    wq1 = get_data(base_url,
                   hts,
                   site,
                   measurement,
                   from_date=startdate,
                   to_date=enddate,
                   dtl_method='half').reset_index()
    dates_TN = wq1['DateTime']
    TN1 = wq1['Value']
    TN_1 = pd.to_numeric(TN1, errors='coerce')
    TN_values = TN_1.astype(float).fillna(0.005).values

    ############# remove this when data base fixed
    TN = numpy.zeros(len(TN_values))

    for i in range(0, len(TN_values)):
        if ((sitename == 'Marion') and (TN_values[i] > 1.3)):
            TN[i] = 350.0
        else:
            TN[i] = 1000.0 * TN_values[i]

    raw_data2 = {'Date': dates_TN, 'TN': TN}
    ############### remove to here

    ############# put this back in
    #    TN_values = 1000.0*pd.to_numeric(TN1, errors='coerce')
    #    ## make data frame with Date,TN
    #    raw_data2 = {'Date': dates_TN,'TN': TN_values}
    df2 = pd.DataFrame(raw_data2, columns=['Date', 'TN'])

    ##### drop Benmore Boat data
    if (sitename == 'Benmore_Haldon'):
        print(df2.Date)
        df2 = df2.drop([
            df2.index[60], df2.index[61], df2.index[62], df2.index[65],
            df2.index[67]
        ])  ## 21-1-19 and 12-4-19 not pushed through yet
        print(df2)

############## chla ###
    measurement = 'Chlorophyll a (planktonic)'
    wq1 = get_data(base_url,
                   hts,
                   site,
                   measurement,
                   from_date=startdate,
                   to_date=enddate,
                   dtl_method='half').reset_index()
    dates_chla = wq1['DateTime']
    chla1 = wq1['Value']
    chla_1 = pd.to_numeric(chla1, errors='coerce')
    chla_values2 = chla_1.astype(float).fillna(0.1).values

    chla = numpy.zeros(len(chla_values2))
    for i in range(0, len(chla_values2)):
        ############# remove this when data base fixed
        if ((sitename == 'Marion') and (chla_values2[i] > 50.0)):
            chla[i] = 2.7
############### remove to here and change elif to if
#        elif (chla_values2[i] < 0.19):
#            chla[i] = 1000.0*chla_values2[i]
#            #remove next two lines when 2011 March/April fixed
        elif (chla_values2[i] > 150):
            chla[i] = chla_values2[i] / 1000.0
        else:
            chla[i] = chla_values2[i]

    raw_data3 = {'Date': dates_chla, 'chla': chla}
    df3 = pd.DataFrame(raw_data3, columns=['Date', 'chla'])

    ##### drop Benmore Boat data
    if (sitename == 'Benmore_Haldon'):
        print(df3.Date)
        df3 = df3.drop([
            df3.index[62], df3.index[63], df3.index[64], df3.index[67],
            df3.index[69]
        ])  ## 21-1-19 and 12-4-19 not pushed through yet
        print(df3)

    ### Turbidity ###

    measurement = 'Turbidity'
    wq1 = get_data(base_url,
                   hts,
                   site,
                   measurement,
                   from_date=startdate,
                   to_date=enddate).reset_index()
    dates_Turbidity = wq1['DateTime']
    Turbidity_values1 = wq1['Value']

    Turb_1 = pd.to_numeric(Turbidity_values1, errors='coerce')
    Turbidity_values = Turb_1.astype(float).fillna(0.1).values

    raw_data4 = {'Date': dates_Turbidity, 'Turbidity': Turbidity_values}
    df4 = pd.DataFrame(raw_data4, columns=['Date', 'Turbidity'])

    ##### drop Benmore Boat data
    if (sitename == 'Benmore_Haldon'):
        print(df4.Date)
        df4 = df4.drop([
            df4.index[59], df4.index[60], df4.index[61], df4.index[64],
            df4.index[66]
        ])  ## 21-1-19 and 12-4-19 not pushed through yet
        print(df4)

    # Note: DataFrame.set_index returns a new frame, so these four calls
    # have no effect; indexed copies are created where needed below.
    df.set_index(['Date'])
    df2.set_index(['Date'])
    df3.set_index(['Date'])
    df4.set_index(['Date'])

    #########Limits
    if ((sitename == 'Sumner') or (sitename == 'Coleridge')):
        TLI_limit = 2.0
        y_lim = 3.5
        TP_limit = 4.0
        TN_limit = 73.0
        chla_limit = 0.82
    elif ((sitename == 'Emma') or (sitename == 'Emily')
          or (sitename == 'Georgina') or (sitename == 'MaoriFront')
          or (sitename == 'MaoriBack')):
        TLI_limit = 4.0
        y_lim = 6.0
        TP_limit = 20.0
        TN_limit = 340.0
        chla_limit = 5.0
    elif (sitename == 'Denny'):
        TLI_limit = 3.0
        y_lim = 7.0
        TP_limit = 9.0
        TN_limit = 160.0
        chla_limit = 2.0
# PC5
    elif ((sitename == 'Ohau') or (sitename == 'Pukaki')
          or (sitename == 'Tekapo')):
        TLI_limit = 1.7
        y_lim = 3.5
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif (sitename == 'Benmore_Haldon'):
        TLI_limit = 2.7
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif (sitename == 'Benmore_Dam'):
        TLI_limit = 2.7
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif (sitename == 'Benmore_Ahuriri'):
        TLI_limit = 2.9
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 5.0
    elif (sitename == 'Aviemore'):
        TLI_limit = 2.0
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif (sitename == 'McGregor'):
        TLI_limit = 3.2
        y_lim = 5.0
        TP_limit = 20.0
        TN_limit = 350.0
        chla_limit = 2.0
    elif (sitename == 'Middleton'):
        TLI_limit = 3.6
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 160.0
        chla_limit = 2.0
    elif (sitename == 'Alexandrina'):
        TLI_limit = 3.1
        y_lim = 5.0
        TP_limit = 10.0
        TN_limit = 350.0
        chla_limit = 2.0
    elif (sitename == 'Opuha'):
        TLI_limit = 4.0
        y_lim = 5.0
        TP_limit = 20.0
        TN_limit = 340.0
        chla_limit = 5.0
    elif ((sitename == 'Kellands_shore') or (sitename == 'Kellands_mid')):
        TLI_limit = 3.2
        y_lim = 6.0
        TP_limit = 10.0
        TN_limit = 500.0
        chla_limit = 2.0
    else:
        TLI_limit = 3.0
        y_lim = 5.0
        TP_limit = 9.0
        TN_limit = 160.0
        chla_limit = 2.0

    sq = site
    print(sq)
    filename = sq + '_scatterplot'
    fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))
    ax[0, 0].plot(df.Date, df.TP, marker='.', linestyle='None')
    ax[0, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[0, 0].set_ylabel('Total Phosphorus in microg/L')
    #    ax[0,0].axhline(y = TP_limit, linewidth=2, color='#d62728', label = 'CLWRP limit')
    ax[0, 0].set_title(sitename1)
    ax[0, 1].plot(df2.Date, df2.TN, marker='.', linestyle='None')
    ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
    #    ax[0,1].axhline(y = TN_limit, linewidth=2, color='#d62728', label = 'CLWRP limit')
    ax[1, 0].plot(df3.Date, df3.chla, marker='.', linestyle='None')
    ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
    #    ax[1,0].axhline(y = chla_limit, linewidth=2, color='#d62728', label = 'CLWRP limit')
    ax[1, 1].plot(df4.Date, df4.Turbidity, marker='.', linestyle='None')
    ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
    ax[1, 1].set_ylabel('Turbidity in NTU')
    plt.tight_layout()
    plt.savefig(str(datapath_out) + filename + '.png')  # save before show()
    plt.show()
    plt.close()

    # TLI calculations (HCL)
    ### For Lake Benmore non-detects are treated as dl, not half dl
    if ((sitename == 'Benmore_Dam') or (sitename == 'Benmore_Ahuriri')
            or (sitename == 'Benmore_Haldon')):
        measurement = 'Total Phosphorus'
        wq1 = get_data(base_url,
                       hts,
                       site,
                       measurement,
                       from_date='2004-01-12',
                       to_date=enddate).reset_index()
        dates_TP = wq1['DateTime']

        TP1 = wq1['Value']
        TP_1 = pd.to_numeric(TP1, errors='coerce')
        TP_values = TP_1.astype(float).fillna(0.004).values
        TP = numpy.zeros(len(TP_values))
        for i in range(0, len(TP_values)):
            if ((sitename == 'Benmore_Haldon')) and (TP_values[i] > 0.055):
                TP[i] = 4.0
            elif (TP_values[i] == 0.002):
                TP[i] = 4.0
            else:
                TP[i] = 1000.0 * TP_values[i]
        raw_data = {'Date': dates_TP, 'TP': TP}
        df = pd.DataFrame(raw_data, columns=['Date', 'TP'])
        df.set_index(['Date'])

# TLI calculations (HCL)
    new_df = df2.set_index('Date').copy()
    new_df.index = pd.to_datetime(new_df.index)
    TLN_data = new_df.resample('A-JUN').mean()  # annual mean for hydro year
    TLN2 = TLN_data.resample('A-JUN').apply(
        lambda x: -3.61 + 3.01 * numpy.log10(x))
    ## annual means
    TN_mean1 = new_df.resample('A-JUN').mean()  # annual mean for hydro year
    TN_mean = TN_mean1.TN
    Years_TN = pd.DatetimeIndex(TN_mean.index).year

    raw_data5 = {'Year': Years_TN, 'TNmean': TN_mean}
    df_Mean_TN = pd.DataFrame(raw_data5, columns=['Year', 'TNmean'])

    new_df = df.set_index('Date').copy()
    new_df.index = pd.to_datetime(new_df.index)
    TLP_data = new_df.resample('A-JUN').mean()
    TLP2 = TLP_data.resample('A-JUN').apply(
        lambda x: 0.218 + 2.92 * numpy.log10(x))
    ## annual means
    TP_mean1 = new_df.resample('A-JUN').mean()
    TP_mean = TP_mean1.TP
    Years_TP = pd.DatetimeIndex(TP_mean.index).year

    raw_data6 = {'Year': Years_TP, 'TPmean': TP_mean}
    df_Mean_TP = pd.DataFrame(raw_data6, columns=['Year', 'TPmean'])

    new_df = df3.set_index('Date').copy()
    new_df.index = pd.to_datetime(new_df.index)
    TLC_data = new_df.resample('A-JUN').mean()
    TLC2 = TLC_data.resample('A-JUN').apply(
        lambda x: 2.22 + 2.54 * numpy.log10(x))
    ## annual means
    chla_mean1 = new_df.resample('A-JUN').mean()
    chla_mean = chla_mean1.chla
    Years_chla = pd.DatetimeIndex(chla_mean.index).year

    raw_data7 = {'Year': Years_chla, 'chla_mean': chla_mean}
    df_Mean_chla = pd.DataFrame(raw_data7, columns=['Year', 'chla_mean'])

    # Calculate TLI and make array with years
    TLI_data = (TLN2.TN + TLP2.TP + TLC2.chla) / 3.0
    Years = pd.DatetimeIndex(TLI_data.index).year

    # Output to csv
    TLI_data.to_csv(str(datapath_out) + 'TLI' + sitename + '.csv')
    #                    , header= ['Year', 'TLI_score'])

    # Plot TLI barchart with "limit' line
    n_groups = len(TLI_data)
    fig, ax = plt.subplots()
    index = numpy.arange(n_groups)
    bar_width = 0.35

    # Graph
    filename = sq + '_TLI'
    rects1 = ax.bar(
        index,
        TLI_data,
        bar_width,
        color='b',
        #                    yerr=std_men, error_kw=error_config,
        label='TLI')
    ax.set_ylim(0, y_lim)
    ax.set_xlabel('Year', fontsize=14)
    ax.set_ylabel('TLI Score', fontsize=14)
    ax.set_title(sitename1, fontsize=16)
    ax.set_xticks(index)  # + bar_width/2)
    ax.set_xticklabels((Years))
    plt.axhline(y=TLI_limit, linewidth=4, color='#d62728', label='CLWRP limit')
    ax.legend()
    fig.tight_layout()
    plt.savefig(str(datapath_out) + filename + '.png')
    plt.show()

    ### means, plan compliance
    #    print TN_mean
    #    print TP_mean
    #    print chla_mean
    Years_ab = Years
    #    Years_ab=["%02d" % b for b in range(Years_abr)]
    #    Years_ab = str(Years_abr).zfill(2)
    print(Years_ab)
    if (sitename != 'Kellands_shore'):

        filename = sq + '_scatterplot_means'
        fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))

        ax[0, 0].bar(index, TLI_data, bar_width, color='b', label='TLI')
        ax[0, 0].axhline(y=TLI_limit,
                         linewidth=2,
                         color='#d62728',
                         label='CLWRP limit')
        #        ax[0,0].legend(frameon=True, facecolor = 'white', framealpha = 1.0)
        ax[0, 0].set_ylabel('TLI Score')
        ax[0, 0].set_xticks(index)  # + bar_width/2)
        ax[0, 0].set_xticklabels((Years_ab), rotation='vertical')
        ax[0, 0].set_title(sitename1)

        ax[0, 1].bar(index,
                     TN_mean,
                     bar_width,
                     color='b',
                     label='TN annual mean')
        ax[0, 1].axhline(y=TN_limit, linewidth=2,
                         color='#d62728')  #, label = 'CLWRP limit')
        ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
        ax[0, 1].set_xticks(index)  # + bar_width/2)
        ax[0, 1].set_xticklabels((Years_ab), rotation='vertical')

        ax[1, 0].bar(index,
                     chla_mean,
                     bar_width,
                     color='b',
                     label='Chla annual mean')
        ax[1, 0].axhline(y=chla_limit, linewidth=2,
                         color='#d62728')  #, label = 'CLWRP limit')
        ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
        ax[1, 0].set_xlabel('Year')
        ax[1, 0].set_xticks(index)  # + bar_width/2)
        ax[1, 0].set_xticklabels((Years_ab), rotation='vertical')

        ax[1, 1].bar(index,
                     TP_mean,
                     bar_width,
                     color='b',
                     label='TP annual mean')
        ax[1, 1].axhline(y=TP_limit, linewidth=2,
                         color='#d62728')  #, label = 'CLWRP limit')
        ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 1].set_ylabel('Total Phosphorus in microg/L')
        ax[1, 1].set_xlabel('Year')
        ax[1, 1].set_xticks(index)  # + bar_width/2)
        ax[1, 1].set_xticklabels((Years_ab), rotation='vertical')

        plt.tight_layout()
        plt.savefig(str(datapath_out) + filename + '.png')
        plt.show()
        plt.close()

#######
# redo scatterplots for lakes with little data
    if ((sitename == 'Catherine') or (sitename == 'Denny')
            or (sitename == 'Evelyn') or (sitename == 'Henrietta')
            or (sitename == 'McGregor') or (sitename == 'Kellands_mid')
            or (sitename == 'Opuha') or (sitename == 'Emily')
            or (sitename == 'MaoriBack') or (sitename == 'MaoriFront')):
        filename = sq + '_scatterplot'
        fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))
        ax[0, 0].plot(df.Date, df.TP, marker='.', linestyle='None')
        #        ax[0,0].axhline(y = TP_limit, linewidth=4, color='#d62728', label = 'CLWRP limit')
        ax[0, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 0].set_ylabel('Total Phosphorus in microg/L')
        ax[0, 0].set_title(sitename1)
        ax[0, 1].plot(df2.Date, df2.TN, marker='.', linestyle='None')
        #        ax[0,1].axhline(y = TN_limit, linewidth=2, color='#d62728', label = 'CLWRP limit')
        ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
        ax[1, 0].plot(df3.Date, df3.chla, marker='.', linestyle='None')
        #        ax[1,0].axhline(y = chla_limit, linewidth=2, color='#d62728', label = 'CLWRP limit')
        ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
        ax[1, 1].plot(df4.Date, df4.Turbidity, marker='.', linestyle='None')
        ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 1].set_ylabel('Turbidity in NTU')
        plt.tight_layout()
        #        fig.autofmt_xdate()
        fig.autofmt_xdate(bottom=0.2, rotation=45, ha='right')
        plt.savefig(str(datapath_out) + filename + '.png')
        plt.show()
        plt.close()

    # force chla on same date scale as other parameters
    if (sitename == 'Kellands_shore'):
        filename = sq + '_scatterplot'
        fig, ax = plt.subplots(2, 2, figsize=(8.5, 5))
        ax[0, 0].plot(df.Date, df.TP, marker='.', linestyle='None')
        ax[0, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 0].set_ylabel('Total Phosphorus in microg/L')
        #        ax[0,0].axhline(y = TP_limit, linewidth=4, color='#d62728', label = 'CLWRP limit')
        ax[0, 0].set_title(sitename1)
        ax[0, 1].plot(df2.Date, df2.TN, marker='.', linestyle='None')
        ax[0, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[0, 1].set_ylabel('Total Nitrogen in microg/L')
        ax[1, 0].plot(df3.Date, df3.chla, marker='.', linestyle='None')
        ax[1, 0].set_xlim('2004-01-12', '2017-07-01')
        ax[1, 0].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 0].set_ylabel('Chlorophyll a in microg/L')
        ax[1, 1].plot(df4.Date, df4.Turbidity, marker='.', linestyle='None')
        ax[1, 1].legend(frameon=True, facecolor='white', framealpha=1.0)
        ax[1, 1].set_ylabel('Turbidity in NTU')
        plt.tight_layout()
        fig.autofmt_xdate()
        plt.savefig(str(datapath_out) + filename + '.png')
        plt.show()
        plt.close()


#    ########### Write all data to csv
#    df['SiteName'] = 'Lake'+sitename
#    df['SQ'] = site
#    df['Parameter'] = 'TP'
#
#    df.to_csv(str(datapath_out)+'TP_hcl_all_lakes.csv', mode='a', encoding='utf-8', header=False)#, header= ['Date','TP','Sitename','SQ','Parameter'])
#    print 'written csv'
#
#    df2['SiteName'] = 'Lake'+sitename
#    df2['SQ'] = site
#    df2['Parameter'] = 'TN'
#    df2.to_csv(str(datapath_out)+'TN_hcl_all_lakes.csv', mode='a', encoding='utf-8', header=False)#, header= ['Date','TP','Sitename','SQ','Parameter'])
#    print 'written csv'
#
#    df3['SiteName'] = 'Lake'+sitename
#    df3['SQ'] = site
#    df3['Parameter'] = 'chla'
##    print df3
#    df3.to_csv(str(datapath_out)+'chla_hcl_all_lakes.csv', mode='a', encoding='utf-8', header=False)#, header= ['Date','TP','Sitename','SQ','Parameter'])
#    print 'written csv'
#
#    ### write means
#    df_Mean_chla['SiteName'] = 'Lake'+sitename
#    df_Mean_chla['SQ'] = site
#    df_Mean_chla['Parameter'] = 'chla_mean'
#    df_Mean_chla.to_csv(str(datapath_out)+'ChlaMeans_hcl_all_lakes.csv', mode='a', encoding='utf-8', header=False)
#    print 'written csv'
#
#    df_Mean_TN['SiteName'] = 'Lake'+sitename
#    df_Mean_TN['SQ'] = site
#    df_Mean_TN['Parameter'] = 'TN_mean'
#    df_Mean_TN.to_csv(str(datapath_out)+'TNMeans_hcl_all_lakes.csv', mode='a', encoding='utf-8', header=False)
#    print 'written csv'
#
#    df_Mean_TP['SiteName'] = 'Lake'+sitename
#    df_Mean_TP['SQ'] = site
#    df_Mean_TP['Parameter'] = 'TP_mean'
#    df_Mean_TP.to_csv(str(datapath_out)+'TPMeans_hcl_all_lakes.csv', mode='a', encoding='utf-8', header=False)
#    print 'written csv'

    a = TLI_data
    return a
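For reference, the index computed above is the mean of three component indices over July-June hydrological years ('A-JUN' resampling): TLn = -3.61 + 3.01*log10(TN), TLp = 0.218 + 2.92*log10(TP), TLc = 2.22 + 2.54*log10(chla), and TLI = (TLn + TLp + TLc) / 3, exactly as coded in the three resample/apply steps.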
Example #21
to_date = '2017-01-01'

sites = site_list(base_url, hts)
mtype_df1 = measurement_list(base_url, hts, site)

mtypes_list = []
for s in sites[:10]:
    mtype_df1 = measurement_list(base_url, hts, s)
    mtypes_list.append(mtype_df1)
mtypes_all = pd.concat(mtypes_list)

url = build_url(base_url, hts, 'MeasurementList', s)


mtype_df2 = wq_sample_parameter_list(base_url, hts, site)
tsdata1 = get_data(base_url, hts, site, measurement, from_date=from_date, to_date=to_date)
tsdata2, extra2 = get_data(base_url, hts, site, measurement, parameters=True)
tsdata3 = get_data(base_url, hts, site, 'WQ Sample')

sample_param_list = []
for s in sites[:10]:
    site_sample_param = wq_sample_parameter_list(base_url, hts, s)
    sample_param_list.append(site_sample_param)

sample_param_df = pd.concat(sample_param_list)


### Usage
base_url = 'http://wateruse.ecan.govt.nz'
hts = 'WaterUse.hts'
site = 'L36/1764-M1'
Example #22
            mtype_list.append(m1)
        mtypes = pd.concat(mtype_list).reset_index()

        mtypes1 = mtypes[mtypes.To >= from_date]
        mtypes2 = mtypes1[~mtypes1.Measurement.str.
                          contains('regularity', case=False)].sort_values(
                              'To').drop_duplicates('Site', keep='last')

        ## Pull out the usage data and process
        tsdata_list = []
        for i, row in mtypes2.iterrows():
            timer = 10
            while timer > 0:
                try:
                    t1 = ws.get_data(param['Input']['hilltop_base_url'],
                                     param['Input']['hilltop_hts'],
                                     row['Site'], row['Measurement'],
                                     str(from_date), str(row['To']))
                    break
                except Exception as err:
                    err1 = err
                    timer = timer - 1
                    if timer == 0:
                        raise ValueError(err1)
                    else:
                        print(err1)
                        sleep(3)

            tsdata_list.append(t1)
        tsdata1 = pd.concat(tsdata_list)

        tsdata2 = util.proc_ht_use_data_ws(tsdata1)
Example #23
def get_volume_data(base_url, hts, site, measurement, from_date, to_date):
    """Extracts compliance volume data from Hilltop for a given date range, and sums the reading counts for each day"""
    tsdata = ws.get_data(base_url, hts, site, measurement, from_date, to_date)
    dfdata = tsdata.reset_index().drop(columns='Site').drop(
        columns='Measurement')
    return dfdata
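Unlike Example #13, this final variant stops after dropping the Site and Measurement index levels and returns the raw DateTime/Value rows, leaving any resampling to daily counts to the caller.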