示例#1
0
def _nwis_parameters(site, service):
    """Return ``{site: [parameter keys]}`` for the given NWIS site/service."""
    site_data = nwis.get_site_data(site, service=service)
    return {site: [key for key in site_data]}
示例#2
0
def _resolve_date_range(start_date, end_date, today):
    """Normalize (start_date, end_date) into a concrete past-or-today range.

    ``end_date`` may be a date-like value or an int offset in days relative
    to ``start_date``; a negative offset places the window *before*
    ``start_date``.  When both inputs are None the previous 10 days are used.
    """
    if start_date is None and end_date is None:
        # No date/range specified: use the previous 10 days.
        # (bug fix: original used days=-10, which put start_date in the future)
        return today - pd.Timedelta(days=10), today

    if start_date is not None:
        start_date = pd.to_datetime(start_date)
        if start_date > today:
            raise ValueError("Invalid start_date in the future!")
        if end_date is None:
            # Default window: 10 days forward from start_date.
            # (bug fix: original set offset = 10 but left end_date as None)
            end_date = start_date + pd.Timedelta(days=10)
        elif isinstance(end_date, int):
            # int end_date is an offset in days added to start_date.
            offset = end_date
            end_date = start_date + pd.Timedelta(days=offset)
            if offset < 0:
                # Negative offset: window runs from the earlier date up to
                # the originally requested start_date.
                # (bug fix: original shifted start_date first, corrupting
                # the subsequent end_date computation)
                start_date, end_date = end_date, start_date
        else:
            end_date = pd.to_datetime(end_date)
    else:
        # Only end_date given: clamp it to today and take the 10 days
        # before it. (bug fix: original branch never set start_date)
        end_date = min(pd.to_datetime(end_date), today)
        start_date = end_date - pd.Timedelta(days=10)

    # Never request data from the future.
    if end_date > today:
        end_date = today
    return start_date, end_date


def get_flows(start_date=None, end_date=None, gauge=None):
    """Download instantaneous streamflow (USGS parameter 00060) for the
    gauges returned by ``gcgauges()`` and combine them into one DataFrame.

    Parameters
    ----------
    start_date : date-like, optional
    end_date : date-like or int, optional
        Either an end date, or an int offset in days from ``start_date``
        (negative counts backwards from ``start_date``).
    gauge : str, optional
        If given, only this gauge ID is downloaded.

    Returns
    -------
    pandas.DataFrame or None
        One column per gauge, or None if no gauge produced data.

    Raises
    ------
    ValueError
        If ``start_date`` is in the future.
    """
    today = pd.to_datetime('now')
    start_date, end_date = _resolve_date_range(start_date, end_date, today)
    # (bug fix: removed the stray early ``return None`` that made the
    # download loop below unreachable)

    stations = gcgauges()
    parameter = '00060'
    flows = None
    for s in stations['ID']:
        # If a specific gauge was requested, skip all others.
        if gauge is not None and s != gauge:
            continue

        # Instantaneous-value service for the discharge parameter.
        # (bug fix: removed undefined ``period`` keyword that raised
        # NameError; the explicit start/end range is sufficient)
        d = nwis.get_site_data(s, service='iv',
                               parameter_code=parameter,
                               start_date=start_date,
                               end_date=end_date)

        fd = pd.DataFrame(d['00060:00011']['values'], dtype=object)
        # Drop the qualifiers column; only timestamps and values are kept.
        fd.drop('qualifiers', axis=1, inplace=True)

        # Strip the timezone suffix (e.g. "-07:00") from the timestamp
        # string, then convert the result to a datetime.  regex=True makes
        # the '.' wildcard explicit for modern pandas.
        fd['datetime'] = fd['datetime'].str.replace('-0.:00', '', regex=True)
        fd['datetime'] = pd.to_datetime(fd['datetime'])

        # Force the value to an int and name the column after the gauge.
        fd[['value']] = fd[['value']].astype(int)
        fd = fd.rename(columns={'value': s})

        # Aggregate this gauge's frame into the combined flows frame.
        flows = fd if flows is None else pd.concat([flows, fd], axis=1)

    return flows
示例#3
0
    def download(self, catalog_id, file_path, dataset, **kwargs):
        """Download one parameter/statistic time series for an NWIS site,
        save it as timeseries-hdf5, and return the dataset metadata.

        Parameters
        ----------
        catalog_id : str
            NWIS site code to download.
        file_path : str
            Base directory the HDF5 file is written under.
        dataset : str or None
            Dataset name; defaults to ``'station-<catalog_id>'``.
        **kwargs
            Parameter overrides (``parameter``, ``start``, ``end``,
            ``period``) applied on top of this service's defaults.

        Returns
        -------
        dict
            Metadata describing the saved file (``service_id`` removed).

        Raises
        ------
        ValueError
            If the service returns no data for the request.
        """
        p = param.ParamOverrides(self, kwargs)

        parameter = p.parameter
        start = p.start
        end = p.end
        period = p.period

        if dataset is None:
            dataset = 'station-' + catalog_id

        # An explicit start/end range takes precedence over a period.
        if start and end:
            period = None

        # Map the parameter name back to its NWIS parameter (and optional
        # statistic) code, e.g. "00060:00003" -> ('00060', '00003').
        pmap = self.parameter_map(invert=True)
        parameter_code, statistic_code = (pmap[parameter].split(':') +
                                          [None])[:2]

        data = nwis.get_site_data(catalog_id,
                                  parameter_code=parameter_code,
                                  statistic_code=statistic_code,
                                  start=start,
                                  end=end,
                                  period=period,
                                  service=self.service_name)

        # dict contains only one key since only one parameter/statistic was
        # downloaded; this would need to be changed if multiple
        # parameter/stat combinations were downloaded together
        if not data:
            raise ValueError('No Data Available')

        data = list(data.values())[0]

        # convert to dataframe and cleanup bad data
        df = pd.DataFrame(data['values'])
        if df.empty:
            raise ValueError('No Data Available')
        df = df.set_index('datetime')
        df.value = df.value.astype(float)
        if statistic_code in ['00001', '00002', '00003']:
            # Daily statistics: index by day-resolution periods.
            df.index = pd.to_datetime(df.index).to_period('D')
        else:
            df.index = pd.to_datetime(df.index)  # this is in UTC

        # -999999 is the NWIS missing-data sentinel.
        # Bug fix: the ``pandas.np`` alias was removed in pandas 1.0;
        # use a plain float NaN instead of ``pd.np.nan``.
        df[df.values == -999999] = float('nan')
        df.rename(columns={'value': parameter}, inplace=True)

        file_path = os.path.join(file_path, BASE_PATH, self.service_name,
                                 dataset, '{0}.h5'.format(dataset))

        # The raw value list now lives in df; keep only the metadata.
        del data['values']

        metadata = {
            'name': dataset,
            'metadata': data,
            'file_path': file_path,
            'file_format': 'timeseries-hdf5',
            'datatype': 'timeseries',
            'parameter': parameter,
            'unit': data['variable']['units']['code'],
            'service_id':
                'svc://usgs-nwis:{}/{}'.format(self.service_name, catalog_id)
        }

        # save data to disk, then strip service_id before returning
        io = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
        io.write(file_path, df, metadata)
        del metadata['service_id']

        return metadata
示例#4
0
import time

# Pull daily USGS streamflow with the ulmo library and assemble one frame.

t = pd.date_range(start='1980-10-01', end='2017-09-30')
df = pd.DataFrame(index=t)

sites = ['11427200', '11427500', '11427700', '11427750',
         '11427760', '11427960', '11428400', '11428600',
         '11428800', '11433060', '11433080']

# STREAMFLOW
for site in sites:
    print(site)
    data = ug.get_site_data(site_code=site,
                            parameter_code='00060',
                            service='dv',
                            start='1980', end='2017')

    # Daily-mean discharge lives under the parameter:statistic key 00060:00003.
    series = pd.DataFrame(data['00060:00003']['values'])
    series.index = pd.to_datetime(series.datetime)
    flow = series.value.astype(float).rename('Q-' + site)
    df = pd.concat([df, flow], axis=1, copy=False)
    time.sleep(2)  # pause between requests to be polite to the NWIS service
sites = ['11427400','11428700']

# STORAGE
for k in sites:
    print(k)
    data = ug.get_site_data(site_code=k, 
                            parameter_code='00054', 
                            service='dv',