Exemplo n.º 1
0
Arquivo: core.py Projeto: emiliom/ulmo
def _open_data_file(url, data_dir):
    """Return the file handle produced by ``util.open_file_for_url`` for ``url``.

    The local copy lives in ``data_dir`` under the text that follows the last
    ``/`` in ``url``. The helper is called with ``check_modified=True`` and
    ``use_bytes=True`` and is expected to download the file when needed and
    otherwise reuse the earlier download.
    """
    # rpartition yields the whole url as the tail when it contains no '/'
    _, _, basename = url.rpartition('/')
    local_path = os.path.join(data_dir, basename)
    return util.open_file_for_url(
        url, local_path, check_modified=True, use_bytes=True)
Exemplo n.º 2
0
Arquivo: core.py Projeto: wilsaj/ulmo
def _open_data_file(url, data_dir):
    """Open the data file for ``url`` through its local copy in ``data_dir``.

    The local file is named after the final ``/``-separated piece of ``url``.
    Opening is delegated to ``util.open_file_for_url`` with
    ``check_modified=True``; that helper is expected to download the file if
    necessary and otherwise use the previously downloaded one.
    """
    local_name = url.split('/')[-1]
    return util.open_file_for_url(
        url,
        os.path.join(data_dir, local_name),
        check_modified=True,
    )
Exemplo n.º 3
0
Arquivo: core.py Projeto: wilsaj/ulmo
def _get_element_data(element, by_state, use_file, location_names):
    """Fetch and parse the values file for a single element.

    The source url comes from ``_get_url(element, by_state)``; the local copy
    is kept in ``CIRS_DIR`` under the url's last path segment. ``use_file`` is
    forwarded to ``util.open_file_for_url`` and the opened handle is parsed by
    ``_parse_values``, whose result is returned as-is.
    """
    source_url = _get_url(element, by_state)
    local_path = os.path.join(CIRS_DIR, source_url.rpartition('/')[2])

    with util.open_file_for_url(source_url, local_path, use_file=use_file) as handle:
        parsed = _parse_values(handle, by_state, location_names, element)

    return parsed
Exemplo n.º 4
0
def get_stations():
    """Return the stations listed in the ``fld_station`` select box.

    The page at ``URL`` is opened (in bytes mode) through
    ``util.open_file_for_url`` with a local copy named
    ``datamining_field_list.cfm`` inside ``USACE_RIVERGAGES_DIR``. Every
    ``<option>`` of the ``<select id="fld_station">`` element is handed to
    ``_parse_options`` and that function's result is returned unchanged.
    """
    cache_path = os.path.join(USACE_RIVERGAGES_DIR, 'datamining_field_list.cfm')

    with util.open_file_for_url(URL, cache_path, use_bytes=True) as page:
        document = BeautifulSoup(page)
        station_select = document.find('select', id='fld_station')
        option_tags = station_select.find_all('option')
        parsed = _parse_options(option_tags)

    return parsed
Exemplo n.º 5
0
def get_stations():
    """Return the stations offered by the ``fld_station`` drop-down.

    Opens ``URL`` through ``util.open_file_for_url`` (local copy:
    ``datamining_field_list.cfm`` in ``USACE_RIVERGAGES_DIR``), collects the
    ``<option>`` tags under ``<select id="fld_station">`` and returns whatever
    ``_parse_options`` produces for them.
    """
    local_copy = os.path.join(USACE_RIVERGAGES_DIR, 'datamining_field_list.cfm')

    with util.open_file_for_url(URL, local_copy) as html:
        dropdown = BeautifulSoup(html).find('select', id='fld_station')
        result = _parse_options(dropdown.find_all('option'))

    return result
Exemplo n.º 6
0
def get_stations(fips=None, country=None, state=None, start=None, end=None, update=True):
    """Read station records from the GSOD station history csv.

    Parameters
    ----------
    fips, country, state : ``None``, str, or iterable
        Filter values forwarded to ``_passes_row_filter``. A single string is
        wrapped in a one-item list first.
    start, end : ``None`` or date
        When truthy, converted with ``util.convert_date`` and forwarded to
        ``_passes_row_filter`` as ``'%Y%m%d'`` strings.
    update : bool
        NOTE(review): accepted but never read by this function body.

    Returns
    -------
    stations : dict
        Maps ``_station_code(row)`` to ``_process_station(row)`` for each csv
        row that is kept. Rows are only filtered when at least one of the
        filter arguments is not ``None``.
    """
    start_date = util.convert_date(start) if start else None
    end_date = util.convert_date(end) if end else None

    # a lone string stands for a single code
    if isinstance(fips, basestring):
        fips = [fips]
    if isinstance(country, basestring):
        country = [country]
    if isinstance(state, basestring):
        state = [state]

    unfiltered = all(
        value is None for value in (fips, country, state, start, end))

    stations_url = 'http://www1.ncdc.noaa.gov/pub/data/gsod/ish-history.csv'
    with util.open_file_for_url(stations_url, NCDC_GSOD_STATIONS_FILE) as f:
        reader = csv.DictReader(f)

        if unfiltered:
            rows = reader
        else:
            start_str = (
                None if start_date is None else start_date.strftime('%Y%m%d'))
            end_str = (
                None if end_date is None else end_date.strftime('%Y%m%d'))
            rows = [
                row for row in reader
                if _passes_row_filter(row, fips=fips, country=country,
                    state=state, start_str=start_str, end_str=end_str)
            ]

        # consume the rows while the file is still open
        stations = dict(
            (_station_code(row), _process_station(row)) for row in rows)
    return stations
Exemplo n.º 7
0
def get_stations():
    """Build a lookup of station codes and their descriptions.

    Returns
    -------
    stations_dict : dict
        a python dict keyed by station code; each value is the station
        information produced by ``_parse_station_link`` for one ``<a>`` tag
        inside the page's first ``<pre>`` element
    """
    stations_url = 'http://www.swt-wc.usace.army.mil/shefids.htm'
    local_path = os.path.join(USACE_SWTWC_DIR, 'shefids.htm')

    with util.open_file_for_url(stations_url, local_path) as page:
        anchors = BeautifulSoup(page).find('pre').find_all('a')
        parsed = [_parse_station_link(anchor) for anchor in anchors]

    # a later entry with the same code replaces an earlier one
    stations_by_code = {}
    for station in parsed:
        stations_by_code[station['code']] = station
    return stations_by_code
Exemplo n.º 8
0
Arquivo: core.py Projeto: wilsaj/ulmo
def get_stations():
    """Return the station list keyed by station code.

    Returns
    -------
    stations_dict : dict
        a python dict in which every station code maps to the station
        information that ``_parse_station_link`` extracted from the matching
        link in the page's ``<pre>`` block
    """
    stations_url = 'http://www.swt-wc.usace.army.mil/shefids.htm'
    cache_file = os.path.join(USACE_SWTWC_DIR, 'shefids.htm')

    with util.open_file_for_url(stations_url, cache_file) as html:
        listing = BeautifulSoup(html).find('pre')
        station_links = listing.find_all('a')
        station_infos = [_parse_station_link(a_tag) for a_tag in station_links]

    return dict((info['code'], info) for info in station_infos)
Exemplo n.º 9
0
def get_station_data(station_code, date=None, as_dataframe=False):
    """Retrieve the gauge readings published for one station on one day.


    Parameters
    ----------
    station_code: str
        Code of the station to query. ``get_stations()`` provides the list of
        stations.
    date : ``None`` or date (see :ref:`dates-and-times`)
        Day to query. With ``None`` (default) the station's ``current`` page
        is requested and today's year is used when parsing timestamps.
    as_dataframe : bool
        Selects the format of the returned values. ``False`` (default) gives
        a dict with timestamps as keys mapped to a dict of gauge variables
        and values; ``True`` gives a pandas.DataFrame holding the equivalent
        information.


    Returns
    -------
    data_dict : dict
        A dict containing station information and values.


    Raises
    ------
    ValueError
        If the fetched page contains no ``<pre>`` element.
    """
    if date is not None:
        date = util.convert_date(date)
        date_str, year = date.strftime('%Y%m%d'), date.year
    else:
        date_str, year = 'current', datetime.date.today().year

    filename = '%s.%s.html' % (station_code, date_str)
    data_url = 'http://www.swt-wc.usace.army.mil/webdata/gagedata/' + filename
    path = os.path.join(USACE_SWTWC_DIR, filename)

    with util.open_file_for_url(data_url, path) as page:
        pre = BeautifulSoup(page).find('pre')
        if pre is None:
            raise ValueError(
                'no data could be found for station code %(station_code)s and date %(date)s (url: %(data_url)s)' % {
                    'date': date,
                    'data_url': data_url,
                    'station_code': station_code,
                })
        sio = StringIO.StringIO(str(pre.text.strip()))

    # line 1: the first 8 characters are skipped, then the code followed by
    # the description words
    header_tokens = sio.readline()[8:].strip().split()
    station_dict = {
        'code': header_tokens[0],
        'description': ' '.join(header_tokens[1:]),
    }

    # line 2: the station type is the text after the first ':'
    type_line = sio.readline()
    station_dict['station_type'] = type_line.strip().split(':')[1].strip()

    # every following line containing ':' is a note; the first line without
    # one ends the notes and is consumed as well
    note_lines = []
    while True:
        candidate = sio.readline()
        if ':' not in candidate:
            break
        note_lines.append(candidate.strip())

    if note_lines:
        station_dict['notes'] = '\n'.join(note_lines)

    # three header rows: 15 leading characters are dropped and the rest is
    # split by _split_line with a width argument of 10
    variable_names = _split_line(sio.readline()[15:], 10)
    variable_units = _split_line(sio.readline()[15:], 10)
    variable_sources = _split_line(sio.readline()[15:], 10)

    station_dict['variables'] = dict(
        (name, {'unit': unit, 'source': source})
        for name, unit, source in zip(
            variable_names, variable_units, variable_sources))

    station_dict['timezone'] = sio.readline().strip().strip('()')

    def _to_float(text):
        # '----' marks a missing reading
        return np.nan if text == '----' else float(text)

    def _parse_timestamp(text):
        return _convert_datetime(text, year)

    dataframe = pandas.read_fwf(
        sio,
        names=['datetime'] + variable_names,
        widths=[15] + [10] * len(variable_names),
        index_col=['datetime'],
        na_values=['----'],
        converters=dict((name, _to_float) for name in variable_names),
        parse_dates=True,
        date_parser=_parse_timestamp)

    # drop rows whose sum is NaN (meant to discard all-NaN rows, e.g. at the
    # end of a "current" page)
    row_totals = dataframe.T.sum()
    dataframe = dataframe[~np.isnan(row_totals)]

    if as_dataframe:
        station_dict['values'] = dataframe
    else:
        station_dict['values'] = util.dict_from_dataframe(dataframe)

    return station_dict
Exemplo n.º 10
0
def get_stations(country=None, state=None, start=None, end=None, update=True):
    """Look up the available stations, optionally narrowed by filters.


    Parameters
    ----------
    country : {``None``, str, or iterable}
        When given, only stations with a matching country code are returned.
    state : {``None``, str, or iterable}
        When given, only stations with a matching state code are returned.
    start : ``None`` or date (see :ref:`dates-and-times`)
        When given, only stations which have data after this start date are
        returned.
    end : ``None`` or date (see :ref:`dates-and-times`)
        When given, only stations which have data before this end date are
        returned.
    update : bool
        Intended to control whether a newer copy of the stations file is
        checked for and downloaded (``True``, the default) or whether a
        download only happens when no earlier copy exists (``False``).
        NOTE(review): this function body never reads ``update``.


    Returns
    -------
    stations_dict : dict
        A dict with USAF-WBAN codes keyed to station information dicts.
    """
    def _as_ymd(day):
        # None stays None; anything else becomes a 'YYYYMMDD' string
        return None if day is None else day.strftime('%Y%m%d')

    start_date = util.convert_date(start) if start else None
    end_date = util.convert_date(end) if end else None

    # a single code given as a string is treated as a one-item list
    if isinstance(country, basestring):
        country = [country]
    if isinstance(state, basestring):
        state = [state]

    active_filters = [
        value for value in (country, state, start, end) if value is not None
    ]

    stations_url = 'http://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv'
    with util.open_file_for_url(stations_url, NCDC_GSOD_STATIONS_FILE) as f:
        reader = csv.DictReader(f)

        if not active_filters:
            rows = reader
        else:
            start_str = _as_ymd(start_date)
            end_str = _as_ymd(end_date)
            rows = [
                row for row in reader
                if _passes_row_filter(row, country=country, state=state,
                    start_str=start_str, end_str=end_str)
            ]

        # build the result before the file is closed
        stations = dict(
            (_station_code(row), _process_station(row)) for row in rows)
    return stations
Exemplo n.º 11
0
Arquivo: core.py Projeto: wilsaj/ulmo
def get_station_data(station_code, date=None, as_dataframe=False):
    """Download and parse a station's gauge data page for a given date.


    Parameters
    ----------
    station_code: str
        The station to fetch data for; codes can be listed with
        ``get_stations()``
    date : ``None`` or date (see :ref:`dates-and-times`)
        The date to query. If ``None`` (default), the page named ``current``
        is fetched and the current year is used to interpret timestamps.
    as_dataframe : bool
        Controls the format of the returned values. If ``False`` (default),
        values are a dict with timestamps as keys mapped to a dict of gauge
        variables and values. If ``True``, values are a pandas.DataFrame
        object containing the equivalent information.


    Returns
    -------
    data_dict : dict
        A dict containing station information and values.


    Raises
    ------
    ValueError
        If no ``<pre>`` element is found in the fetched page.
    """
    querying_current = date is None
    if querying_current:
        date_str = 'current'
        year = datetime.date.today().year
    else:
        date = util.convert_date(date)
        date_str = date.strftime('%Y%m%d')
        year = date.year

    filename = '%s.%s.html' % (station_code, date_str)
    data_url = 'http://www.swt-wc.usace.army.mil/webdata/gagedata/' + filename
    path = os.path.join(USACE_SWTWC_DIR, filename)

    with util.open_file_for_url(data_url, path) as html:
        soup = BeautifulSoup(html)
        pre = soup.find('pre')
        if pre is None:
            details = {
                'date': date,
                'data_url': data_url,
                'station_code': station_code,
            }
            raise ValueError(
                'no data could be found for station code %(station_code)s and date %(date)s (url: %(data_url)s)' % details)
        sio = StringIO.StringIO(str(pre.text.strip()))

    station_dict = {}

    # first line: skip 8 characters, then "<code> <description words...>"
    tokens = sio.readline()[8:].strip().split()
    station_dict['code'] = tokens[0]
    station_dict['description'] = ' '.join(tokens[1:])

    # second line: station type follows the first ':'
    station_dict['station_type'] = (
        sio.readline().strip().split(':')[1].strip())

    # gather the run of lines containing ':' as notes; the first line
    # without one is read and discarded
    notes = []
    line = sio.readline()
    while ':' in line:
        notes.append(line.strip())
        line = sio.readline()

    if notes:
        station_dict['notes'] = '\n'.join(notes)

    # names, units and sources rows, each read past a 15-character prefix
    variable_names = _split_line(sio.readline()[15:], 10)
    variable_units = _split_line(sio.readline()[15:], 10)
    variable_sources = _split_line(sio.readline()[15:], 10)

    variables = {}
    for name, unit, source in zip(variable_names, variable_units,
                                  variable_sources):
        variables[name] = {'unit': unit, 'source': source}
    station_dict['variables'] = variables

    station_dict['timezone'] = sio.readline().strip().strip('()')

    def _reading(raw):
        # '----' marks a missing value
        if raw != '----':
            return float(raw)
        return np.nan

    def _timestamp(raw):
        return _convert_datetime(raw, year)

    column_names = ['datetime'] + variable_names
    widths = [15] + [10] * len(variable_names)
    converters = dict((name, _reading) for name in variable_names)

    dataframe = pandas.read_fwf(sio,
                                names=column_names,
                                widths=widths,
                                index_col=['datetime'],
                                na_values=['----'],
                                converters=converters,
                                parse_dates=True,
                                date_parser=_timestamp)

    # keep only rows whose sum is not NaN (meant to remove all-NaN rows,
    # e.g. the tail of a "current" page)
    keep = ~np.isnan(dataframe.T.sum())
    dataframe = dataframe[keep]

    station_dict['values'] = (
        dataframe if as_dataframe else util.dict_from_dataframe(dataframe))

    return station_dict