Example #1
File: utils.py  Project: timcera/mettoolbox
def _check_temperature_cols(
    temp_min_col=None,
    temp_max_col=None,
    temp_mean_col=None,
    temp_min_required=False,
    temp_max_required=False,
):
    """Check temperature columns to make sure necessary ones are filled in."""
    if temp_min_col is None and temp_min_required is True:
        raise ValueError(
            tsutils.error_wrapper("""
            This evaporation method requires the minimum daily temperature column to be specified with "temp_min_col"."""
                                  .format(**locals)))
    if temp_max_col is None and temp_max_required is True:
        raise ValueError(
            tsutils.error_wrapper("""
            This evaporation method requires the maximum daily temperature column to be specified with "temp_max_col"."""
                                  .format(**locals)))
    if temp_mean_col is None and (temp_min_col is None or temp_max_col is None):
        raise ValueError(
            tsutils.error_wrapper("""
            If you do not pass a mean daily temperature column in "temp_mean_col"
            you must give both minimum and maximum daily temperatures using
            "temp_min_col" and "temp_max_col".

            You gave {temp_min_col} for "temp_min_col" and
            {temp_max_col} for "temp_max_col".""".format(**locals())))
    return _check_cols((temp_min_col, "tmin"), (temp_max_col, "tmax"),
                       (temp_mean_col, "tmean"))
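A minimal usage sketch (hedged: the import path mettoolbox.utils and the column names are assumptions, not taken from the source):

# Hypothetical call: identify tmin/tmax by column name, with both required.
# Per the code above, it returns whatever _check_cols yields, i.e. the
# vetted (column, canonical-name) information used downstream.
from mettoolbox import utils

columns, column_names = utils._check_temperature_cols(
    temp_min_col="tmin",
    temp_max_col="tmax",
    temp_mean_col=None,        # mean is estimated later from tmin/tmax
    temp_min_required=True,
    temp_max_required=True,
)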
Example #2
File: utils.py  Project: timcera/mettoolbox
def _validate_temperatures(tsd, temp_min_col, temp_max_col):
    if "tmean" not in tsd.columns:
        if (tsd.tmax <= tsd.tmin).any():
            raise ValueError(
                tsutils.error_wrapper("""
                On the following dates:

        {0},

        minimum temperature values in column "{1}" are greater than or
        equal to the maximum temperature values in column "{2}".""".format(
                    tsd[tsd.tmax <= tsd.tmin].index, temp_min_col,
                    temp_max_col)))

        warnings.warn(
            tsutils.error_wrapper("""
                Since `temp_mean_col` is None, the average daily temperature
                will be estimated by the average of `temp_min_col` and
                `temp_max_col`."""))
        tsd["tmean"] = (tsd.tmin + tsd.tmax) / 2.0
    else:
        if (tsd.tmin >= tsd.tmean).any() or (tsd.tmax <= tsd.tmean).any():
            raise ValueError(
                tsutils.error_wrapper(""" On the following dates:

        {0},

        the daily average is either below or equal to the minimum temperature
        in column "{1}" or above or equal to the maximum temperature in column
        "{2}".""".format(
                    tsd[(tsd.tmin >= tsd.tmean) | (tsd.tmax <= tsd.tmean)].index,
                    temp_min_col,
                    temp_max_col,
                )))
    return tsd
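A throwaway sketch of the check this performs (pandas assumed; values hypothetical):

import pandas as pd

tsd = pd.DataFrame(
    {"tmin": [10.0, 12.0], "tmax": [20.0, 11.0]},   # second row has tmax < tmin
    index=pd.date_range("2000-01-01", periods=2),
)
# _validate_temperatures(tsd, "tmin", "tmax") would raise ValueError naming
# 2000-01-02; with consistent data and no "tmean" column it would instead
# warn and fill tsd["tmean"] with (tmin + tmax) / 2.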
Example #3
    def write_dsn(self, wdmpath, dsn, data):
        """Write to self.wdmfp/dsn the time-series data."""
        dsn_desc = self.describe_dsn(wdmpath, dsn)
        tcode = dsn_desc["tcode"]
        tstep = dsn_desc["tstep"]
        tsfill = dsn_desc["tsfill"]

        data.fillna(tsfill, inplace=True)
        start_date = data.index[0]

        dstart_date = start_date.timetuple()[:6]
        llsdat = self._tcode_date(tcode, dstart_date)
        if dsn_desc["base_year"] > llsdat[0]:
            raise ValueError(
                tsutils.error_wrapper("""
The base year for this DSN is {0}.  All data to insert must be after the
base year.  Instead the first year of the series is {1}.
""".format(dsn_desc["base_year"], llsdat[0])))

        nval = len(data)
        lock = SoftFileLock(wdmpath + ".lock", timeout=30)
        with lock:
            wdmfp = self._open(wdmpath, 58)
            retcode = self.wdtput(wdmfp, dsn, tstep, llsdat, nval, 1, 0, tcode,
                                  data)
            self._close(wdmpath)
        self._retcode_check(retcode,
                            additional_info="wdtput file={0} DSN={1}".format(
                                wdmpath, dsn))
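A hedged call sketch (hypothetical WDM path and DSN; assumes WDM is an instance of this class):

import pandas as pd

# The DSN's own tcode/tstep/tsfill are read back first, so the series index
# must already match the DSN's interval (daily in this hypothetical example).
data = pd.Series(
    [1.0, 2.0],
    index=pd.date_range("2000-01-01", periods=2, freq="D"),
)
WDM.write_dsn("data.wdm", 101, data)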
Example #4
def cleancopywdm(inwdmpath, outwdmpath, overwrite=False):
    """Make a clean copy of a WDM file.

    Parameters
    ----------
    inwdmpath
        Path to input WDM
        file.
    outwdmpath
        Path to clean copy WDM
        file.
    overwrite
        Whether to overwrite an existing
        outwdmpath.

    """
    if inwdmpath == outwdmpath:
        raise ValueError(
            tsutils.error_wrapper("""
The "inwdmpath" cannot be the same as "outwdmpath".
"""))
    createnewwdm(outwdmpath, overwrite=overwrite)
    activedsn = []
    for i in range(1, 32000):
        try:
            activedsn.append(_describedsn(inwdmpath, i)["dsn"])
        except wdmutil.WDMError:
            continue
    # Copy labels (which copies DSN metadata and data)
    for i in activedsn:
        try:
            _copy_dsn(inwdmpath, i, outwdmpath, i)
        except wdmutil.WDMError:
            pass
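A one-line usage sketch (paths hypothetical):

# Rebuild "clean.wdm" from scratch and copy every active DSN into it.
cleancopywdm("old.wdm", "clean.wdm", overwrite=True)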
Example #5
def dump(hbnfilename, time_stamp="begin"):
    """
    Prints out ALL data from a HSPF binary output file.
    """
    if time_stamp not in ["begin", "end"]:
        raise ValueError(
            tsutils.error_wrapper("""
The "time_stamp" optional keyword must be either
"begin" or "end".  You gave {0}.
""".format(time_stamp)))

    index, data = _get_data(hbnfilename, None, [",,,"], catalog_only=False)
    skeys = sorted(data.keys())

    result = pd.DataFrame(
        pd.concat([pd.Series(data[i], index=index) for i in skeys],
                  sort=False,
                  axis=1).reindex(pd.Index(index)))

    columns = ["{0}_{1}_{2}_{3}".format(i[1], i[2], i[4], i[5]) for i in skeys]
    result.columns = columns

    if time_stamp == "begin":
        result = tsutils.asbestfreq(result)
        result = result.tshift(-1)

    return result
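A hedged usage sketch (file path hypothetical; assumes dump lives in the same hspfbintoolbox.hspfbintoolbox module as the extract imported later on this page):

from hspfbintoolbox.hspfbintoolbox import dump

# Dump every time series in the binary file, stamped at interval begin.
df = dump("tests/data.hbn", time_stamp="begin")
print(df.head())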
Example #6
File: pet.py  Project: timcera/mettoolbox
def _columns(tsd, req_column_list=[], optional_column_list=[]):
    if None in req_column_list:
        raise ValueError(
            tsutils.error_wrapper(
                """
You need to supply the column (name or number, data column numbering
starts at 1) for {0} time-series.

Instead you gave {1}""".format(
                    len(req_column_list), req_column_list
                )
            )
        )

    collect = []
    for loopvar in req_column_list + optional_column_list:
        try:
            nloopvar = int(loopvar) - 1
        except (TypeError, ValueError):
            nloopvar = loopvar

        if nloopvar is None:
            collect.append(None)
        elif isinstance(nloopvar, int):
            # .ix was removed from pandas; select by position or by label.
            collect.append(tsd.iloc[:, nloopvar])
        else:
            collect.append(tsd.loc[:, nloopvar])

    return collect
Example #7
    def _open(self, wdname, wdmsfl, ronwfg=0):
        """Private method to open WDM file."""
        wdname = wdname.strip()
        if wdname not in self.openfiles:
            if ronwfg in [0, 1]:
                if not os.path.exists(wdname):
                    raise ValueError(
                        tsutils.error_wrapper("""
Trying to open file "{0}" and it cannot be found.
""".format(wdname)))
            retcode = self.wdbopn(wdmsfl, wdname, ronwfg)
            self._retcode_check(
                retcode,
                additional_info="wdbopn file={0} DSN={1}".format(wdname, "NA"))
            self.openfiles[wdname] = wdmsfl
        return wdmsfl
Example #8
def extract(*wdmpath, **kwds):
    """Print out DSN data to the screen with ISO-8601 dates.

    This is the API version also used by 'extract_cli'
    """
    # Adapt to both forms of presenting wdm files and DSNs
    # Old form '... file.wdm 101 102 103 ...'
    # New form '... file.wdm,101 adifferentfile.wdm,101 ...'
    try:
        start_date = kwds.pop("start_date")
    except KeyError:
        start_date = None
    try:
        end_date = kwds.pop("end_date")
    except KeyError:
        end_date = None
    if len(kwds) > 0:
        raise ValueError(
            tsutils.error_wrapper("""
The only allowed keywords are start_date and end_date.  You
have given {0}.
""".format(kwds)))

    labels = []
    for lab in wdmpath:
        if "," in str(lab):
            labels.append(lab.split(","))
        else:
            if lab == wdmpath[0]:
                continue
            labels.append([wdmpath[0], lab])

    result = pd.DataFrame()
    cnt = 0
    for ind, lab in enumerate(labels):
        wdmpath = lab[0]
        dsn = lab[1]
        nts = WDM.read_dsn(wdmpath,
                           int(dsn),
                           start_date=start_date,
                           end_date=end_date)
        if nts.columns[0] in result.columns:
            cnt = cnt + 1
            nts.columns = ["{0}_{1}".format(nts.columns[0], cnt)]
        result = result.join(nts, how="outer")
    return result
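Both label forms from the comment above, as a sketch (paths and DSNs hypothetical):

# Old form: one WDM file followed by bare DSNs.
df1 = extract("data.wdm", 101, 102, start_date="2000-01-01")

# New form: each argument pairs its own WDM file with a DSN.
df2 = extract("data.wdm,101", "other.wdm,101")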
Example #9
def listdsns(wdmpath):
    """Print out a table describing all DSNs in the WDM.
    """
    if not os.path.exists(wdmpath):
        raise ValueError(
            tsutils.error_wrapper("""
File {0} does not exist.
""".format(wdmpath)))

    collect = OrderedDict()
    for i in range(1, 32001):
        try:
            testv = _describedsn(wdmpath, i)
        except wdmutil.WDMError:
            continue
        collect[i] = testv
    return collect
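A usage sketch (path hypothetical; the per-DSN keys are assumed to match the describe_dsn fields used elsewhere on this page):

dsns = listdsns("data.wdm")
for dsn, desc in dsns.items():
    print(dsn, desc["tcode"], desc["tstep"])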
Example #10
def single_target_units(source_units, target_units, default=None, cnt=1):
    if default is None:
        return source_units

    if target_units is None:
        return [default] * len(source_units)

    tunits = set(target_units)
    if len(tunits) != cnt:
        raise ValueError(
            tsutils.error_wrapper("""
Since creating a single disaggregated time-series there can only be
a single "target_units".  You gave "{target_units}".
""".format(**locals())))
    if len(source_units) == len(target_units):
        return target_units

    return [target_units[0]] * len(source_units)
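A quick sketch of the branch behavior (values hypothetical):

# default of None: source units pass through untouched.
single_target_units(["degC", "degC"], None, default=None)    # ['degC', 'degC']

# no target given: every column gets the default.
single_target_units(["degC", "degC"], None, default="degF")  # ['degF', 'degF']

# one target unit is broadcast across all source columns.
single_target_units(["degC", "degC"], ["degF"], default="degC")  # ['degF', 'degF']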
Example #11
def extract_arr(filename, *labels):
    """DEPRECATED: Extract and return the raw numpy array.

    DEPRECATED: Will be removed in future version. Instead use the following.

    >>> from swmmtoolbox import swmmtoolbox
    >>> na = swmmtoolbox.extract("filename.out", "link,41a,Flow_rate")[0].to_array()

    The `extract_arr` function will return the numpy array for the last entry
    in "*labels".

    Parameters
    ----------
    {filename}
    {labels}

    """
    warnings.warn(
        tsutils.error_wrapper(
            """
DEPRECATED: Will be removed in future version. Instead use the following.

>>> from swmmtoolbox import swmmtoolbox

>>> na = swmmtoolbox.extract("filename.out", "link,41a,Flow_rate")[0].to_array()
"""
        )
    )
    obj = SwmmExtract(filename)
    for label in labels:
        itemtype, name, variableindex = tsutils.make_list(label, n=3)
        typenumber = obj.type_check(itemtype)
        if itemtype != "system":
            name = obj.name_check(itemtype, name)[0]

        data = np.zeros(obj.swmm_nperiods)

        for time in range(obj.swmm_nperiods):
            _, value = obj.get_swmm_results(typenumber, name, int(variableindex), time)
            data[time] = value

    return data
Example #12
def radiation(
    method: Literal["pot_rad", "pot_rad_via_ssd", "pot_rad_via_bc",
                    "mean_course"],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    pot_rad=None,
    angstr_a=0.25,
    angstr_b=0.5,
    bristcamp_a=0.75,
    bristcamp_c=2.4,
    hourly_rad=None,
    lat=None,
    lon=None,
    glob_swr_col=None,
):
    """Disaggregate daily to hourly data.

    disaggregate_radiation(data_daily,
                           sun_times=None,
                           pot_rad=None,
                           method='pot_rad',
                           angstr_a=0.25,
                           angstr_b=0.5,
                           bristcamp_a=0.75,
                           bristcamp_c=2.4,
                           mean_course=None):
    Args:
        daily_data: daily values
        sun_times: daily dataframe including results of the util.sun_times function
        pot_rad: hourly dataframe including potential radiation
        method: keyword specifying the disaggregation method to be used
        angstr_a: parameter a of the Angstrom model (intercept)
        angstr_b: parameter b of the Angstrom model (slope)
        bristcamp_a: ?
        bristcamp_c: ?
        mean_course: monthly values of the mean hourly radiation course

    if method == 'mean_course':
        HOURLY radiation in "mean_course"

    if method in ('pot_rad', 'mean_course')
        .glob from glob_swr_col

    if method == 'pot_rad_via_ssd'
        daily sun_times

    if method == 'pot_rad_via_bc'
        bristcamp_a
        bristcamp_c
    """
    target_units = single_target_units(source_units, target_units, "W/m2")

    target_units = [target_units[0]] * len(source_units)

    pd.options.display.width = 60

    if method == "mean_course" and hourly_rad is None:
        raise ValueError(
            tsutils.error_wrapper("""
If method is "mean_course" need to supply CSV filename of hourly
radiation by the `hourly_rad` keyword."""))

    if method in ["pot_rad", "mean_course"] and glob_swr_col is None:
        raise ValueError(
            tsutils.error_wrapper("""
If method is "pot_rad" or "mean_course" need to supply the daily global
short wave radiation as column name or index with keyword
`glob_swr_col`"""))

    if method == "pot_rad_via_bc" and (bristcamp_a is None
                                       or bristcamp_c is None):
        raise ValueError(
            tsutils.error_wrapper("""
If method is "pot_rad_via_bc" need to supply the keywords `bristcamp_a`
and `bristcamp_c`."""))

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    if method in ["pot_rad", "mean_course"]:
        try:
            glob_swr_col = glob_swr_col - 1
        except TypeError:
            pass
        tsd["glob"] = tsd[glob_swr_col]

    sun_times = None
    if method == "pot_rad_via_ssd":
        sun_times = get_sun_times(tsd.index, float(lon), float(lat),
                                  round(float(lon) / 15.0))

    return tsutils.return_input(
        print_input,
        tsd,
        pd.DataFrame(
            disaggregate_radiation(
                tsd,
                sun_times=sun_times,
                pot_rad=pot_rad,
                method=method,
                angstr_a=angstr_a,
                angstr_b=angstr_b,
                bristcamp_a=bristcamp_a,
                bristcamp_c=bristcamp_c,
                mean_course=hourly_rad,
            )),
    )
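A hedged call sketch for one branch (CSV path and column name hypothetical; a sketch, not a verified run):

# "pot_rad" needs the daily global shortwave radiation column; the hourly
# potential-radiation keyword `pot_rad` is left at its default here.
hourly = radiation(
    "pot_rad",
    source_units=["W/m2"],
    input_ts="daily_radiation.csv",
    glob_swr_col="global_swr",
)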
Example #13
    def create_new_dsn(
        self,
        wdmpath,
        dsn,
        tstype="",
        base_year=1900,
        tcode=4,
        tsstep=1,
        statid=" ",
        scenario="",
        location="",
        description="",
        constituent="",
        tsfill=-999.0,
    ):
        """Create self.wdmfp/dsn."""
        lock = SoftFileLock(wdmpath + ".lock", timeout=30)
        with lock:
            wdmfp = self._open(wdmpath, 57)
            messfp = self.wmsgop()

            if self.wdckdt(wdmfp, dsn) == 1:
                self._close(wdmpath)
                raise DSNExistsError(dsn)

            # Parameters for wdlbax taken from ATCTSfile/clsTSerWDM.cls
            self.wdlbax(
                wdmfp,
                dsn,
                1,  # DSTYPE - always 1 for time series
                10,  # NDN    - number of down pointers
                10,  # NUP    - number of up pointers
                30,  # NSA    - number of search attributes
                100,  # NSASP  - amount of search attribute space
                300,  # NDP    - number of data pointers
            )  # PSA    - pointer to search attribute space

            for saind, salen, saval in [
                (34, 1, 6),  # tgroup
                (83, 1, 1),  # compfg
                (84, 1, 1),  # tsform
                (85, 1, 1),  # vbtime
                (17, 1, int(tcode)),  # tcode
                (33, 1, int(tsstep)),  # tsstep
                (27, 1, int(base_year)),  # tsbyr
            ]:
                retcode = self.wdbsai(wdmfp, dsn, messfp, saind, salen, saval)
                self._retcode_check(
                    retcode,
                    additional_info="wdbsai file={0} DSN={1}".format(
                        wdmpath, dsn),
                )

            for saind, salen, saval in [(32, 1, tsfill)]:  # tsfill
                retcode = self.wdbsar(wdmfp, dsn, messfp, saind, salen, saval)
                self._retcode_check(
                    retcode,
                    additional_info="wdbsar file={0} DSN={1}".format(
                        wdmpath, dsn),
                )

            for saind, salen, saval, error_name in [
                (2, 16, statid, "Station ID"),
                (1, 4, tstype.upper(), "Time series type - tstype"),
                (45, 48, description.upper(), "Description"),
                (288, 8, scenario.upper(), "Scenario"),
                (289, 8, constituent.upper(), "Constituent"),
                (290, 8, location.upper(), "Location"),
            ]:
                saval = saval.strip()
                if len(saval) > salen:
                    raise ValueError(
                        tsutils.error_wrapper("""
String "{0}" is too long for {1}.  Must
have a length equal or less than {2}.
""".format(saval, error_name, salen)))

                saval = "{0: <{1}}".format(saval, salen)

                retcode = self.wdbsac(wdmfp, dsn, messfp, saind, salen, saval)
                self._retcode_check(
                    retcode,
                    additional_info="wdbsac file={0} DSN={1}".format(
                        wdmpath, dsn),
                )
            self._close(wdmpath)
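A hedged sketch (hypothetical path, DSN, and attribute values; assumes WDM is an instance of this class):

# Create DSN 101 as a daily (tcode=4) precipitation series; tstype is
# limited to 4 characters by the salen check above.
WDM.create_new_dsn("data.wdm", 101, tstype="PREC", base_year=1948,
                   tcode=4, tsstep=1, tsfill=-999.0)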
Example #14
def _get_data(binfilename,
              interval="daily",
              labels=[",,,"],
              catalog_only=True):
    """Underlying function to read from the binary file.  Used by
    'extract', 'catalog', and 'dump'.
    """
    testem = {
        "PERLND": [
            "ATEMP",
            "SNOW",
            "PWATER",
            "SEDMNT",
            "PSTEMP",
            "PWTGAS",
            "PQUAL",
            "MSTLAY",
            "PEST",
            "NITR",
            "PHOS",
            "TRACER",
            "",
        ],
        "IMPLND": ["ATEMP", "SNOW", "IWATER", "SOLIDS", "IWTGAS", "IQUAL", ""],
        "RCHRES": [
            "HYDR",
            "CONS",
            "HTRCH",
            "SEDTRN",
            "GQUAL",
            "OXRX",
            "NUTRX",
            "PLANK",
            "PHCARB",
            "INFLOW",
            "OFLOW",
            "ROFLOW",
            "",
        ],
        "BMPRAC": [""],
        "": [""],
    }

    collect_dict = {}
    lablist = []

    # Normalize interval code
    try:
        intervalcode = interval2codemap[interval.lower()]
    except AttributeError:
        intervalcode = None

    # Fix up and test the labels - could be in its own function
    for lindex, label in enumerate(labels):
        words = [lindex] + label.split(",")
        if len(words) != 5:
            raise ValueError(
                tsutils.error_wrapper("""
The label '{0}' has the wrong number of entries.
""".format(label)))

        words = [None if i == "" else i for i in words]

        if words[1] is not None:
            words[1] = words[1].upper()
            if words[1] not in testem.keys():
                raise ValueError(
                    tsutils.error_wrapper("""
Operation type must be one of 'PERLND', 'IMPLND', 'RCHRES', or 'BMPRAC',
or missing (to get all) instead of {0}.
""".format(words[1])))

        if words[2] is not None:
            try:
                words[2] = int(words[2])
                if words[2] < 1 or words[2] > 999:
                    raise ValueError()
            except (ValueError, TypeError):
                raise ValueError(
                    tsutils.error_wrapper("""
The land use element must be an integer from 1 to 999 inclusive,
instead of {0}.
""".format(words[2])))

        if words[3] is not None:
            words[3] = words[3].upper()
            if words[3] not in testem[words[1] or ""]:
                raise ValueError(
                    tsutils.error_wrapper("""
The {0} operation type only allows the variable groups:
{1},
instead you gave {2}.
""".format(words[1], testem[words[1]][:-1], words[3])))

        words.append(intervalcode)
        lablist.append(words)

    with open(binfilename, "rb") as fl:

        mindate = datetime.datetime.max
        maxdate = datetime.datetime.min

        labeltest = {}
        vnames = {}
        ndates = {}
        rectype = 0
        fl.read(1)
        while True:
            try:
                reclen1, reclen2, reclen3, reclen = struct.unpack(
                    "4B", fl.read(4))
            except struct.error:
                # End of file.
                break

            rectype, optype, lue, section = struct.unpack(
                "I8sI8s", fl.read(24))

            rectype = int(rectype)
            lue = int(lue)
            optype = optype.strip()
            section = section.strip()

            slen = 0
            if rectype == 0:
                reclen1 = int(reclen1 / 4)
                reclen2 = reclen2 * 64 + reclen1
                reclen3 = reclen3 * 16384 + reclen2
                reclen = reclen * 4194304 + reclen3 - 24
                while slen < reclen:
                    length = struct.unpack("I", fl.read(4))[0]
                    slen = slen + length + 4
                    variable_name = struct.unpack("{0}s".format(length),
                                                  fl.read(length))[0]
                    vnames.setdefault((lue, section), []).append(variable_name)

            elif rectype == 1:
                # Data record
                numvals = len(vnames[(lue, section)])

                (_, level, year, month, day, hour,
                 minute) = struct.unpack("7I", fl.read(28))

                vals = struct.unpack("{0}f".format(numvals),
                                     fl.read(4 * numvals))
                if hour == 24:
                    ndate = (datetime.datetime(year, month, day) +
                             datetime.timedelta(hours=24) +
                             datetime.timedelta(minutes=minute))
                else:
                    ndate = datetime.datetime(year, month, day, hour, minute)

                for i, vname in enumerate(vnames[(lue, section)]):
                    tmpkey = (
                        None,
                        optype.decode("ascii"),
                        int(lue),
                        section.decode("ascii"),
                        vname.decode("ascii"),
                        level,
                    )
                    if catalog_only is False:
                        res = tupleSearch(tmpkey, lablist)
                        if res:
                            nres = (res[0][0], ) + res[0][1][1:]
                            labeltest[nres[0]] = 1
                            collect_dict.setdefault(nres, []).append(vals[i])
                            ndates.setdefault(level, {})[ndate] = 1
                    else:
                        mindate = min(mindate, ndate)
                        maxdate = max(maxdate, ndate)
                        pdoffset = code2freqmap[level]
                        collect_dict[tmpkey[1:]] = (
                            pd.Period(mindate, freq=pdoffset),
                            pd.Period(maxdate, freq=pdoffset),
                        )
            else:
                fl.seek(-31, 1)

            # The following should be 1 or 2, but I don't know how to calculate
            # it, so I just use that the 'rectype' must be 0 or 1, and if not
            # rewind the correct amount.
            fl.read(2)

    if not collect_dict:
        raise ValueError(
            tsutils.error_wrapper("""
The label specifications below matched no records in the binary file.

{lablist}
""".format(**locals())))

    if catalog_only is False:
        not_in_file = []
        for loopcnt in list(range(len(lablist))):
            if loopcnt not in labeltest.keys():
                not_in_file.append(labels[loopcnt])
        if not_in_file:
            warnings.warn(
                tsutils.error_wrapper("""
The specification{0} {1}
matched no records in the binary file.
""".format("s"[len(not_in_file) == 1:], not_in_file)))

    return ndates, collect_dict
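Label strings are "OPERATION,LUE,GROUP,VARIABLE", with empty fields acting as wildcards; a sketch (file path and label hypothetical):

# Catalog only: ",,," matches every record in the file.
dates, catalog = _get_data("tests/data.hbn", "yearly", [",,,"],
                           catalog_only=True)

# Pull one PERLND PWATER variable for land-use element 12.
dates, data = _get_data("tests/data.hbn", "yearly",
                        ["PERLND,12,PWATER,PERO"], catalog_only=False)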
Example #15
def extract(hbnfilename, interval, *labels, **kwds):
    r"""Returns a DataFrame from a HSPF binary output file."""
    try:
        time_stamp = kwds.pop("time_stamp")
    except KeyError:
        time_stamp = "begin"
    if time_stamp not in ["begin", "end"]:
        raise ValueError(
            tsutils.error_wrapper("""
The "time_stamp" optional keyword must be either
"begin" or "end".  You gave {0}.
""".format(time_stamp)))

    try:
        sortall = bool(kwds.pop("sorted"))
    except KeyError:
        sortall = False
    if not (sortall is True or sortall is False):
        raise ValueError(
            tsutils.error_wrapper("""
The "sorted" optional keyword must be either
True or False.  You gave {0}.
""".format(sortall)))

    if len(kwds) > 0:
        raise ValueError(
            tsutils.error_wrapper("""
The extract command only accepts optional keywords 'time_stamp' and
'sorted'.  You gave {0}.
""".format(list(kwds.keys()))))

    interval = interval.lower()
    if interval not in ["bivl", "daily", "monthly", "yearly"]:
        raise ValueError(
            tsutils.error_wrapper("""
The "interval" argument must be one of "bivl",
"daily", "monthly", or "yearly".  You supplied
"{0}".
""".format(interval)))

    index, data = _get_data(hbnfilename, interval, labels, catalog_only=False)
    index = index[interval2codemap[interval]]
    index = sorted(index.keys())
    skeys = list(data.keys())
    if sortall is True:
        skeys.sort(key=lambda tup: tup[1:])
    else:
        skeys.sort()

    result = pd.DataFrame(
        pd.concat([pd.Series(data[i], index=index) for i in skeys],
                  sort=False,
                  axis=1).reindex(pd.Index(index)))
    columns = ["{0}_{1}_{2}_{3}".format(i[1], i[2], i[4], i[5]) for i in skeys]
    result.columns = columns

    if time_stamp == "begin":
        result = tsutils.asbestfreq(result)
        result = result.tshift(-1)

    result.index.name = "Datetime"

    return result
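A usage sketch (path and label hypothetical):

from hspfbintoolbox.hspfbintoolbox import extract

df = extract(
    "tests/data.hbn",           # HSPF binary output file
    "yearly",                   # bivl, daily, monthly, or yearly
    "PERLND,12,PWATER,PERO",    # label; empty fields act as wildcards
    time_stamp="begin",
)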
Example #16
    def read_dsn(self, wdmpath, dsn, start_date=None, end_date=None):
        """Read from a DSN."""
        if not os.path.exists(wdmpath):
            raise ValueError(
                tsutils.error_wrapper("""
File {0} does not exist.
""".format(wdmpath)))

        # Call wdatim_ to get LLSDAT, LLEDAT, TSTEP, TCODE
        desc_dsn = self.describe_dsn(wdmpath, dsn)

        llsdat = desc_dsn["llsdat"]
        lledat = desc_dsn["lledat"]
        tcode = desc_dsn["tcode"]
        tstep = desc_dsn["tstep"]
        tsfill = desc_dsn["tsfill"]

        # These calls convert 24 to midnight of the next day
        self.timcvt(llsdat)
        self.timcvt(lledat)

        if start_date is not None:
            start_date = self.dateconverter(start_date)
            start_date = datetime.datetime(*start_date)
            if start_date > datetime.datetime(*lledat):
                raise ValueError(
                    tsutils.error_wrapper("""
The requested start date ({0}) is after the end date ({1})
of the time series in the WDM file.
""".format(start_date, datetime.datetime(*lledat))))

        if end_date is not None:
            end_date = self.dateconverter(end_date)
            end_date = datetime.datetime(*end_date)
            if end_date < datetime.datetime(*llsdat):
                raise ValueError(
                    tsutils.error_wrapper("""
The requested end date ({0}) is before the start date ({1})
of the time series in the WDM file.
""".format(end_date, datetime.datetime(*llsdat))))

        iterm = self.timdif(llsdat, lledat, tcode, tstep)

        dtran = 0
        qualfg = 30
        # Get the data and put it into dictionary
        wdmfp = self._open(wdmpath, 59, ronwfg=1)
        dataout, retcode = self.wdtget(wdmfp, dsn, tstep, llsdat, iterm, dtran,
                                       qualfg, tcode)
        self._close(wdmpath)

        if len(dataout) == 0:
            return pd.DataFrame()

        self._retcode_check(retcode,
                            additional_info="wdtget file={0} DSN={1}".format(
                                wdmpath, dsn))

        index = pd.date_range(
            datetime.datetime(*llsdat),
            periods=iterm,
            freq="{0:d}{1}".format(tstep, MAPTCODE[tcode]),
        )

        # Convert time series to pandas DataFrame
        tmpval = pd.DataFrame(
            pd.Series(
                dataout,
                index=index,
                name="{0}_DSN_{1}".format(os.path.basename(wdmpath), dsn),
            ),
            dtype=np.float64,
        )

        tmpval = tsutils.common_kwds(tmpval,
                                     start_date=start_date,
                                     end_date=end_date)
        tmpval.replace(tsfill, np.nan, inplace=True)
        tmpval.index.name = "Datetime"
        return tmpval
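A sketch matching the call made by extract() earlier on this page (path and DSN hypothetical):

# Returns a one-column DataFrame indexed by "Datetime", with the DSN's
# tsfill sentinel already replaced by NaN.
nts = WDM.read_dsn("data.wdm", 101,
                   start_date="2000-01-01",
                   end_date="2000-12-31")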
Example #17
def wind_speed(
    method: Literal["equal", "cosine", "random"],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    a=None,
    b=None,
    t_shift=None,
):
    """Disaggregate daily to hourly data.
    disaggregate_wind(wind_daily,
                      method='equal',
                      a=None,
                      b=None,
                      t_shift=None):

    Args:
        wind_daily: daily values
        method: keyword specifying the disaggregation method to be used
        a: parameter a for the cosine function
        b: parameter b for the cosine function
        t_shift: parameter t_shift for the cosine function
    """
    target_units = single_target_units(source_units, target_units, "m/s")

    target_units = [target_units[0]] * len(source_units)

    pd.options.display.width = 60

    if method == "cosine" and (a is None or b is None or t_shift is None):
        raise ValueError(
            tsutils.error_wrapper("""
For the "cosine" method, requires the `a`, `b`, and `t_shift`
keywords.  You gave:

a = {a}

b = {b}

t_shift = {t_shift}
""".format(**locals())))
    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    return tsutils.return_input(
        print_input,
        tsd,
        pd.DataFrame(
            disaggregate_wind(tsd, method=method, a=a, b=b, t_shift=t_shift)),
    )
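A sketch of the "cosine" branch (file path and parameter values hypothetical):

hourly = wind_speed(
    "cosine",
    source_units=["m/s"],
    input_ts="daily_wind.csv",
    a=1.0,
    b=0.2,
    t_shift=3,      # a, b, and t_shift are all required for "cosine"
)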
Example #18
def humidity(
    method: Literal["equal", "minimal", "dewpoint_regression",
                    "linear_dewpoint_variation", "min_max",
                    "month_hour_precip_mean", ],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    hum_min_col=None,
    hum_max_col=None,
    hum_mean_col=None,
    temp_min_col=None,
    temp_max_col=None,
    a0=None,
    a1=None,
    kr=None,
    hourly_temp=None,
    preserve_daily_mean=None,
):
    """Disaggregate daily humidity to hourly humidity data.

    disaggregate_humidity(data_daily, method='equal', temp=None,
                          a0=None, a1=None, kr=None,
                          month_hour_precip_mean=None, preserve_daily_mean=False):
    Args:
        daily_data: daily values
        method: keyword specifying the disaggregation method to be used
        temp: hourly temperature time series (necessary for some methods)
        kr: parameter for linear_dewpoint_variation method (6 or 12)
        month_hour_precip_mean: [month, hour, precip(y/n)] categorical mean values
        preserve_daily_mean: if True, correct the daily mean values of the disaggregated
            data with the observed daily means.


    if method=equal
        .hum from hum_mean_col
    if method in ["minimal", "dewpoint_regression", "linear_dewpoint_variation"]
        .tmin from temp_min_col
    if method=min_max need
        .hum_min from hum_min_col
        .hum_max from hum_max_col
        .tmin from temp_min_col
        .tmax from temp_max_col

    if method in ["dewpoint_regression", "linear_dewpoint_variation"]
        a0
        a1
    if method in ["linear_dewpoint_variation"]
        kr

    if preserve_daily_mean is True
        .hum from hum_mean_col

    if method in ["minimal",
                  "dewpoint_regression",
                  "linear_dewpoint_variation",
                  "min_max"]
        need HOURLY temperature in 'temp'
    """
    target_units = single_target_units(source_units, target_units, "")

    if method == "equal" and hum_mean_col is None:
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "equal" then the mean daily humidity is a required column
identified with the keyword `hum_mean_col`"""))

    if (method
            in ["minimal", "dewpoint_regression", "linear_dewpoint_variation"]
            and temp_min_col is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "minimal", "dewpoint_regression", or
"linear_dewpoint_variation" then the minimum daily temperature is a required
column identified with the keyword `temp_min_col`."""))

    if method == "min_max" and (hum_min_col is None or hum_max_col is None or
                                temp_min_col is None or temp_max_col is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "min_max" then:

Minimum daily humidity is a required column identified with the keyword
`hum_min_col`.  You gave {hum_min_col}.

Maximum daily humidity is a required column identified with the keyword
`hum_max_col`.  You gave {hum_max_col}.

Minimum daily temperature is a required column identified with the
keyword `temp_min_col`.  You gave {temp_min_col}.

Maximum daily temperature is a required column identified with the
keyword `temp_max_col`.  You gave {temp_max_col}.
""".format(**locals())))

    if method in ["dewpoint_regression", "linear_dewpoint_variation"
                  ] and (a0 is None or a1 is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "dewpoint_regression" or "linear_dewpoint_variation" then
a0 and a1 must be given."""))

    if method == "linear_dewpoint_variation" and kr is None:
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "linear_dewpoint_variation" then kr must be given"""))

    if (method in [
            "minimal",
            "dewpoint_regression",
            "linear_dewpoint_variation",
            "min_max",
            "month_hour_precip_mean",
    ] and hourly_temp is None):
        raise ValueError(
            tsutils.error_wrapper("""
If `method` is "minimal", "dewpoint_regression",
"linear_dewpoint_variation", or "min_max" then hourly temperature is
required identified by the filename in keyword `hourly_temp`."""))

    pd.options.display.width = 60

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    if method == "equal":
        tsd["hum"] = tsd["hum_mean_col"]

    if method in [
            "minimal", "dewpoint_regression", "linear_dewpoint_variation"
    ]:
        tsd["tmin"] = tsd["temp_min_col"]

    if method == "min_max":
        tsd["hum_min"] = tsd["hum_min_col"]
        tsd["hum_max"] = tsd["hum_max_col"]
        tsd["tmin"] = tsd["temp_min_col"]
        tsd["tmax"] = tsd["temp_max_col"]

    if preserve_daily_mean is not None:
        tsd["hum"] = tsd[preserve_daily_mean]
        preserve_daily_mean = True

    if method in [
            "minimal",
            "dewpoint_regression",
            "linear_dewpoint_variation",
            "min_max",
            "month_hour_precip_mean",
    ]:
        hourly_temp = tstoolbox.read(hourly_temp)

    if method == "month_hour_precip_mean":
        month_hour_precip_mean = calculate_month_hour_precip_mean(hourly_temp)

    ntsd = pd.DataFrame(
        disaggregate_humidity(
            tsd,
            method=method,
            temp=hourly_temp,
            a0=a0,
            a1=a1,
            kr=kr,
            preserve_daily_mean=preserve_daily_mean,
            month_hour_precip_mean=month_hour_precip_mean,
        ))

    ntsd.columns = ["humidity:{0}:disagg"]

    return tsutils.return_input(print_input, tsd, ntsd)
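A sketch of the "min_max" branch (file paths and column names hypothetical; assumes the input CSV holds exactly these four data columns so source_units lines up):

hourly = humidity(
    "min_max",
    source_units=["", "", "degC", "degC"],
    input_ts="daily_obs.csv",
    hum_min_col="hum_min",
    hum_max_col="hum_max",
    temp_min_col="tmin",
    temp_max_col="tmax",
    hourly_temp="hourly_temp.csv",   # hourly temperature, read via tstoolbox
)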
Example #19
def csvtowdm(
    wdmpath,
    dsn,
    start_date=None,
    end_date=None,
    columns=None,
    force_freq=None,
    groupby=None,
    round_index=None,
    clean=False,
    target_units=None,
    source_units=None,
    input_ts="-",
):
    """Write data from a CSV file to a DSN.

    File can have comma separated
    'year', 'month', 'day', 'hour', 'minute', 'second', 'value'
    OR
    'date/time string', 'value'

    Parameters
    ----------
    wdmpath
        Path and WDM
        filename.
    dsn
        The Data Set Number in the WDM
        file.
    {input_ts}
    {start_date}
    {end_date}
    {columns}
    {force_freq}
    {groupby}
    {round_index}
    {clean}
    {target_units}
    {source_units}

    """
    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        force_freq=force_freq,
        groupby=groupby,
        round_index=round_index,
        clean=clean,
        target_units=target_units,
        source_units=source_units,
    )

    if len(tsd.columns) > 1:
        raise ValueError(
            tsutils.error_wrapper("""
The input data set must contain only 1 time series.
You gave {0}.
""".format(len(tsd.columns))))

    _writetodsn(wdmpath, dsn, tsd)
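A one-line usage sketch (paths hypothetical; the picked data must reduce to a single series):

# Write column 1 of the CSV into DSN 101 of the WDM file.
csvtowdm("data.wdm", 101, input_ts="obs.csv", columns=1)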
Example #20
def _writetodsn(wdmpath, dsn, data):
    """Local function to write Pandas data frame to DSN."""
    data = tsutils.asbestfreq(data)
    infer = data.index.freqstr
    pandacode = infer.lstrip("0123456789")
    tstep = infer[:infer.find(pandacode)]
    try:
        tstep = int(tstep)
    except ValueError:
        tstep = 1

    invmapcode = {
        1: "second",
        2: "minute",
        3: "hour",
        4: "day",
        5: "month",
        6: "annual",
    }

    mapcode = {
        "A": 6,  # annual
        "A-DEC": 6,  # annual
        "AS": 6,  # annual start
        "M": 5,  # month
        "MS": 5,  # month start
        "D": 4,  # day
        "H": 3,  # hour
        "T": 2,  # minute
        "S": 1,  # second
    }
    try:
        finterval = mapcode[pandacode]
    except KeyError:
        raise KeyError("""
*
*   wdmtoolbox only understands PANDAS time intervals of :
*   'A', 'AS', 'A-DEC' for annual,
*   'M', 'MS' for monthly,
*   'D', 'H', 'T', 'S' for day, hour, minute, and second.
*   wdmtoolbox thinks this series is {0}.
*
""".format(pandacode))

    # Convert string to int
    dsn = int(dsn)

    # Make sure that input data metadata matches target DSN
    desc_dsn = _describedsn(wdmpath, dsn)

    dsntcode = desc_dsn["tcode"]
    if finterval != dsntcode:
        raise ValueError(
            tsutils.error_wrapper("""
The DSN {2} has a tcode of {0} ({3}),
but the data has a tcode of {1} ({4}).
""".format(
                dsntcode,
                finterval,
                dsn,
                invmapcode[dsntcode],
                invmapcode[finterval],
            )))

    dsntstep = desc_dsn["tstep"]
    if dsntstep != tstep:
        raise ValueError(
            tsutils.error_wrapper("""
The DSN has a tstep of {0}, but the data has a tstep of {1}.
""".format(dsntstep, tstep)))

    WDM.write_dsn(wdmpath, dsn, data)
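How the inferred pandas frequency string splits into a WDM tstep and tcode, as a sketch:

infer = "6H"                                      # from data.index.freqstr
pandacode = infer.lstrip("0123456789")            # "H"  -> mapcode gives tcode 3
tstep = int(infer[:infer.find(pandacode)] or 1)   # 6; bare "D" falls back to 1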
Example #21
def process(uci,
            hbn,
            pwbe,
            year,
            ofilename,
            modulus,
            tablefmt,
            float_format=".2f"):

    from hspfbintoolbox.hspfbintoolbox import extract

    if ofilename:
        sys.stdout = open(ofilename, "w")

    try:
        year = int(year)
    except TypeError:
        pass

    lcnames = {i: i for i in range(1, modulus + 1)}
    inverse_lcnames = {i: i for i in range(1, modulus + 1)}
    inverse_lc = {}

    lnds = {}

    if uci is not None:
        with open(uci) as fp:
            content = fp.readlines()

        if not os.path.exists(hbn):
            raise ValueError(
                tsutils.error_wrapper(f"""
File {hbn} does not exist.
"""))

        content = [i[:80] for i in content]
        content = [i.rstrip() for i in content]

        schematic_start = content.index("SCHEMATIC")
        schematic_end = content.index("END SCHEMATIC")
        schematic = content[schematic_start:schematic_end + 1]

        perlnd_start = content.index("PERLND")
        perlnd_end = content.index("END PERLND")
        perlnd = content[perlnd_start:perlnd_end + 1]

        pgeninfo_start = perlnd.index("  GEN-INFO")
        pgeninfo_end = perlnd.index("  END GEN-INFO")
        pgeninfo = perlnd[pgeninfo_start:pgeninfo_end + 1]

        masslink_start = content.index("MASS-LINK")
        masslink_end = content.index("END MASS-LINK")
        masslink = content[masslink_start:masslink_end + 1]

        lcnames = {}
        inverse_lcnames = {}
        inverse_lc = {}
        for line in pgeninfo[1:-1]:
            if "***" in line:
                continue
            if "" == line.strip():
                continue
            try:
                _ = int(line[5:10])
                continue
            except ValueError:
                pass
            lcnames.setdefault(line[10:30].strip(), []).append(int(line[:5]))
            inverse_lcnames[int(line[:5])] = line[10:30].strip()
            inverse_lc[int(line[:5]) % modulus] = line[10:30].strip()

        masslink = [i for i in masslink if "***" not in i]
        masslink = [i for i in masslink if len(i.strip()) > 0]
        masslink = " ".join(masslink)
        mlgroups = re.findall(
            r"  MASS-LINK +?([0-9]+).*?LND     [PI]WATER [PS][EU]RO.*?  END MASS-LINK +?\1 ",
            masslink,
        )

        for line in schematic[3:-1]:
            if "***" in line:
                continue
            if "" == line:
                continue
            words = line.split()
            if words[0] in ["PERLND", "IMPLND"] and words[5] in mlgroups:
                lnds[(words[0], int(words[1]))] = lnds.setdefault(
                    (words[0], int(words[1])), 0.0) + float(words[2])

    try:
        pdf = extract(hbn, "yearly", ",,,")
    except ValueError:
        raise ValueError(
            tsutils.error_wrapper(f"""
The binary file "{hbn}" does not have consistent ending months between PERLND and
IMPLND.  This could be caused by the BYREND (Binary YeaR END) being set
differently in the PERLND:BINARY-INFO and IMPLND:BINARY-INFO, or you could
have the PRINT-INFO bug.  To work around the PRINT-INFO bug, add a PERLND
PRINT-INFO block, setting the PYREND here will actually work in the
BINARY-INFO block.
"""))

    if year is not None:
        pdf = pd.DataFrame(pdf.loc[f"{year}-01-01", :]).T
    pdf = pdf[[i for i in pdf.columns if "PERLND" in i or "IMPLND" in i]]

    mindex = [i.split("_") for i in pdf.columns]
    mindex = [(i[0], int(i[1]), i[2], int(i[1]) % modulus) for i in mindex]
    mindex = pd.MultiIndex.from_tuples(mindex,
                                       names=["op", "number", "wbt", "lc"])
    pdf.columns = mindex
    pdf = pdf.sort_index(axis="columns")
    mindex = pdf.columns
    aindex = [(i[0], i[1]) for i in pdf.columns]
    mindex = [(
        i[0],
        int(i[1]),
        i[2],
        int(i[1]) % modulus,
        float(lnds.setdefault(j, 0.0)),
        str(inverse_lcnames.setdefault(int(i[1]), "")),
    ) for i, j in zip(mindex, aindex)]
    mindex = pd.MultiIndex.from_tuples(
        mindex, names=["op", "number", "wbt", "lc", "area", "lcname"])
    pdf.columns = mindex

    nsum = {}
    areas = {}
    namelist = {}
    setl = [i[1] for i in pwbe]
    setl = [item for sublist in setl for item in sublist]
    for lue in ["PERLND", "IMPLND"]:
        for wbterm in [i[0] for i in setl if i[0]]:
            for lc in list(range(1, modulus + 1)):
                try:
                    subset = pdf.loc[:, (lue, slice(None), wbterm, lc,
                                         slice(None), slice(None))]
                except KeyError:
                    continue

                _give_negative_warning(subset)

                if uci is None:
                    if subset.empty is True:
                        nsum[(lue, lc, wbterm)] = 0.0
                        if (lue, lc) not in namelist:
                            namelist[(lue, lc)] = ""
                    else:
                        nsum[(lue, lc,
                              wbterm)] = subset.mean(axis="columns").mean()
                        namelist[(lue, lc)] = inverse_lc.setdefault(lc, lc)
                else:
                    sareas = subset.columns.get_level_values("area")
                    ssareas = sum(sareas)
                    if (lue, lc) not in areas:
                        areas[(lue, lc)] = ssareas

                    if subset.empty is True or ssareas == 0:
                        nsum[(lue, lc, wbterm)] = 0.0
                        if (lue, lc) not in namelist:
                            namelist[(lue, lc)] = ""
                    else:
                        fa = sareas / areas[(lue, lc)]
                        nsum[(lue, lc,
                              wbterm)] = ((subset *
                                           fa).sum(axis="columns").mean())
                        namelist[(lue, lc)] = inverse_lc.setdefault(lc, lc)

    newnamelist = []
    for key, value in sorted(namelist.items()):
        if key[0] != "PERLND":
            continue
        if key[1] == value:
            newnamelist.append(f"{key[1]}")
        else:
            newnamelist.append(f"{key[1]}-{value}")

    printlist = []
    printlist.append([" "] + newnamelist + ["ALL"])

    mapipratio = {}
    mapipratio["PERLND"] = 1.0
    mapipratio["IMPLND"] = 1.0

    if uci is not None:
        pareas = []
        pnl = []
        iareas = []
        for nloper, nllc in namelist:
            if nloper == "PERLND":
                pnl.append((nloper, nllc))
                pareas.append(areas[("PERLND", nllc)])
        # If there is a PERLND there must be a IMPLND.
        for ploper, pllc in pnl:
            try:
                iareas.append(areas[("IMPLND", pllc)])
            except KeyError:
                iareas.append(0.0)
        ipratio = np.array(iareas) / (np.array(pareas) + np.array(iareas))
        ipratio = np.nan_to_num(ipratio)
        ipratio = np.pad(ipratio, (0, len(pareas) - len(iareas)), "constant")
        sumareas = sum(pareas) + sum(iareas)

        percent_areas = {}
        percent_areas["PERLND"] = np.array(pareas) / sumareas * 100
        percent_areas["IMPLND"] = np.array(iareas) / sumareas * 100
        percent_areas[
            "COMBINED"] = percent_areas["PERLND"] + percent_areas["IMPLND"]

        printlist.append(["PERVIOUS"])
        printlist.append(["AREA(acres)"] +
                         [str(i) if i > 0 else ""
                          for i in pareas] + [str(sum(pareas))])

        printlist.append(
            ["AREA(%)"] +
            [str(i) if i > 0 else "" for i in percent_areas["PERLND"]] +
            [str(sum(percent_areas["PERLND"]))])

        printlist.append([])
        printlist.append(["IMPERVIOUS"])
        printlist.append(["AREA(acres)"] +
                         [str(i) if i > 0 else ""
                          for i in iareas] + [str(sum(iareas))])

        printlist.append(
            ["AREA(%)"] +
            [str(i) if i > 0 else "" for i in percent_areas["IMPLND"]] +
            [str(sum(percent_areas["IMPLND"]))])
        printlist.append([])

        mapipratio["PERLND"] = 1.0 - ipratio
        mapipratio["IMPLND"] = ipratio

    mapr = {}
    mapr["PERLND"] = 1.0
    mapr["IMPLND"] = 1.0

    for term, op in pwbe:
        if not term:
            printlist.append([])
            continue

        test = [i[1] for i in op]
        if "IMPLND" in test and "PERLND" in test:
            maprat = mapipratio
            sumop = "COMBINED"
        else:
            maprat = mapr
            sumop = test[0]

        te = [0.0]
        for sterm, operation in op:
            try:
                tmp = np.array([
                    nsum[(*i, sterm)] for i in sorted(namelist)
                    if i[0] == operation
                ])
                if uci is not None:
                    tmp = (np.pad(tmp,
                                  (0, len(pareas) - len(tmp)), "constant") *
                           maprat[operation])
                te = te + tmp
            except KeyError:
                pass
        if uci is None:
            te = ([term] + [str(i) if i > 0 else ""
                            for i in te] + [str(sum(te) / len(te))])
        else:
            nte = np.pad(te, (0, len(iareas) - len(te)), "constant")
            te = ([term] + [str(i) if i > 0 else "" for i in nte] +
                  [str(sum(nte * percent_areas[sumop]) / 100)])
        printlist.append(te)

    if tablefmt in ["csv", "tsv", "csv_nos", "tsv_nos"]:
        sep = {
            "csv": ",",
            "tsv": "\t",
            "csv_nos": ",",
            "tsv_nos": "\t",
        }[tablefmt]
        fmt = simple_separated_format(sep)
    else:
        fmt = tablefmt
    if tablefmt in ["csv_nos", "tsv_nos"]:
        print(
            re.sub(" *, *", ",",
                   tabulate(printlist, tablefmt=fmt, floatfmt=float_format)))
    else:
        print(tabulate(printlist, tablefmt=fmt, floatfmt=float_format))
Example #22
def temperature(
    method: Literal["sine_min_max", "sine_mean", "sine", "mean_course_min_max",
                    "mean_course_mean"],
    source_units,
    min_max_time: Literal["fix", "sun_loc", "sun_loc_shift"] = "fix",
    mod_nighttime: bool = False,
    input_ts="-",
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    print_input=False,
    target_units=None,
    max_delta: bool = False,
    temp_min_col: Optional[Union[tsutils.IntGreaterEqualToOne, str]] = None,
    temp_max_col: Optional[Union[tsutils.IntGreaterEqualToOne, str]] = None,
    temp_mean_col: Optional[Union[tsutils.IntGreaterEqualToOne, str]] = None,
    lat: Optional[FloatLatitude] = None,
    lon: Optional[FloatLongitude] = None,
    hourly: Optional[str] = None,
):
    """Disaggregate daily to hourly data.

    disaggregate_temperature(data_daily,
                             method='sine_min_max',
                             min_max_time='fix',
                             mod_nighttime=False,
                             max_delta=None,
                             mean_course=None,
                             sun_times=None):

    data_daily :      daily data
    method :          method to disaggregate
    min_max_time:     "fix" - min/max temperature at fixed times 7h/14h,
                      "sun_loc" - min/max calculated by sunrise/sunnoon + 2h,
                      "sun_loc_shift" - min/max calculated by sunrise/sunnoon +
                                        monthly mean shift,
    mod_nighttime:    ?
    max_delta:        maximum monthly temperature shift as returned by
                      get_shift_by_data()
    mean_course:      ?
    sun_times:        times of sunrise/noon as returned by get_sun_times()

    if method in ('sine_min_max', 'sine_mean', 'sine')
        .tmin from temp_min_col
        .tmax from temp_max_col
        .temp from temp_mean_col

    if method in ('mean_course_min_max', 'mean_course_mean')
        .tmin from temp_min_col
        .tmax from temp_max_col
        require hourly temperature perhaps from nearby station in 'mean_course'

    if method == 'mean_course_mean'
        .temp from temp_mean_col

    sun_times = get_sun_times(dates, lon, lat, round(lon/15.0))

    max_delta = get_shift_by_data(temp_hourly, lon, lat, round(lon/15.0))
    """
    target_units = single_target_units(source_units, target_units, "degC")

    pd.options.display.width = 60

    if (method in ["mean_course_min", "mean_course_mean"]
            or max_delta is True) and hourly is None:
        raise ValueError(
            tsutils.error_wrapper("""
The methods "mean_course_min", "mean_course_mean", or if `max_delta` is
True, require a HOURLY temperature values in the CSV file specified by the
keyword `hourly`."""))

    if method in ["mean_course_min", "mean_course_mean"] or max_delta is True:
        hourly = tstoolbox.read(hourly)

    if max_delta is True:
        max_delta = get_shift_by_data(hourly, lon, lat, round(lon / 15.0))
    else:
        max_delta = None

    if temp_min_col is None or temp_max_col is None:
        raise ValueError(
            tsutils.error_wrapper("""
For "temperature" disaggregation you need to supply the daily minimum
column (name or number, data column numbering starts at 1) and the daily
maximum column (name or number).

Instead `temp_min_col` is {temp_min_col} and `temp_max_col` is
{temp_max_col}""".format(**locals())))

    columns = []
    try:
        temp_min_col = int(temp_min_col)
    except TypeError:
        pass
    columns.append(temp_min_col)

    try:
        temp_max_col = int(temp_max_col)
    except TypeError:
        pass
    columns.append(temp_max_col)

    if temp_mean_col is not None:
        try:
            temp_mean_col = int(temp_mean_col)
        except TypeError:
            pass
        columns.append(temp_mean_col)

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    if len(tsd.columns) == 3:
        tsd.columns = ["tmin", "tmax", "temp"]
    else:
        tsd.columns = ["tmin", "tmax"]

    if any((tsd.tmax <= tsd.tmin).dropna()):
        raise ValueError(
            tsutils.error_wrapper("""
On the following dates:

{0},

minimum temperature values in column "{1}" are greater than or equal to
the maximum temperature values in column "{2}".""".format(
                tsd[tsd.tmax <= tsd.tmin].index, temp_min_col, temp_max_col)))

    if temp_mean_col is None:
        warnings.warn(
            tsutils.error_wrapper("""
Since `temp_mean_col` is None, the average daily temperature will be
estimated by the average of `temp_min_col` and `temp_max_col`""".format(
                **locals())))
        tsd["temp"] = (tsd.tmin + tsd.tmax) / 2.0

        if any((tsd.tmin >= tsd.temp).dropna()) or any(
            (tsd.tmax <= tsd.temp).dropna()):
            raise ValueError(
                tsutils.error_wrapper("""
On the following dates:

{0},

the daily average is either below or equal to the minimum temperature in
column {1} or above or equal to the maximum temperature in column
{2}.""".format(
                    tsd[(tsd.tmin >= tsd.temp) | (tsd.tmax <= tsd.temp)].index,
                    temp_min_col,
                    temp_max_col,
                )))

    if min_max_time == "fix":
        # Not dependent on sun, just average values.
        sun_times = pd.DataFrame(
            index=[1], columns=["sunrise", "sunnoon", "sunset", "daylength"])
        sun_times.sunrise = 7
        sun_times.sunnoon = 12
        sun_times.sunset = 19
        sun_times.daylength = 12
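    # Any other `min_max_time` option computes sunrise/noon/sunset from the
    # location; round(lon / 15.0) approximates the time-zone offset in hours.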
    else:
        if lat is None or lon is None:
            raise ValueError(
                tsutils.error_wrapper("""
The `min_max_time` options other than "fix" require calculation of
sunrise, sun noon, sunset, and day length.  The calculation requires the
latitude with keyword "lat" and longitude with keyword "lon".
You gave:

    lat={lat}

    lon={lon}
""".format(**locals())))

        sun_times = get_sun_times(tsd.index, float(lon), float(lat),
                                  round(lon / 15.0))

    ntsd = pd.DataFrame(
        disaggregate_temperature(
            tsd,
            method=method,
            min_max_time=min_max_time,
            mod_nighttime=mod_nighttime,
            max_delta=max_delta,
            mean_course=hourly,
            sun_times=sun_times,
        ))

    ntsd.columns = ["temperature:{0}:disagg".format(target_units[0])]

    return tsutils.return_input(print_input, tsd, ntsd)
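
A minimal usage sketch of this temperature-disaggregation function. The import path, CSV file, column names, and the "sine_min_max" method name are illustrative assumptions, not taken from this file:

from mettoolbox import disaggregate  # assumed import path

# Hypothetical daily file with named tmin/tmax columns; methods other
# than the mean-course ones need lat/lon to time sunrise and sunset.
hourly_temp = disaggregate.temperature(
    method="sine_min_max",       # assumed method name
    source_units=["degC", "degC"],
    input_ts="daily_temps.csv",  # hypothetical file
    temp_min_col="tmin",
    temp_max_col="tmax",
    lat=28.0,
    lon=-81.0,
)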
Example #23
0
File: pet.py Project: timcera/mettoolbox
def _preprocess(
    input_ts,
    temp_min_col,
    temp_max_col,
    temp_mean_col,
    temp_min_required,
    temp_max_required,
    skiprows,
    names,
    index_type,
    start_date,
    end_date,
    round_index,
    dropna,
    clean,
    source_units,
):
    columns, column_names = utils._check_temperature_cols(
        temp_min_col=temp_min_col,
        temp_max_col=temp_max_col,
        temp_mean_col=temp_mean_col,
        temp_min_required=temp_min_required,
        temp_max_required=temp_max_required,
    )

    tsd = tsutils.common_kwds(
        input_ts,
        skiprows=skiprows,
        names=names,
        index_type=index_type,
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        clean=clean,
    )

    if source_units is None:
        # If "source_units" keyword is None must have source_units in column name.
        source_units = []
        for units in tsd.columns:
            words = units.split(":")
            if len(words) >= 2:
                source_units.append(words[1])
            else:
                raise ValueError(
                    tsutils.error_wrapper(
                        """
If "source_units" are not supplied as the second ":" delimited field in the column name
they must be supplied with the "source_units" keyword.  """
                    )
                )
    else:
        source_units = tsutils.make_list(source_units)
    if len(source_units) != len(tsd.columns):
        raise ValueError(
            tsutils.error_wrapper(
                """
The number of "source_units" terms must match the number of temperature columns.
                                               """
            )
        )
    interim_target_units = ["degC"] * len(tsd.columns)

    tsd = tsutils.common_kwds(
        tsd,
        source_units=source_units,
        target_units=interim_target_units,
    )

    tsd.columns = column_names

    tsd = utils._validate_temperatures(tsd, temp_min_col, temp_max_col)
    return tsd
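
The units-in-the-column-name convention that `_preprocess` falls back on when `source_units` is None looks like this in practice (the frame and column names below are invented for illustration):

import pandas as pd

# Columns follow a "name:units" convention; the second ":"-delimited
# field is taken as the source units when none are given explicitly.
df = pd.DataFrame(
    {"tmin:degF": [50.0, 52.0], "tmax:degF": [70.0, 75.0]},
    index=pd.date_range("2000-01-01", periods=2, freq="D"),
)
source_units = [col.split(":")[1] for col in df.columns]  # ['degF', 'degF']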
Example #24
0
def evaporation(
    method: Literal["trap", "fixed"],
    source_units,
    input_ts="-",
    columns=None,
    start_date=None,
    end_date=None,
    dropna="no",
    clean=False,
    round_index=None,
    skiprows=None,
    index_type="datetime",
    names=None,
    target_units=None,
    print_input=False,
    lat: Optional[FloatLatitude] = None,
):
    """Disaggregate daily to hourly data."""
    target_units = single_target_units(source_units, target_units)

    pd.options.display.width = 60

    if method == "trap" and lat is None:
        raise ValueError(
            tsutils.error_wrapper("""
The "trap" method requires latitude with the `lat` keyword.  You gave
"{lat}". """.format(**locals())))

    tsd = tsutils.common_kwds(
        tsutils.read_iso_ts(input_ts,
                            skiprows=skiprows,
                            names=names,
                            index_type=index_type),
        start_date=start_date,
        end_date=end_date,
        pick=columns,
        round_index=round_index,
        dropna=dropna,
        source_units=source_units,
        target_units=target_units,
        clean=clean,
    )

    # Pad one extra day so the hourly resample covers the final day.
    ntsd = pd.concat([
        tsd,
        pd.DataFrame(columns=tsd.columns,
                     index=[tsd.index[-1] + datetime.timedelta(days=1)]),
    ])
    ndata = ntsd.resample("H").ffill()

    fdata = pd.DataFrame(columns=ndata.columns, index=ndata.index, dtype="f")

    if method == "trap":
        lrad = lat * np.pi / 180.0

        # Solar declination by day of year.
        ad = 0.40928 * np.cos(0.0172141 * (172 - tsd.index.dayofyear))
        ss = np.sin(lrad) * np.sin(ad)
        cs = np.cos(lrad) * np.cos(ad)
        x2 = -ss / cs
        # arctan(x / sqrt(1 - x**2)) == arcsin(x), so this is
        # (24 / pi) * arccos(-tan(lat) * tan(decl)): the day length in hours.
        delt = 7.6394 * (np.pi / 2.0 - np.arctan(x2 / np.sqrt(1 - x2**2)))
        sunr = 12.0 - delt / 2.0

        # develop hourly distribution given sunrise,
        # sunset and length of day (DELT)
        dtr2 = delt / 2.0
        dtr4 = delt / 4.0
        tr2 = sunr + dtr4
        tr3 = tr2 + dtr2
        tr4 = tr3 + dtr4
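
        # The four breakpoints sketch a trapezoid over each day: zero at
        # sunrise, 1.0 from tr2 through tr3, zero again just past tr4; the
        # loop below pins those values and interpolation fills the ramps.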

        for index, _ in enumerate(sunr):
            cdate = ntsd.index[index]
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(sunr[index])), :] = 0.0
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(tr4[index]) + 1), :] = 0.0
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(round(tr2[index]))), :] = 1.0
            fdata.loc[datetime.datetime(cdate.year, cdate.month, cdate.day,
                                        int(round(tr3[index]))), :] = 1.0

        fdata.iloc[0, :] = 0.0
        fdata.iloc[-1, :] = 0.0

        fdata = fdata.interpolate("linear")

        fdata = fdata.fillna(0.0)

        # Normalize so each day's fractions sum to one, then distribute
        # the daily totals across the hours.
        fdata = fdata / fdata.groupby(
            pd.Grouper(freq="D")).sum().resample("H").ffill()

        fdata = fdata * ndata

        # Drop the padding day.
        fdata = fdata.iloc[:-1, :]

    elif method == "fixed":
        # DATA EVAPDIST / 0.000,0.000,0.000,0.000,0.000,0.000,0.019,0.041,
        # $ 0.067,0.088,0.102,0.110,0.110,0.110,0.105,0.095,
        # $ 0.081,0.055,0.017,0.000,0.000,0.000,0.000,0.000
        fdata = fdata.fillna(0.0)

        fdata[fdata.index.hour == 7] = 0.019
        fdata[fdata.index.hour == 8] = 0.041
        fdata[fdata.index.hour == 9] = 0.067
        fdata[fdata.index.hour == 10] = 0.088
        fdata[fdata.index.hour == 11] = 0.102
        fdata[fdata.index.hour == 12] = 0.110
        fdata[fdata.index.hour == 13] = 0.110
        fdata[fdata.index.hour == 14] = 0.110
        fdata[fdata.index.hour == 15] = 0.105
        fdata[fdata.index.hour == 16] = 0.095
        fdata[fdata.index.hour == 17] = 0.081
        fdata[fdata.index.hour == 18] = 0.055
        fdata[fdata.index.hour == 19] = 0.017

        fdata = fdata * ndata

        # Drop the padding day.
        fdata = fdata.iloc[:-1, :]

    return tsutils.return_input(print_input, tsd, fdata, None)
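
One property worth noting about the "fixed" branch: the hard-wired hourly fractions (the EVAPDIST table quoted in the comment above) sum to exactly 1.0, so multiplying them into the forward-filled daily totals conserves each day's evaporation. A quick check:

# Hourly fractions applied by the "fixed" method (hours 0-23; nonzero
# from 07:00 through 19:00, matching the assignments above).
evapdist = [0.0] * 7 + [
    0.019, 0.041, 0.067, 0.088, 0.102, 0.110, 0.110,
    0.110, 0.105, 0.095, 0.081, 0.055, 0.017,
] + [0.0] * 4
assert len(evapdist) == 24
assert abs(sum(evapdist) - 1.0) < 1e-9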