Example #1
    def tsa_group_by(self, datestring, tsa, subkeys, group_func):
        """
        TODO: make this method static, interval should be in tsa
        group the given tsa by subkeys, and use group_func to aggregate the data
        first all Timeseries will be aligned in time, to get proper points in the timeline
        the slot interval is taken from self.__interval

        parameters:
        datestring <str> isodate representation of the day, used to find the start timestamp
        tsa <TimeseriesArrayLazy>
        subkeys <tuple> may also be empty, to aggregate everything
        group_func <func> like lambda a, b: (a + b) / 2 to get averages

        returns:
        <TimeseriesArrayLazy>
        """
        # intermediate tsa to group the aligned data into
        tsa2 = TimeseriesArrayLazy(index_keys=subkeys, value_keys=tsa.value_keys, ts_key=tsa.ts_key, datatypes=tsa.datatypes)
        start_ts, _ = DataLogger.get_ts_for_datestring(datestring)
        ts_keyname = tsa.ts_key
        for data in tsa.export():
            # align every timestamp to the nearest interval slot of the day
            nearest_slot = round((data[ts_keyname] - start_ts) / self.__interval)
            data[ts_keyname] = int(start_ts + nearest_slot * self.__interval)
            tsa2.group_add(data, group_func)
        return tsa2
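
To make the slot alignment above concrete, here is a tiny self-contained sketch of the same arithmetic; the 300-second interval and the timestamps are made-up values, not taken from the code above:

    # standalone sketch of the slot alignment used in tsa_group_by
    interval = 300                               # assumed slot length in seconds
    start_ts = 1451520000                        # assumed midnight of the day
    raw_ts = start_ts + 4 * interval + 17        # a sample lying 17s off the grid
    nearest_slot = round((raw_ts - start_ts) / float(interval))
    aligned = int(start_ts + nearest_slot * interval)
    assert aligned == start_ts + 4 * interval    # drift snapped to the nearest slot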
Example #2
    def import_tsa(self, datestring, tsa):
        """
        store the tsa given in parameter in global_cache to make the data available

        usually this could be a modified existing tsa, extended by some keys, filtered or ...
        the structure has to be predefined in meta data

        the tsa can afterwards be accessed via the normal frontends (web, api)

        parameters:
        datestring <str> isodate representation of date like 2015-12-31
        tsa <TimeseriesArrayLazy> object
        """
        assert self.__index_keynames == tsa.index_keynames
        assert self.__value_keynames == tuple(tsa.value_keynames)
        cachedir = self.__get_cachedir(datestring)
        cachefilename = os.path.join(cachedir, TimeseriesArrayLazy.get_dumpfilename(tsa.index_keynames))
        if os.path.isfile(cachefilename):
            raise Exception("TSA Archive %s already exists in cache" % cachefilename)
        tsa.dump_split(cachedir)
        tsastats = TimeseriesArrayStats(tsa)
        tsastats.dump(cachedir)
        quantile = QuantileArray(tsa, tsastats)
        q_cachefilename = os.path.join(cachedir, "quantile.json")
        with open(q_cachefilename, "wb") as outfile:
            quantile.dump(outfile)
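
A hedged sketch of the intended import workflow; `datalogger` is an assumed, already configured instance, and the modification step is left abstract:

    # hypothetical workflow: derive a modified tsa, then store it in the cache
    datestring = "2015-12-31"
    tsa = datalogger.load_tsa(datestring)     # load the original data
    # ... modify tsa here: extend by keys, filter rows, and so on ...
    datalogger.import_tsa(datestring, tsa)    # raises if the archive is already cached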
Example #3
    def load_tsa_raw(self, datestring, timedelta=0):
        """
        read data from raw input files and return a TimeseriesArrayLazy object

        parameters:
        datestring <str> isodate representation of date like 2015-12-31
        timedelta <int> number of seconds to correct raw input timestamps by

        returns:
        <TimeseriesArrayLazy> object which holds all data of this day
        """
        tsa = TimeseriesArrayLazy(self.__index_keynames, self.__value_keynames, datatypes=self.__datatypes)
        for rowdict in self.__get_raw_data_dict(datestring, timedelta):
            try:
                tsa.add(rowdict)
            except (ValueError, AssertionError) as exc:
                logging.exception(exc)
                logging.error("%s while adding this data to TimeseriesArrayLazy: %s", type(exc).__name__, rowdict)
                raise
        return tsa
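
For orientation, a single row as consumed by tsa.add() might look like the sketch below; the concrete key names are assumptions, only the shape (one flat dict per timestamped row, containing index keys, value keys and the timestamp) follows from the code above:

    # hypothetical row as yielded by __get_raw_data_dict (key names are made up)
    rowdict = {
        "ts": 1451520000.0,     # timestamp, already shifted by timedelta
        "hostname": "srv01",    # an index key
        "bytes_in": 1024.0,     # a value key
        "bytes_out": 2048.0,    # a value key
    }
    tsa.add(rowdict)            # raises ValueError/AssertionError on malformed rows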
Example #4
    def fallback():
        """
        fallback method to use if reading from cached data is not possible

        note: self, datestring, timedelta, cachedir, validate, filterkeys and
        index_pattern are closure variables from the enclosing load_tsa scope
        """
        tsa = self.load_tsa_raw(datestring, timedelta)
        tsa.dump_split(cachedir) # save full data
        # optionally re-read the data afterwards to make sure the dump is intact
        if validate is True:
            tsa = TimeseriesArrayLazy.load_split(cachedir, self.__index_keynames, filterkeys=filterkeys, index_pattern=index_pattern, datatypes=self.__datatypes)
        # also generate TSASTATS and dump it to the cache directory
        tsastats = TimeseriesArrayStats(tsa) # generate full stats
        tsastats.dump(cachedir) # save
        # and last but not least the quantiles
        quantile = QuantileArray(tsa, tsastats)
        q_cachefilename = os.path.join(cachedir, "quantile.json")
        with open(q_cachefilename, "wb") as outfile:
            quantile.dump(outfile)
        # finally return the tsa
        return tsa
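
This fallback is one half of a classic cache-aside pattern: try the cache, and on a miss or a corrupt file rebuild from the expensive raw source. A self-contained sketch of that shape with generic names (not the DataLogger API):

    import json
    import os

    def load_cached(cachefile, rebuild):
        """generic cache-aside: return cached JSON, else rebuild and cache it"""
        if os.path.isfile(cachefile):
            try:
                with open(cachefile, "r") as infile:
                    return json.load(infile)
            except (IOError, ValueError):
                os.unlink(cachefile)   # corrupt cache: drop it and fall through
        data = rebuild()               # expensive raw read
        with open(cachefile, "w") as outfile:
            json.dump(data, outfile)   # persist for the next call
        return data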
Example #5
    def load_tsa(self, datestring, filterkeys=None, index_pattern=None, timedelta=0, cleancache=False, validate=False):
        """
        caching version of load_tsa_raw
        on the first call, the tsa is read via load_tsa_raw and then dumped to cache;
        every subsequent call reads the cached version
        use cleancache to remove the cache and force a fresh read

        parameters:
        datestring <str>
        filterkeys <tuple> or None, default None
        index_pattern <str> or None, default None
        timedelta <int> default 0
        cleancache <bool> default False
        validate <bool> if data is read from raw, dump it after the initial read
            and re-read it afterwards to make sure the stored tsa is OK;
            this costs additional time

        returns:
        <TimeseriesArrayLazy> object read from cachefile or from raw data
        """
        try:
            assert not_today(datestring)
        except AssertionError:
            raise DataLoggerLiveDataError("Reading from live data is not allowed")
        cachedir = self.__get_cachedir(datestring)
        cachefilename = os.path.join(cachedir, TimeseriesArrayLazy.get_dumpfilename(self.__index_keynames))
        def fallback():
            """
            fallback method to use if reading from cached data is not possible
            """
            tsa = self.load_tsa_raw(datestring, timedelta)
            tsa.dump_split(cachedir) # save full data
            # optionally re-read the data afterwards to make sure the dump is intact
            if validate is True:
                tsa = TimeseriesArrayLazy.load_split(cachedir, self.__index_keynames, filterkeys=filterkeys, index_pattern=index_pattern, datatypes=self.__datatypes)
            # also generate TSASTATS and dump it to the cache directory
            tsastats = TimeseriesArrayStats(tsa) # generate full stats
            tsastats.dump(cachedir) # save
            # and last but not least the quantiles
            quantile = QuantileArray(tsa, tsastats)
            q_cachefilename = os.path.join(cachedir, "quantile.json")
            with open(q_cachefilename, "wb") as outfile:
                quantile.dump(outfile)
            # finally return the tsa
            return tsa
        if not os.path.isfile(cachefilename):
            logging.info("cachefile %s does not exist, falling back to reading raw data", cachefilename)
            return fallback()
        if cleancache:
            logging.info("deleting cachefile %s and reading from raw data", cachefilename)
            os.unlink(cachefilename)
            return fallback()
        logging.debug("loading stored TimeseriesArrayLazy object from file %s", cachefilename)
        try:
            tsa = TimeseriesArrayLazy.load_split(cachedir, self.__index_keynames, filterkeys=filterkeys, index_pattern=index_pattern, datatypes=self.__datatypes)
            return tsa
        except (IOError, EOFError) as exc:
            logging.error("%s while reading from %s, using fallback", type(exc).__name__, cachefilename)
            os.unlink(cachefilename)
            return fallback()
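
Finally, a hedged usage sketch of the caching behaviour; `datalogger` is an assumed, fully configured instance, and the date has to lie in the past, since reading live data raises DataLoggerLiveDataError:

    # hypothetical usage: the first call builds the cache, later calls hit it
    datestring = "2015-12-31"                                 # must not be today
    tsa = datalogger.load_tsa(datestring, validate=True)      # slow: raw read plus re-read check
    tsa = datalogger.load_tsa(datestring)                     # fast: served from cache
    tsa = datalogger.load_tsa(datestring, cleancache=True)    # force a rebuild from raw data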