예제 #1
0
    def _timestamp_exist(self, name, data_frame):
        """
        Reject a DataFrame whose index overlaps timestamps already stored
        under ``name``.

        The earliest and latest index labels of ``data_frame`` are converted
        to epoch seconds and used as the range passed to ``self.count`` and
        ``self.get_slice``; the index of the returned slice is then
        intersected with the incoming index.

        :param name: redis key
        :param data_frame: pandas.DataFrame indexed by timestamps
        :return: None
        :raises RedisTimeSeriesError: if any incoming index label is also
            present in the slice returned by ``self.get_slice``
        """
        date_index = data_frame.index

        # An empty frame has no timestamps, so nothing can collide (and
        # min()/max() would not yield a usable timestamp).
        if len(date_index) == 0:
            return

        # The range bounds must come from the index itself.
        # DataFrame.idxmin()/idxmax() report where the column *values* are
        # smallest/largest, which is unrelated to the earliest/latest label.
        start_timestamp = date_index.min().to_pydatetime().timestamp()
        end_timestamp = date_index.max().to_pydatetime().timestamp()

        exist_length = self.count(name, start_timestamp, end_timestamp)

        if exist_length > 0:
            stored_frame = self.get_slice(name, start_timestamp,
                                          end_timestamp)

            # check repeated data
            duplicated = numpy.intersect1d(
                stored_frame.index.to_pydatetime(),
                date_index.to_pydatetime())

            if duplicated.size > 0:
                raise RedisTimeSeriesError(
                    "add duplicated timestamp into redis -> timestamp:")
예제 #2
0
    def add(self, name, series):
        """
        Store one row under ``name``, scored by the timestamp carried in
        ``series.name``.

        The row is skipped when ``self.exist_timestamp`` reports that the
        timestamp is already present. When ``self.length(name)`` equals
        ``self.max_length`` one entry is removed with ``zpopmin`` before
        the new one is written.

        :param name: redis key
        :param series: pandas.Series whose ``name`` exposes ``timestamp``
            and ``to_pydatetime`` (e.g. a pandas.Timestamp)
        :return: result of ``client.zadd``, or None when the timestamp
            already exists
        :raises RedisTimeSeriesError: if ``series`` is not a pandas.Series
            or its name has no ``timestamp`` attribute
        """
        self._validate_key(name)

        # validate datetime
        is_timestamped_series = (isinstance(series, pd.Series)
                                 and hasattr(series.name, "timestamp"))
        if not is_timestamped_series:
            raise RedisTimeSeriesError(
                "Please check series Type or "
                "series name value is not pandas.DateTimeIndex type")

        timestamp = series.name.to_pydatetime().timestamp()

        with self._lock:
            if self.exist_timestamp(name, timestamp):
                return None

            payload = self._serializer.dumps(series.tolist())
            if self.length(name) == self.max_length:
                self.client.zpopmin(name)
            return self.client.zadd(name, {payload: timestamp})
예제 #3
0
    def __init__(self,
                 redis_client,
                 columns,
                 index_name="timestamp",
                 dtypes=None,
                 max_length=100000,
                 *args,
                 **kwargs):
        """
        Set up the pandas-backed time-series store.

        :param redis_client: redis client instance, only tested with the
            redis-py client; forwarded to the base initializer
        :param columns: list of pandas DataFrame column names; must not
            contain ``index_name``
        :param index_name: name used for the DataFrame index
        :param dtypes: pandas columns data type,
            example: {"value":"int64","value2":"float64"}
        :param max_length: int, max length of data to store the
            time-series data; forwarded to the base initializer
        :param args: extra positional arguments for the base initializer
        :param kwargs: extra keyword arguments for the base initializer
        :raises RedisTimeSeriesError: if ``index_name`` is found in
            ``columns``
        """
        base = super(RedisPandasTimeSeries, self)
        base.__init__(*args,
                      redis_client=redis_client,
                      max_length=max_length,
                      **kwargs)

        index_clashes_with_column = index_name in columns
        if index_clashes_with_column:
            raise RedisTimeSeriesError("columns name can't contain index name")

        self.columns = columns
        self.dtypes = dtypes
        self.index_name = index_name
예제 #4
0
    def _timestamp_exist(self, name, array):
        """
        Reject a batch of rows whose timestamps overlap those already
        stored under ``name``.

        The first element of the first and last rows is used as the range
        passed to ``self.count`` and ``self.get_slice``; the first elements
        of the returned rows are then intersected with the incoming ones.

        :param name: redis key
        :param array: already sorted array list; the first element of each
            row is its timestamp
        :return: None
        :raises RedisTimeSeriesError: if an incoming timestamp is also
            present in the slice returned by ``self.get_slice``
        """
        # An empty batch cannot collide; without this guard the indexing
        # below fails with IndexError. len() is used rather than
        # truthiness so array-like inputs are handled too.
        if len(array) == 0:
            return

        start_timestamp = array[0][0]  # min
        end_timestamp = array[-1][0]  # max

        exist_length = self.count(name, start_timestamp, end_timestamp)

        if exist_length > 0:

            incoming_timestamps = numpy.fromiter(
                (item[0] for item in array), numpy.float64)

            stored_rows = self.get_slice(name, start_timestamp, end_timestamp)
            stored_timestamps = numpy.fromiter(
                (item[0] for item in stored_rows), numpy.float64)

            duplicated = numpy.intersect1d(incoming_timestamps,
                                           stored_timestamps)

            if duplicated.size > 0:
                raise RedisTimeSeriesError(
                    "add duplicated timestamp into redis -> timestamp:")
예제 #5
0
 def _validate_key(self, name):
     """
     validate redis key can't contains specific names
     :param name:
     """
     if ":HASH" in name or ":ID" in name:
         raise RedisTimeSeriesError("Key can't contains `:HASH`, `:ID` values.")
예제 #6
0
    def _timestamp_exist(self, name, array):
        """
        Reject an array whose timestamps overlap those already stored
        under ``name``.

        Timestamps are read by field name (``self.timestamp_column_name``)
        when ``self.dtype`` is set, otherwise by column position
        (``self.timestamp_column_index``). Their min and max form the range
        passed to ``self.count`` and ``self.get_slice``; the timestamps of
        the returned slice are then intersected with the incoming ones.

        :param name: redis key
        :param array: numpy array holding the rows to be added
        :return: None
        :raises RedisTimeSeriesError: if an incoming timestamp is also
            present in the slice returned by ``self.get_slice``
        """
        if self.dtype:
            timestamp_array = array[self.timestamp_column_name]
        else:
            timestamp_array = array[:, self.timestamp_column_index]

        # No rows means nothing can collide; min()/max() on a zero-size
        # array would raise ValueError instead.
        if len(timestamp_array) == 0:
            return

        start_timestamp = timestamp_array.min()
        end_timestamp = timestamp_array.max()

        exist_length = self.count(name, start_timestamp, end_timestamp)

        if exist_length > 0:

            stored_array = self.get_slice(name, start_timestamp,
                                          end_timestamp)

            if self.dtype:
                stored_timestamps = stored_array[self.timestamp_column_name]
            else:
                stored_timestamps = stored_array[:,
                                                 self.timestamp_column_index]

            # check repeated data
            duplicated = np.intersect1d(stored_timestamps, timestamp_array)

            if duplicated.size > 0:
                raise RedisTimeSeriesError(
                    "add duplicated timestamp into redis -> timestamp:")
예제 #7
0
    def __init__(self,
                 redis_client,
                 max_length=100000,
                 dtype=None,
                 timestamp_column_name=None,
                 timestamp_column_index=0,
                 *args,
                 **kwargs):
        """
        Set up the numpy-backed time-series store.

        When ``dtype`` is truthy it is converted with ``np.dtype`` and its
        field names are recorded together with the position of
        ``timestamp_column_name`` among them; otherwise ``self.dtype`` is
        None and only ``timestamp_column_index`` is stored.

        :param redis_client: forwarded to the base initializer
        :param max_length: forwarded to the base initializer
        :param dtype: numpy.dtype (or a spec accepted by ``np.dtype``);
            when given, ``timestamp_column_name`` must be given as well
        :param timestamp_column_name: timestamp column name
        :param timestamp_column_index: timestamp column index
        :param args: extra positional arguments for the base initializer
        :param kwargs: extra keyword arguments for the base initializer
        :raises RedisTimeSeriesError: if ``dtype`` is not None while
            ``timestamp_column_name`` is None
        """
        super(RedisNumpyTimeSeries, self).__init__(*args,
                                                   redis_client=redis_client,
                                                   max_length=max_length,
                                                   **kwargs)

        name_missing = timestamp_column_name is None
        if name_missing and dtype is not None:
            raise RedisTimeSeriesError("dtype and timestamp_column_name "
                                       "must both be specified")

        if not dtype:
            self.dtype = None
        else:
            self.dtype = np.dtype(dtype)
            self.timestamp_column_name = timestamp_column_name

            field_names = list(self.dtype.names)
            self.names = field_names
            self.timestamp_name_index = field_names.index(
                timestamp_column_name)

        self.timestamp_column_index = timestamp_column_index
예제 #8
0
 def _validate_append_data(self, data_frame):
     """
     Reject a DataFrame whose index holds the same label more than once.

     :param data_frame: object exposing a pandas index via ``.index``
     :return: None
     :raises RedisTimeSeriesError: if any index label is repeated
     """
     # duplicated() flags every repeat of a label after its first
     # occurrence, so a single True means the index is not unique.
     repeated_mask = data_frame.index.duplicated()
     if repeated_mask.any():
         raise RedisTimeSeriesError(
             "DataFrame index can't contains duplicated index data")
예제 #9
0
    def _validate_duplicated_index(self, array):
        """
        Cast the timestamp column to float64, order the rows by it and
        reject repeated timestamps.

        The float64-cast timestamps are written back into ``array`` before
        sorting, so the caller's array is modified in place; the sorted
        result is a new array.

        :param array: structured array read by field
            ``self.timestamp_column_name`` when ``self.dtype`` is set,
            otherwise an array indexed as ``array[:, column]`` using
            ``self.timestamp_column_index``
        :return: the rows of ``array`` ordered by ascending timestamp
        :raises RedisTimeSeriesError: if a timestamp occurs more than once
        """
        # sort timestamp
        if self.dtype:
            timestamp_array = array[self.timestamp_column_name].astype(
                "float64")
            array[self.timestamp_column_name] = timestamp_array
            array = np.sort(array, order=[self.timestamp_column_name])
        else:
            timestamp_array = array[:, self.timestamp_column_index].astype(
                "float64")
            array[:, self.timestamp_column_index] = timestamp_array
            # Reorder whole rows by their timestamp. np.sort(array, axis=k)
            # would sort every column (or every row) independently and so
            # detach the values from the timestamps they belong to.
            row_order = np.argsort(timestamp_array, kind="stable")
            array = array[row_order]

        # check repeated
        if len(np.unique(timestamp_array)) != len(timestamp_array):
            raise RedisTimeSeriesError("repeated timestamps in array data")

        return array