def _timestamp_exist(self, name, data_frame):
    """
    Check that no index timestamp of ``data_frame`` is already stored.

    The time window to inspect is bounded by the smallest and the largest
    label of ``data_frame.index``. If ``self.count`` reports stored entries
    inside that window, they are loaded with ``self.get_slice`` and their
    index is intersected with the index of ``data_frame``.

    :param name: key forwarded to ``self.count`` and ``self.get_slice``
    :param data_frame: pandas.DataFrame whose index labels provide
        ``to_pydatetime()`` (presumably a DatetimeIndex)
    :return: None
    :raises RedisTimeSeriesError: when at least one index timestamp of
        ``data_frame`` is also present in the stored slice
    """
    date_index = data_frame.index
    # Bound the window with the index labels themselves.
    # ``DataFrame.idxmin()`` / ``idxmax()`` would instead return the labels
    # of the rows holding the smallest / largest column *values*, which is
    # unrelated to the covered time span.
    start_timestamp = date_index.min().to_pydatetime().timestamp()
    end_timestamp = date_index.max().to_pydatetime().timestamp()
    exist_length = self.count(name, start_timestamp, end_timestamp)
    if exist_length > 0:
        filter_data_frame = self.get_slice(
            name, start_timestamp, end_timestamp)
        filter_timestamps_index = filter_data_frame.index
        # any timestamp present on both sides is a duplicate
        duplicated = numpy.intersect1d(
            filter_timestamps_index.to_pydatetime(),
            date_index.to_pydatetime())
        if duplicated.size > 0:
            raise RedisTimeSeriesError(
                "add duplicated timestamp into redis -> timestamp:")
def add(self, name, series):
    """
    Store one pandas.Series as a single time-series entry.

    The key is checked with ``self._validate_key`` first. The series must
    be a ``pandas.Series`` whose ``name`` exposes a ``timestamp``
    attribute; ``series.name.to_pydatetime().timestamp()`` becomes the
    score of the entry and the serialized ``series.tolist()`` its member.
    The write happens while holding ``self._lock``.

    :param name: redis key
    :param series: pandas.Series named by the timestamp of the entry
    :return: the result of ``self.client.zadd``, or None when
        ``self.exist_timestamp`` reports the timestamp as already stored
    :raises RedisTimeSeriesError: when ``series`` is not a pandas.Series
        or its name has no ``timestamp`` attribute
    """
    self._validate_key(name)

    is_timestamped_series = isinstance(series, pd.Series) and hasattr(
        series.name, "timestamp")
    if not is_timestamped_series:
        raise RedisTimeSeriesError(
            "Please check series Type or "
            "series name value is not pandas.DateTimeIndex type")

    score = series.name.to_pydatetime().timestamp()
    with self._lock:
        if self.exist_timestamp(name, score):
            # timestamp already stored: nothing is written
            return None
        member = self._serializer.dumps(series.tolist())
        if self.length(name) == self.max_length:
            # key is at capacity: drop one entry via zpopmin before adding
            self.client.zpopmin(name)
        return self.client.zadd(name, {member: score})
def __init__(self, redis_client, columns, index_name="timestamp",
             dtypes=None, max_length=100000, *args, **kwargs):
    """
    Set up the pandas flavoured time-series store.

    ``redis_client`` and ``max_length`` are handed to the base class
    initializer together with any extra positional / keyword arguments;
    the remaining parameters are kept on the instance.

    :param redis_client: redis client instance, only test with redis-py
        client.
    :param columns: pandas DataFrame columns names' list
    :param index_name: name of the index; must not appear in ``columns``
    :param dtypes: pandas columns data type, example:
        {"value":"int64","value2":"float64"}
    :param max_length: int, max length of data to store the time-series
        data.
    :param args: extra positional arguments for the base initializer
    :param kwargs: extra keyword arguments for the base initializer
    :raises RedisTimeSeriesError: when ``index_name`` is one of ``columns``
    """
    super(RedisPandasTimeSeries, self).__init__(
        *args, redis_client=redis_client, max_length=max_length, **kwargs)

    # the index is stored separately, so it may not double as a column
    if index_name in columns:
        raise RedisTimeSeriesError("columns name can't contain index name")

    self.index_name = index_name
    self.columns = columns
    self.dtypes = dtypes
def _timestamp_exist(self, name, array):
    """
    Check that no timestamp of ``array`` is already stored under ``name``.

    The first element of every item is treated as its timestamp. The
    window to inspect runs from the first item's timestamp to the last
    item's timestamp, so the items are expected to be sorted already.

    :param name: key forwarded to ``self.count`` and ``self.get_slice``
    :param array: already sorted array list
    :return: None
    :raises RedisTimeSeriesError: when a timestamp of ``array`` is also
        found in the stored slice
    """
    def leading_values(rows):
        # collect item[0] of every row as a float64 vector
        return numpy.fromiter((row[0] for row in rows), numpy.float64)

    first_timestamp = array[0][0]   # min
    last_timestamp = array[-1][0]   # max

    if self.count(name, first_timestamp, last_timestamp) <= 0:
        # nothing stored inside the window, so nothing can collide
        return

    incoming = leading_values(array)
    stored = leading_values(
        self.get_slice(name, first_timestamp, last_timestamp))
    if numpy.intersect1d(incoming, stored).size > 0:
        raise RedisTimeSeriesError(
            "add duplicated timestamp into redis -> timestamp:")
def _validate_key(self, name):
    """
    Reject redis keys that contain one of the forbidden markers.

    :param name: redis key to check
    :return: None
    :raises RedisTimeSeriesError: when ``name`` contains ``:HASH`` or
        ``:ID``
    """
    forbidden_markers = (":HASH", ":ID")
    if any(marker in name for marker in forbidden_markers):
        raise RedisTimeSeriesError("Key can't contains `:HASH`, `:ID` values.")
def _timestamp_exist(self, name, array):
    """
    Check that no timestamp of ``array`` is already stored under ``name``.

    Timestamps are read from the field ``self.timestamp_column_name`` when
    ``self.dtype`` is set, otherwise from the column
    ``self.timestamp_column_index``. The inspected window spans the
    smallest to the largest incoming timestamp.

    :param name: key forwarded to ``self.count`` and ``self.get_slice``
    :param array: numpy array holding the records to be added
    :return: None
    :raises RedisTimeSeriesError: when a timestamp of ``array`` is also
        found in the stored slice
    """
    def timestamps_of(records):
        # named field when a dtype is configured, positional column otherwise
        if self.dtype:
            return records[self.timestamp_column_name]
        return records[:, self.timestamp_column_index]

    incoming = timestamps_of(array)
    earliest = incoming.min()
    latest = incoming.max()

    if self.count(name, earliest, latest) <= 0:
        # nothing stored inside the window, so nothing can collide
        return

    stored = timestamps_of(self.get_slice(name, earliest, latest))
    # check repeated data
    if np.intersect1d(stored, incoming).size > 0:
        raise RedisTimeSeriesError(
            "add duplicated timestamp into redis -> timestamp:")
def __init__(self, redis_client, max_length=100000, dtype=None,
             timestamp_column_name=None, timestamp_column_index=0,
             *args, **kwargs):
    """
    Set up the numpy flavoured time-series store.

    ``redis_client`` and ``max_length`` are handed to the base class
    initializer together with any extra positional / keyword arguments.
    With a truthy ``dtype`` the timestamp is addressed by field name;
    otherwise it is addressed by column position.

    :param redis_client: redis client passed to the base initializer
    :param max_length: int, passed to the base initializer
    :param dtype: numpy.dtype, if set the dtype and timestamp_column_name
        can't be None
    :param timestamp_column_name: timestamp column name
    :param timestamp_column_index: timestamp column index
    :param args: extra positional arguments for the base initializer
    :param kwargs: extra keyword arguments for the base initializer
    :raises RedisTimeSeriesError: when ``dtype`` is given without
        ``timestamp_column_name``
    """
    super(RedisNumpyTimeSeries, self).__init__(
        *args, redis_client=redis_client, max_length=max_length, **kwargs)

    if timestamp_column_name is None and dtype is not None:
        raise RedisTimeSeriesError("dtype and timestamp_column_name "
                                   "must both be specified")

    if not dtype:
        # no dtype configured: the timestamp is located by column position
        self.dtype = None
        self.timestamp_column_index = timestamp_column_index
        return

    # dtype configured: the timestamp is located by field name
    self.dtype = np.dtype(dtype)
    self.timestamp_column_name = timestamp_column_name
    self.names = list(self.dtype.names)
    self.timestamp_name_index = self.names.index(timestamp_column_name)
def _validate_append_data(self, data_frame):
    """
    Ensure the index of ``data_frame`` holds no repeated label.

    :param data_frame: pandas.DataFrame about to be appended
    :return: None
    :raises RedisTimeSeriesError: when any index label occurs more than
        once
    """
    index_labels = data_frame.index
    # duplicated() flags every repeat of a label seen earlier in the index
    repeated_mask = index_labels.duplicated()
    if repeated_mask.any():
        raise RedisTimeSeriesError(
            "DataFrame index can't contains duplicated index data")
def _validate_duplicated_index(self, array):
    """
    Sort the records by timestamp and reject repeated timestamps.

    The timestamp column is converted to ``float64`` and written back
    into ``array`` (so the caller's array is modified in place) before a
    sorted copy is produced. With ``self.dtype`` set the timestamp is the
    field ``self.timestamp_column_name``; otherwise it is the column
    ``self.timestamp_column_index`` of a two-dimensional array.

    :param array: numpy array holding the records
    :return: a copy of ``array`` ordered by ascending timestamp
    :raises RedisTimeSeriesError: when a timestamp occurs more than once
    """
    # sort timestamp
    if self.dtype:
        timestamp_array = array[self.timestamp_column_name].astype(
            "float64")
        array[self.timestamp_column_name] = timestamp_array
        array = np.sort(array, order=[self.timestamp_column_name])
    else:
        timestamp_array = array[:, self.timestamp_column_index].astype(
            "float64")
        array[:, self.timestamp_column_index] = timestamp_array
        # Reorder whole rows by their timestamp. ``np.sort(array, axis=k)``
        # would sort every column (or row) independently and so detach
        # the timestamps from the values that belong to them.
        array = array[np.argsort(timestamp_array)]
    # check repeated
    if len(np.unique(timestamp_array)) != len(timestamp_array):
        raise RedisTimeSeriesError("repeated timestamps in array data")
    return array