def step_data(self, delta_values=False, convert_keys=False):
    """
    Build an array of the step keys paired with either the cumulative
    values or the per-step changes.

    The key column plus one value column are copied from the internal
    summary array, and a first/last row whose key equals the epoch
    start/end value is dropped.

    Parameters
    ===========
    delta_values : bool, Optional
        Pair the keys with the DIRECTION column (the step delta changes)
        instead of the WEIGHT column (the cumulative total at each key).

    convert_keys : bool, Optional
        When True and the object uses datetimes, the key column is passed
        through `prepare_datetime`.

    Returns
    ========
    array

    """

    # Copy so the trimming/conversion below never touches self._all_data.
    if delta_values:
        nice_data = np.copy(
            self._all_data[:, [DataModel.START.value, DataModel.DIRECTION.value]])
    else:
        nice_data = np.copy(
            self._all_data[:, [DataModel.START.value, DataModel.WEIGHT.value]])

    # Drop a leading row that sits on the epoch start key.
    if nice_data[0, DataModel.START.value] == get_epoch_start(False):
        nice_data = nice_data[1:]

    # Drop a trailing row that sits on the epoch end key.
    if nice_data[-1, DataModel.START.value] == get_epoch_end(False):
        nice_data = nice_data[:-1]

    if convert_keys and self._using_dt:
        # nice_data only has two columns here; the value column is read with
        # DataModel.DIRECTION.value.
        # NOTE(review): presumably relies on DIRECTION.value == 1 - confirm.
        nice_data = np.array(
            list(
                zip(prepare_datetime(nice_data[:, DataModel.START.value]),
                    nice_data[:, DataModel.DIRECTION.value])))

        # If the first converted key is still the epoch start, reuse the
        # second key in its place.
        if nice_data[0, DataModel.START.value] == get_epoch_start():
            nice_data[0, DataModel.START.value] = nice_data[
                1, DataModel.START.value]
    else:
        # NOTE(review): this branch returns only the raw key column of
        # self._all_data, discarding the trimmed key/value pairs built above.
        # That looks inconsistent with the docstring - confirm intent. The
        # nesting of this `else` was reconstructed from collapsed source.
        return self._all_data[:, DataModel.START.value]

    return nice_data
def _process_data(self, start=None, end=None, weight=None, use_datetime=False):
    """
    Generate raw step rows ``(key, 1, weight)`` from start/end/weight arrays.

    Each input row yields an opening step with weight ``w`` and, when the
    row is bounded on the right, a closing step with weight ``-w``.

    Parameters
    ===========
    start : array_like, Optional
        Step start locations. A null entry is replaced by the epoch start.

    end : array_like, Optional
        Step end locations. A null entry means no closing step, unless the
        start is null as well, in which case the epoch end is used.

    weight : array_like, Optional
        Step weights, defaults to 1 for every row.

    use_datetime : bool, Optional
        Convert keys with `date_to_float` instead of `float`.

    Yields
    ========
    tuple
        ``(key, 1, signed_weight)``

    """

    # Fill in whichever inputs were omitted, sized from the ones provided.
    if start is None:
        start = np.full(len(end), None)
    if weight is None:
        weight = np.ones(len(start), dtype=np.int32)
    if end is None:
        end = np.full(len(start), None)

    to_key = date_to_float if use_datetime else float
    first_key = get_epoch_start(False)
    last_key = get_epoch_end(False)

    for begin, finish, size in zip(start, end, weight):
        no_begin = pd.isnull(begin)
        no_finish = pd.isnull(finish)

        # Opening step: an unknown start is pinned to the epoch start.
        yield (first_key if no_begin else to_key(begin), 1, size)

        # Closing step: explicit end, or epoch end when both ends are unknown.
        if not no_finish:
            yield (to_key(finish), 1, -size)
        elif no_begin:
            yield (last_key, 1, -size)
def _filter_by_mask(step_data, mask, normalise_value=0):
    """
    Rebuild step rows keeping only the stretches where ``mask`` is True.

    Parameters
    ===========
    step_data : array
        Array indexed by the `DataModel` columns (START, DIRECTION, WEIGHT).

    mask : array_like of bool
        One flag per row of ``step_data``.

    normalise_value : int, float, Optional
        When non-zero, every kept stretch is emitted as a step of this
        height instead of using the values stored in ``step_data``.

    Returns
    ========
    array or list
        ``step_data`` itself when every mask entry is True, otherwise a list
        of ``[key, 1, weight]`` rows.

    Notes
    ======
    A stretch that is still open when the data ends is not closed.

    """

    # np.all replaces np.alltrue, which was removed in NumPy 2.0.
    if np.all(mask):
        return step_data

    new_steps = []
    st = None  # row index where the current kept stretch started
    adj = 0  # changes accumulated inside the current stretch

    for i, s in enumerate(step_data[:, DataModel.START.value]):
        if mask[i]:
            if st is None:
                # Open a new stretch at this key.
                st = i
                if normalise_value == 0:
                    new_steps.append([s, 1, step_data[i, DataModel.WEIGHT.value]])
                else:
                    new_steps.append([s, 1, normalise_value])
            elif i > st and normalise_value == 0:
                # Inside a stretch: carry the change through and remember it
                # so the closing step can undo it.
                new_steps.append([s, 1, step_data[i, DataModel.DIRECTION.value]])
                adj += step_data[i, DataModel.DIRECTION.value]
        else:
            if st is not None and st != get_epoch_start(False):
                # Close the stretch by removing everything it added.
                if normalise_value == 0:
                    new_steps.append(
                        [s, 1, -1 * (step_data[st, DataModel.WEIGHT.value] + adj)])
                    adj = 0
                else:
                    new_steps.append([s, 1, -1 * normalise_value])

                st = None

    return new_steps
def read_array(cls, start=None, end=None, weight=None, use_datetime=False, convert_delta=False):
    """
    Build a steps object from arrays of start, end and weight values.

    The weights are either used directly as individual step weights, or
    treated as cumulative values and differenced before loading.

    Parameters
    ==============
    start : array_like
        An array of step start location values.

    end : array_like, Optional
        An array of step end location values.

    weight : array_like, Optional
        An array of step weight values, passed on to `add_direct`.

    use_datetime : bool, Optional
        Passed to the constructor to indicate that the start and end
        values are datetimes.

    convert_delta : bool, Optional
        When True, the weights are replaced by the differences between
        adjacent values before loading. A non-zero first weight with a
        non-null first start is added as a separate step at the epoch
        start.

    Returns
    ==============
    Steps

    Raises
    ==============
    TypeError
        If neither ``start`` nor ``end`` is iterable.

    See Also
    ==============
    read_dataframe
    read_dict

    """

    # needs to be an array like object
    if not (hasattr(start, '__iter__') or hasattr(end, '__iter__')):
        raise TypeError(
            "input data must be array like, python array or ndarray.")

    if not convert_delta:
        return cls(use_datetime).add_direct(start, end, weight)

    leading_weight = weight[0] if weight[0] != 0 else 0

    if leading_weight != 0 and not pd.isnull(start[0]):
        # The initial level is carried by a step at the epoch start, the
        # remaining rows by the plain adjacent differences.
        deltas = np.diff(weight)
        steps = cls(use_datetime).add_direct(start, end, deltas)
        steps.add_steps([[get_epoch_start(False), 1, leading_weight]])
    else:
        deltas = np.diff(weight, prepend=0)
        steps = cls(use_datetime).add_direct(start, end, deltas)

    return steps
def step_keys(self, convert_keys=False):
    """
    Return the step key values held by this object.

    Parameters
    ===========
    convert_keys : bool, Optional
        When True and the object uses datetimes, the keys are passed
        through `prepare_datetime`; otherwise the raw key column is
        returned.

    Returns
    ========
    array
        Step keys

    """

    raw_keys = self._all_data[:, DataModel.START.value]

    if not (convert_keys and self._using_dt):
        return raw_keys

    converted = prepare_datetime(raw_keys, self._using_dt)

    # A leading epoch start key is replaced by the second key.
    if converted[0] == get_epoch_start():
        converted[0] = converted[1]

    return converted
def __iter__(self):
    """
    Iterate over the raw step rows, each wrapped in a new object of the
    same type. A first row keyed at the epoch start is skipped.
    """

    leads_with_epoch = self._step_data[0, DataModel.START.value] == get_epoch_start(False)
    skip = 1 if leads_with_epoch else 0
    self._index = skip

    make = type(self)
    return iter([
        make(self._using_dt).add_steps([row])
        for row in self._step_data[skip:]
    ])
def fast_step(self, xdata, process_input=True, side='right'):
    """
    Evaluate the cumulative steps function at the provided input values
    using a sorted lookup instead of the assigned basis.

    .. note::
        This function ignores the assigned basis and reads the cumulative
        values directly. To ensure the assigned basis is used, please use
        the `step` function. With fewer than five step keys the lookup is
        skipped and the call is delegated to `step`.

    Parameters
    ==========
    xdata : array_like(int, float, datetime)
        The values the steps function is to be evaluated at.

    process_input : bool, Optional
        Indicate if the input data needs processing with `prepare_input`.
        Primarily used internally to avoid converting input data twice.

    side : {'right', 'left'}, Optional
        Passed to `numpy.searchsorted`. With the default *'right'*, a value
        equal to a step key takes the cumulative value at that key.

    Returns
    ========
    array
        The values of the cumulative steps function evaluated at the
        provided input (x axis) values.

    See Also
    =========
    step
    smooth_step

    """

    if process_input:
        x = prepare_input(xdata)
    else:
        x = xdata

    # Small objects go straight to `step`; checked first so the lookup table
    # is not built only to be thrown away.
    if self._all_data.shape[0] < 5:
        return self.step(x)

    # Lookup table: slot 0 is the value at the epoch start, slot i + 1 is the
    # cumulative value at the i-th key.
    search_data = np.concatenate([
        self.step(np.array([get_epoch_start(False)]), False),
        self._all_data[:, DataModel.WEIGHT.value]
    ])

    idxs = np.searchsorted(self._all_data[:, DataModel.START.value], x, side=side)

    # Clip to the last valid slot as a guard against over/under run.
    return search_data[np.clip(idxs, 0, search_data.shape[0] - 1)]
def step(self, xdata, process_input=True):
    """
    Evaluate the steps function at the given values using the assigned
    basis function.

    Parameters
    ===========
    xdata : array_like(int, float, datetime)
        The values the steps function is to be evaluated at.

    process_input : bool, Optional
        Indicate if the input data needs processing with `prepare_input`.
        Primarily used internally to avoid converting input data twice.

    Returns
    ========
    array
        The values of the steps function at the provided input (x axis)
        values; all zeros when the object holds no steps.

    See Also
    =========
    fast_step
    smooth_step

    """

    x = prepare_input(xdata) if process_input else xdata

    # Nothing loaded: the function is zero everywhere.
    if self._step_data.shape[0] <= 0:
        return np.zeros(len(x))

    result = self._base(x, self._step_data, self._basis.param)

    # For a non-Heaviside basis, a first point at the epoch start takes the
    # value of the second point.
    if self._basis.name != 'Heaviside' and x[0] == get_epoch_start(False):
        result[0] = result[1]

    return result
def _clip(self, lbound=None, ubound=None):
    """
    Build raw step rows restricted to the range between two bounds.

    Rows are ``[key, 1, change]`` where ``change`` is read from the
    DIRECTION column of ``self._all_data``. When a lower bound applies, a
    row is inserted at the front carrying the value of the object evaluated
    at ``lbound``. When both bounds fall inside the data, a closing row is
    appended at ``ubound``.

    Parameters
    ===========
    lbound : int, float, Optional
        Lower bound; None means no lower clipping.

    ubound : int, float, Optional
        Upper bound; None means no upper clipping.

    Returns
    ========
    array
        Array with three columns indexed by the `DataModel` values.

    """
    # NOTE(review): the indentation of this function was reconstructed from
    # collapsed source; confirm the nesting against the original file.
    # NOTE(review): if both bounds are None the first branch runs and passes
    # None to searchsorted - confirm callers never do this.

    step_data = self._all_data

    if lbound is None:
        # Upper clip only: keep every row with key <= ubound.
        lower_idx = 0  # not used in this branch
        idxs = np.searchsorted(self._all_data[:, DataModel.START.value],
                               ubound,
                               side='right')
        # NOTE(review): searchsorted indices are not expected to be negative,
        # so the -1 fallback looks unreachable - confirm.
        upper_idx = idxs if idxs >= 0 else -1

        step_slice = step_data[:upper_idx]
        new_steps = np.empty((step_slice.shape[0], 3))
        new_steps[:, DataModel.START.value] = step_slice[:, DataModel.START.value]
        new_steps[:, DataModel.DIRECTION.value] = 1
        new_steps[:, DataModel.WEIGHT.value] = step_slice[:, DataModel.DIRECTION.value]

    elif ubound is None:
        # Lower clip only: keep every row with key > lbound.
        idxs = np.searchsorted(self._all_data[:, DataModel.START.value],
                               lbound,
                               side='right')
        lower_idx = idxs if idxs >= 0 else 0
        upper_idx = -1  # not used in this branch

        step_slice = step_data[lower_idx:]
        new_steps = np.empty((step_slice.shape[0], 3))
        new_steps[:, DataModel.START.value] = step_slice[:, DataModel.START.value]
        new_steps[:, DataModel.DIRECTION.value] = 1
        new_steps[:, DataModel.WEIGHT.value] = step_slice[:, DataModel.DIRECTION.value]

        # Evaluate the object at the lower bound. A non-zero value is
        # inserted as a row keyed at the epoch start, a zero value as a row
        # keyed at lbound.
        new_start_weight = self(lbound)[0]
        if new_start_weight != 0:
            new_steps = np.insert(
                new_steps,
                0, [[get_epoch_start(False), 1, new_start_weight]],
                axis=0)
        else:
            new_steps = np.insert(new_steps,
                                  0, [[lbound, 1, new_start_weight]],
                                  axis=0)
    else:
        if lbound <= self._start:
            # Lower bound is at or before self._start: same handling as the
            # upper-clip-only case.
            lower_idx = 0  # not used in this branch
            idxs = np.searchsorted(self._all_data[:, DataModel.START.value],
                                   ubound,
                                   side='right')
            upper_idx = idxs if idxs >= 0 else -1

            step_slice = step_data[:upper_idx]
            new_steps = np.empty((step_slice.shape[0], 3))
            new_steps[:, DataModel.START.value] = step_slice[:, DataModel.START.value]
            new_steps[:, DataModel.DIRECTION.value] = 1
            new_steps[:, DataModel.WEIGHT.value] = step_slice[:, DataModel.DIRECTION.value]

        elif ubound >= self._end:
            # Upper bound is at or after self._end: same handling as the
            # lower-clip-only case.
            idxs = np.searchsorted(self._all_data[:, DataModel.START.value],
                                   lbound,
                                   side='right')
            lower_idx = idxs if idxs >= 0 else 0
            upper_idx = -1  # not used in this branch

            step_slice = step_data[lower_idx:]
            new_steps = np.empty((step_slice.shape[0], 3))
            new_steps[:, DataModel.START.value] = step_slice[:, DataModel.START.value]
            new_steps[:, DataModel.DIRECTION.value] = 1
            new_steps[:, DataModel.WEIGHT.value] = step_slice[:, DataModel.DIRECTION.value]

            new_start_weight = self(lbound)[0]
            if new_start_weight != 0:
                new_steps = np.insert(
                    new_steps,
                    0, [[get_epoch_start(False), 1, new_start_weight]],
                    axis=0)
            else:
                new_steps = np.insert(new_steps,
                                      0, [[lbound, 1, new_start_weight]],
                                      axis=0)
        else:
            # Both bounds apply: keep the rows with lbound < key <= ubound.
            idxs = np.searchsorted(self._all_data[:, DataModel.START.value],
                                   lbound,
                                   side='right')
            lower_idx = idxs if idxs >= 0 else 0

            idxs = np.searchsorted(self._all_data[:, DataModel.START.value],
                                   ubound,
                                   side='right')
            upper_idx = idxs if idxs >= 0 else -1

            step_slice = step_data[lower_idx:upper_idx]
            new_steps = np.empty((step_slice.shape[0], 3))
            new_steps[:, DataModel.START.value] = step_slice[:, DataModel.START.value]
            new_steps[:, DataModel.DIRECTION.value] = 1
            new_steps[:, DataModel.WEIGHT.value] = step_slice[:, DataModel.DIRECTION.value]

            # Closing weight: the negated sum of the kept changes.
            # NOTE(review): step_slice is already sliced by lower_idx:upper_idx
            # and is sliced again here with the same absolute indices - this
            # looks unintended; confirm. The start weight inserted below is
            # also not included in this sum.
            end_val = -1 * (np.sum(step_slice[lower_idx:upper_idx,
                                              DataModel.DIRECTION.value]))

            new_start_weight = self(lbound)[0]
            if new_start_weight != 0:
                new_steps = np.insert(
                    new_steps,
                    0, [[get_epoch_start(False), 1, new_start_weight]],
                    axis=0)
            else:
                new_steps = np.insert(new_steps,
                                      0, [[lbound, 1, new_start_weight]],
                                      axis=0)

            # Append the closing row at the upper bound.
            new_steps = np.append(new_steps, [[ubound, 1, end_val]], axis=0)

    return new_steps
def _recalculate(self):
    """
    Rebuild the raw step rows and the cached summary from ``_step_data``.

    Rows with a NaN or infinite key are dropped, the remainder is sorted by
    key, and rows sharing a key are merged by summing
    ``DIRECTION * WEIGHT``. The method then refreshes:

    - ``self._step_data`` : one ``[key, 1.0, summed change]`` row per key.
    - ``self._start`` / ``self._end`` : the first and last key, skipping an
      epoch start/end key when enough keys exist.
    - ``self._all_data`` : ``[key, summed change, cumulative sum]`` per key.

    A ValueError or TypeError raised along the way is reported with a
    printed message instead of propagating.
    """

    try:
        keys = self._step_data[:, DataModel.START.value]
        self._step_data = self._step_data[~np.isnan(keys)]

        # -np.inf / np.inf replace np.NINF / np.PINF, which were removed in
        # NumPy 2.0.
        self._step_data = self._step_data[
            self._step_data[:, DataModel.START.value] != -np.inf]
        self._step_data = self._step_data[
            self._step_data[:, DataModel.START.value] != np.inf]

        self._step_data = self._step_data[np.argsort(
            self._step_data[:, DataModel.START.value])]

        # Merge duplicate keys by summing their signed weights.
        all_keys, all_values = group_by(
            self._step_data[:, DataModel.START.value]).sum(
                self._step_data[:, DataModel.DIRECTION.value] *
                self._step_data[:, DataModel.WEIGHT.value])

        # this is the raw step definition data for the application of basis functions
        self._step_data = np.empty((len(all_keys), 3))
        self._step_data[:, DataModel.START.value] = all_keys
        self._step_data[:, DataModel.DIRECTION.value] = 1.0
        self._step_data[:, DataModel.WEIGHT.value] = all_values

        # np.amin/np.amax raise ValueError on empty input, which is how an
        # empty object reaches the handler below.
        start_key = np.amin(all_keys)
        if start_key == get_epoch_start(False):
            if len(all_keys) > 2:
                start_key = all_keys[1]
            else:
                start_key = all_keys[0]
        else:
            start_key = all_keys[0]

        end_key = np.amax(all_keys)
        if end_key == get_epoch_end(False) and len(all_keys) > 2:
            end_key = all_keys[-2]
        elif end_key == get_epoch_end(False) and len(all_keys) == 1:
            end_key = all_keys[0]
        else:
            end_key = all_keys[-1]

        # The real value start and end points for the entire series of steps
        self._start = start_key
        self._end = end_key

        # this is the computed summary describing the steps data for fast access
        all_data = np.empty((all_keys.shape[0], 3))
        all_data[:, DataModel.START.value] = all_keys
        all_data[:, DataModel.DIRECTION.value] = all_values
        all_data[:, DataModel.WEIGHT.value] = np.cumsum(
            np.asarray(all_values), axis=0)

        self._all_data = all_data

    except (ValueError, TypeError):
        print(
            'Empty steps objects can not perform operations, please load some data and try again'
        )
def plot(self,
         method=None,
         smooth_factor=None,
         smooth_basis=None,
         interval=0.01,
         ax=None,
         where='post',
         **kargs):
    """
    Plot the steps function using different parameters and methods.

    Parameters
    ===========
    method : {'function','smooth','pretty','smooth_function'}, Optional
        How the plotted points are calculated and drawn:

        - 'pretty' : horizontal and dotted vertical segments with filled
          and hollow markers at each key.
        - 'function' : `Axes.step` of `step` sampled over the plot range.
        - 'smooth_function' : `Axes.plot` of `smooth_step` sampled over the
          plot range.
        - 'smooth' : as 'smooth_function' when there are fewer than 20
          keys, otherwise `smooth_step` evaluated at the step keys.
        - any other value (including the default None) : `Axes.step` of the
          step keys against the step values.

    smooth_factor : int, float, Optional
        Passed to `smooth_step` by the smooth methods.

    smooth_basis : Basis, Optional
        The `:class: Basis` passed to `smooth_step` by the smooth methods.

    interval : int, float, Pandas.Timedelta, Optional
        The increment passed to `get_plot_range` when sampling the plot
        range.

    ax : Matplotlib.Axes, Optional
        The axes to plot onto; a new figure and axes are created when
        omitted.

    where : {'pre', 'post'}, Optional
        Passed as the Matplotlib *where* parameter of `Axes.step`.

    **kargs : Matplotlib key-value arguments
        ``figsize`` is consumed when a new figure is created; the rest is
        forwarded to the plotting calls.

    Returns
    =========
    Matplotlib.Axes
        A reference to the plot axes object to allow further plotting on the same axes.

    Examples
    ==========

    .. plot::
        :context: close-figs

        s1 = Step(5,10,3)
        s2 = Step(6,weight=2)
        st = s1 + s2

        ax = s1.plot(color='r',figsize=(8,4))
        s2.plot(ax=ax,method='function')
        s2.plot(ax=ax,method='smooth')
        st.plot(ax=ax)
        st.smooth_plot(ax=ax)
        ax.set_title('Steps Plot')

    """
    # NOTE(review): the indentation of this function was reconstructed from
    # collapsed source; the placements marked below should be confirmed
    # against the original file.

    if ax is None:
        plot_size = kargs.pop('figsize', None)
        if plot_size is None:
            plot_size = get_default_plot_size()

        _, ax = plt.subplots(figsize=plot_size)

    # NOTE(review): step_keys() without conversion returns a slice of
    # self._all_data, so the in-place `np_keys[0] = ...` assignments further
    # down may write through to the object's stored keys - confirm.
    np_keys = self.step_keys()
    np_values = self.step_values()

    reverse_step = False

    if len(np_keys) < 3:
        if len(np_keys) == 0:
            # No keys: draw a horizontal line at the value obtained by
            # evaluating the object at the epoch start.
            ax.axhline(
                self(get_epoch_start(self.using_datetime()))[0], **kargs)
            return ax
        else:
            # One or two keys: sample the function over the plot range.
            reverse_step = np_keys[0] == get_epoch_start(False)
            np_keys = get_plot_range(self.first(),
                                     self.last(),
                                     interval,
                                     use_datetime=self.using_datetime())
            np_values = self.step(np_keys)

    if method == 'pretty':
        end_index = len(np_keys)
        start_index = 1

        if self.using_datetime():
            offset = pd.Timedelta(minutes=1)
            # NOTE(review): this overwrites the Timedelta offset with the
            # converted keys; the other branches assign the conversion to
            # np_keys instead - looks like a typo, confirm.
            offset = prepare_datetime(np_keys)
        else:
            offset = 0.0000000001

        if np_keys[0] == get_epoch_start(self.using_datetime()):
            np_keys[0] = np_keys[1] - offset

        # Prepend a zero-valued point just before the first key.
        # NOTE(review): assumed to run unconditionally (outside the `if`
        # above) - confirm.
        np_keys = np.insert(np_keys, 0, np_keys[0] - offset)
        np_values = np.insert(np_values, 0, 0)

        np_keys[0] = np_keys[0] - offset
        step0_k = np_keys[0]
        step0_v = np_values[0]

        for i in range(len(np_keys)):
            k = np_keys[i]
            v = np_values[i]

            # Horizontal run from the previous point, then a dotted riser.
            ax.hlines(y=step0_v, xmin=step0_k, xmax=k, **kargs)
            ax.vlines(x=k, ymin=step0_v, ymax=v, linestyles=':', **kargs)

            if i > start_index - 1 and i < end_index:
                if i == start_index:
                    ax.plot(k, v, marker='o', fillstyle='full', **kargs)
                else:
                    # Hollow marker at the previous value, filled marker at
                    # the new value.
                    ax.plot(k, step0_v, marker='o', fillstyle='none', **kargs)
                    ax.plot(k, v, marker='o', fillstyle='full', **kargs)
            elif i == end_index:
                ax.plot(k, step0_v, marker='o', fillstyle='none', **kargs)

            step0_k = k
            step0_v = v

    elif method == 'function':
        tsx = get_plot_range(self.first(),
                             self.last(),
                             interval,
                             use_datetime=self.using_datetime())
        ax.step(tsx, self.step(tsx), where=where, **kargs)

    elif method == 'smooth_function':
        tsx = get_plot_range(self.first(),
                             self.last(),
                             interval,
                             use_datetime=self.using_datetime())
        ax.plot(
            tsx,
            self.smooth_step(tsx,
                             smooth_factor=smooth_factor,
                             smooth_basis=smooth_basis), **kargs)

    elif method == 'smooth':
        if np_keys.shape[0] < 20:
            # Few keys: sample the plot range rather than using the keys.
            tsx = get_plot_range(self.first(),
                                 self.last(),
                                 interval,
                                 use_datetime=self.using_datetime())
            ax.plot(
                tsx,
                self.smooth_step(tsx,
                                 smooth_factor=smooth_factor,
                                 smooth_basis=smooth_basis), **kargs)
        else:
            # small offset to ensure we plot the initial step transition
            if self.using_datetime():
                offset = pd.Timedelta(minutes=1)
                np_keys = prepare_datetime(np_keys)
            else:
                offset = 0.000000000001

            if np_keys[0] == get_epoch_start(self.using_datetime()):
                np_keys[0] = np_keys[1] - offset
            elif not reverse_step:
                np_keys = np.insert(np_keys, 0, np_keys[0] - offset)
                np_values = np.insert(np_values, 0, 0)

                # NOTE(review): assumed to belong to this `elif`; it may
                # instead sit after the if/elif - confirm.
                np_keys[0] = np_keys[0] - offset

            ax.plot(
                np_keys,
                self.smooth_step(np_keys,
                                 smooth_factor=smooth_factor,
                                 smooth_basis=smooth_basis), **kargs)

    else:
        # small offset to ensure we plot the initial step transition
        if self.using_datetime():
            offset = pd.Timedelta(minutes=1)
            np_keys = prepare_datetime(np_keys)
        else:
            offset = 0.0000000001

        if np_keys[0] == get_epoch_start(self.using_datetime()):
            np_keys[0] = np_keys[1] - offset
        elif not reverse_step:
            np_keys = np.insert(np_keys, 0, np_keys[0] - offset)
            np_values = np.insert(np_values, 0, 0)

            # NOTE(review): assumed to belong to this `elif`; it may instead
            # sit after the if/elif - confirm.
            np_keys[0] = np_keys[0] - offset

        ax.step(np_keys, np_values, where=where, **kargs)

    return ax
def plot_rolling_step(self,
                      rolling_function=None,
                      window=5,
                      pre_mid_post='mid',
                      ax=None,
                      **kargs):
    """
    Plot the result of applying a reduction function to a rolling window
    across the step values.

    Parameters
    ==============
    rolling_function : Numpy.ufunc, Optional
        A numpy reduction function to apply to the rolling window across
        the steps data, for example np.mean, np.max. Passed to
        `rolling_function_step`.

    window : int, Optional
        The size of the rolling window, passed to `rolling_function_step`.

    pre_mid_post : {'pre','mid','post'}, Optional
        Where to locate the reduced value within the rolling window,
        passed to `rolling_function_step`.

    ax : Matplotlib.Axes, Optional
        The axes to plot onto; a new figure and axes are created when
        omitted.

    **kargs : Matplotlib key-value parameters
        ``figsize`` is consumed when a new figure is created; the rest is
        forwarded to `Axes.plot`.

    Returns
    ========
    Matplotlib.Axes

    See Also
    ==============
    ecdf_plot
    pacf_plot
    histogram_plot
    summary

    """

    if ax is None:
        plot_size = kargs.pop('figsize', None)
        if plot_size is None:
            plot_size = get_default_plot_size()

        _, ax = plt.subplots(figsize=plot_size)

    # Work on a copy: step_keys() hands back a slice of the internal data,
    # so shifting the first key in place would alter the object's stored
    # keys on every call.
    np_keys = np.copy(self.step_keys())

    # small offset to ensure we plot the initial step transition
    if self.using_datetime():
        offset = pd.Timedelta(minutes=1)
        np_keys = prepare_datetime(np_keys)
    else:
        offset = 0.0000001

    if np_keys[0] == get_epoch_start(self.using_datetime()):
        np_keys[0] = np_keys[1] - offset
    else:
        np_keys[0] = np_keys[0] - offset

    x, y = self.rolling_function_step(np_keys,
                                      rolling_function=rolling_function,
                                      window=window,
                                      pre_mid_post=pre_mid_post)
    ax.plot(x, y, **kargs)

    return ax