示例#1
0
    def step_data(self, delta_values=False, convert_keys=False):
        """
        A clean two-column numpy array of the step keys and either the cumulative values or the step change values, ready to use in further analysis.

        A leading epoch-start row and a trailing epoch-end row are removed from the result.

        .. note::
            This function returns a dataset that can directly be consumed by numpy, Sklearn and similar packages for forecasting or analysis.


        Parameters
        ===========
        delta_values : bool, Optional
            Return the step delta changes instead of the cumulative total at each step key.

        convert_keys : bool, Optional
            If the keys are datetime, they will be converted, else they will remain floats.


        Returns
        ========
        array
            One row per step key, with the key in the first column and its value in the second.

        """

        # nice_data only ever has two columns: the key and the selected value.
        key_col, value_col = 0, 1

        source_col = (DataModel.DIRECTION.value
                      if delta_values else DataModel.WEIGHT.value)
        nice_data = np.copy(
            self._all_data[:, [DataModel.START.value, source_col]])

        # Strip the sentinel rows; the length checks stop empty data from
        # raising IndexError.
        if len(nice_data) > 0 and nice_data[0, key_col] == get_epoch_start(
                False):
            nice_data = nice_data[1:]

        if len(nice_data) > 0 and nice_data[-1, key_col] == get_epoch_end(
                False):
            nice_data = nice_data[:-1]

        if convert_keys and self._using_dt:
            nice_data = np.array(
                list(
                    zip(prepare_datetime(nice_data[:, key_col]),
                        nice_data[:, value_col])))

            # Needs a second row to borrow the key from.
            if len(nice_data) > 1 and nice_data[
                    0, key_col] == get_epoch_start():
                nice_data[0, key_col] = nice_data[1, key_col]

        # The key/value array is returned for both converted and raw keys;
        # previously the raw-key path returned only the key column.
        return nice_data
示例#2
0
    def _process_data(self,
                      start=None,
                      end=None,
                      weight=None,
                      use_datetime=False):
        """
        Generate raw ``(key, 1, weight)`` step tuples from parallel start, end and weight sequences.

        A row with a start yields ``(start, 1, weight)`` and a row with an end
        yields ``(end, 1, -weight)``. A null start is replaced by the epoch
        start key. A null end is replaced by the epoch end key only when the
        start is null as well; otherwise no closing tuple is produced.

        Parameters
        ===========
        start : array_like, Optional
            Step start keys, defaults to all nulls with the length of ``end``.

        end : array_like, Optional
            Step end keys, defaults to all nulls with the length of ``start``.

        weight : array_like, Optional
            Step weights, defaults to ones with the length of ``start``.

        use_datetime : bool, Optional
            Convert keys with ``date_to_float`` instead of ``float``.

        Yields
        ========
        tuple
            ``(key, 1, weight)``

        """
        if start is None:
            start = np.full(len(end), None)
        if weight is None:
            weight = np.ones(len(start), dtype=np.int32)
        if end is None:
            end = np.full(len(start), None)

        to_key = date_to_float if use_datetime else float

        epoch_start = get_epoch_start(False)
        epoch_end = get_epoch_end(False)

        for begin, finish, w in zip(start, end, weight):
            if pd.isnull(begin):
                has_finish = not pd.isnull(finish)
                yield (epoch_start, 1, w)
                if has_finish:
                    yield (to_key(finish), 1, -w)
                else:
                    yield (epoch_end, 1, -w)
            else:
                has_finish = not pd.isnull(finish)
                yield (to_key(begin), 1, w)
                if has_finish:
                    yield (to_key(finish), 1, -w)
示例#3
0
def _filter_by_mask(step_data,mask,normalise_value = 0):

        if np.alltrue(mask):
            return step_data

        new_steps = []

        st = None
        adj = 0
        for i ,s in enumerate(step_data[:,DataModel.START.value]):
            if mask[i]:
                if st is None:
                    st = i
                    if normalise_value == 0:
                        new_steps.append([s,1,step_data[i,DataModel.WEIGHT.value]])
                    else:
                        new_steps.append([s,1,normalise_value])
                elif st is not None and (i > st) and normalise_value == 0:
                    new_steps.append([s,1,step_data[i,DataModel.DIRECTION.value]])
                    adj += step_data[i,DataModel.DIRECTION.value]
            else:
                if st is not None and st != get_epoch_start(False):
                    if normalise_value == 0:
                        new_steps.append([s,1,-1*(step_data[st,DataModel.WEIGHT.value] + adj)])
                        adj = 0
                    else:
                        new_steps.append([s,1,-1*normalise_value])
                    st = None
        
        return new_steps
示例#4
0
def read_array(cls,
               start=None,
               end=None,
               weight=None,
               use_datetime=False,
               convert_delta=False):
    """
    Build a steps object from arrays of start, end and weight values. The weights are either the individual step values or, with
    ``convert_delta``, cumulative values that are converted to step values by differencing adjacent entries.

    Parameters
    ==============
    start : array_like
        An array of step start location values.

    end : array_like, Optional
        An array of step end location values.

    weight : array_like, Optional
        An array of step weight values. It is passed to ``add_direct`` unchanged unless ``convert_delta`` is set.

    use_datetime : bool, Optional
        Passed to the ``cls`` constructor to indicate that start and end are datetime values.

    convert_delta : bool, Optional
        Treat weight values as individual step weights (default), or convert them by taking the difference between adjacent values.
        When the first weight is non-zero and the first start is not null, the first weight is added as a separate step at the
        epoch start key; otherwise a zero is prepended before differencing.

    Returns
    ==============
    Steps

    Raises
    ==============
    TypeError
        If neither ``start`` nor ``end`` is iterable.

    See Also
    ==============
    read_dataframe
    read_dict

    """

    # at least one of start/end needs to be an array like object
    if not (hasattr(start, '__iter__') or hasattr(end, '__iter__')):
        raise TypeError(
            "input data must be array like, python array or ndarray.")

    if not convert_delta:
        return cls(use_datetime).add_direct(start, end, weight)

    first_weight = weight[0] if weight[0] != 0 else 0

    if first_weight != 0 and not pd.isnull(start[0]):
        deltas = np.diff(weight)
        new_steps = cls(use_datetime).add_direct(start, end, deltas)
        new_steps.add_steps([[get_epoch_start(False), 1, first_weight]])
        return new_steps

    deltas = np.diff(weight, prepend=0)
    return cls(use_datetime).add_direct(start, end, deltas)
示例#5
0
    def step_keys(self, convert_keys=False):
        """
        The step key values within this object, returned either in raw float format or converted if using datetime.

        Parameters
        ===========
        convert_keys : bool, Optional
            If the keys are datetime, they will be converted, else they will remain floats.


        Returns
        ========
        array
            Step keys. The raw (unconverted) keys are a slice of the internal
            data rather than a copy, so copy them before modifying.

        """

        if convert_keys and self._using_dt:
            keys = prepare_datetime(self._all_data[:, DataModel.START.value],
                                    self._using_dt)
            # Replace a leading epoch-start key with the next key. This needs
            # at least two keys; shorter data is returned as converted.
            if len(keys) > 1 and keys[0] == get_epoch_start():
                keys[0] = keys[1]

            return keys

        return self._all_data[:, DataModel.START.value]
示例#6
0
    def __iter__(self):
        """
        Iterate over the step rows, wrapping each one in a new object of this type.

        A leading epoch-start row is skipped, and ``self._index`` is set to the
        position of the first row that is iterated (1 when skipped, else 0).
        """
        leading_key = self._step_data[0, DataModel.START.value]
        skip_first = leading_key == get_epoch_start(False)

        self._index = 1 if skip_first else 0
        rows = self._step_data[1:] if skip_first else self._step_data

        return iter(
            [type(self)(self._using_dt).add_steps([row]) for row in rows])
示例#7
0
    def fast_step(self, xdata, process_input=True, side='right'):
        """
        Evaluate the cumulative steps function at the provided input values using a sorted lookup of the step keys.

        .. note::
            Objects with fewer than five step keys are evaluated with the `step` function instead, and `step` is also used for the value returned for inputs that fall before the first key.


        Parameters
        ==========
        xdata : array_like(int, float, datetime)
            The values the steps function is to be evaluated at.

        process_input : bool, Optional
            Indicate if the input data needs processing, to convert datetimes to floats for calculation. Primarily used internally to avoid converting input data twice.

        side : {'right', 'left'}, Optional
            Passed to ``numpy.searchsorted``. Default is *'right'*, which means the step assumes the weight value on and after the step key value.

        Returns
        ========
        array
            The values of the cumulative steps function evaluated at the provided input (x axis) values.

        See Also
        =========
        step
        smooth_step

        """

        x = prepare_input(xdata) if process_input else xdata

        # Checked before building the lookup table, so the extra `step` call
        # below is not made only to be thrown away.
        if self._all_data.shape[0] < 5:
            return self.step(x)

        # Entry 0 is the value before the first key; entry i is the cumulative
        # value once i keys have been passed.
        search_data = np.concatenate([
            self.step(np.array([get_epoch_start(False)]), False),
            self._all_data[:, DataModel.WEIGHT.value]
        ])

        idxs = np.searchsorted(self._all_data[:, DataModel.START.value],
                               x,
                               side=side)

        # The last valid position is len - 1; clipping to len would allow an
        # out-of-range index.
        return search_data[np.clip(idxs, 0, search_data.shape[0] - 1)]
示例#8
0
    def step(self, xdata, process_input=True):
        """
        Evaluate the steps function at the provided values using the assigned mathematical basis function.

        Parameters
        ===========
        xdata : array_like(int, float, datetime)
            The values the steps function is to be evaluated at.

        process_input : bool, Optional
            Indicate if the input data needs processing, to convert datetimes to floats for calculation. Primarily used internally to avoid converting input data twice.

        Returns
        ========
        array
            The values of the cumulative steps function evaluated at the provided input (x axis) values. All zeros when the object holds no step data.

        See Also
        =========
        fast_step
        smooth_step

        """

        #if we are using default basis, get answer even quicker
        # if self._basis.name == 'Heaviside' and self._all_data.shape[0] != 1:
        #     return self.fast_step(xdata=xdata,process_input=process_input)

        x = prepare_input(xdata) if process_input else xdata

        if self._step_data.shape[0] == 0:
            return np.zeros(len(x))

        result = self._base(x, self._step_data, self._basis.param)

        # For non-Heaviside bases the value at the epoch start is replaced by
        # the next evaluated value. That needs at least two inputs; a single
        # epoch-start input (as fast_step passes) used to raise IndexError.
        if (len(x) > 1 and self._basis.name != 'Heaviside'
                and x[0] == get_epoch_start(False)):
            result[0] = result[1]

        return result
示例#9
0
    def _clip(self, lbound=None, ubound=None):
        """
        Return the raw step rows left after restricting the steps to a key range.

        Rows with a key greater than ``lbound`` and less than or equal to
        ``ubound`` are kept and rebuilt as ``(key, 1, delta)`` rows. When a
        lower bound is applied, a leading row carrying the value of the steps
        at ``lbound`` is inserted: at the epoch start key when that value is
        non-zero, otherwise at ``lbound``. When both bounds fall inside the
        data, a closing row at ``ubound`` removes the deltas of the kept rows.

        Parameters
        ===========
        lbound : int, float, Optional
            Lower key bound. Ignored when it is not above the first real key
            and an upper bound is given.

        ubound : int, float, Optional
            Upper key bound. Ignored when it is not below the last real key
            and a lower bound is applied.

        Returns
        ========
        array
            Step rows with three columns. With no bounds at all, every row is
            returned.

        """

        step_data = self._all_data
        keys = step_data[:, DataModel.START.value]

        def rows_to_steps(rows):
            # Rebuild summary rows as raw (key, 1, delta) step rows.
            steps = np.empty((rows.shape[0], 3))
            steps[:, DataModel.START.value] = rows[:, DataModel.START.value]
            steps[:, DataModel.DIRECTION.value] = 1
            steps[:, DataModel.WEIGHT.value] = rows[:, DataModel.DIRECTION.
                                                    value]
            return steps

        def first_index_after(bound):
            # Number of keys <= bound, i.e. the index of the first key > bound.
            return np.searchsorted(keys, bound, side='right')

        def prepend_start_value(steps):
            # Insert the value of the steps at lbound as the first row.
            new_start_weight = self(lbound)[0]
            if new_start_weight != 0:
                lead_key = get_epoch_start(False)
            else:
                lead_key = lbound
            return np.insert(steps,
                             0, [[lead_key, 1, new_start_weight]],
                             axis=0)

        # No bounds means nothing to clip (this used to fail inside
        # searchsorted).
        if lbound is None and ubound is None:
            return rows_to_steps(step_data)

        # Only the upper bound applies.
        if lbound is None or (ubound is not None and lbound <= self._start):
            return rows_to_steps(step_data[:first_index_after(ubound)])

        lower_idx = first_index_after(lbound)

        # Only the lower bound applies.
        if ubound is None or ubound >= self._end:
            return prepend_start_value(rows_to_steps(step_data[lower_idx:]))

        # Both bounds apply.
        window = step_data[lower_idx:first_index_after(ubound)]
        new_steps = rows_to_steps(window)

        # Sum over the window itself. The previous code re-applied the
        # absolute indices to the already sliced rows and so summed the wrong
        # subset whenever lower_idx was non-zero.
        end_val = -1 * np.sum(window[:, DataModel.DIRECTION.value])

        new_steps = prepend_start_value(new_steps)
        return np.append(new_steps, [[ubound, 1, end_val]], axis=0)
示例#10
0
    def _recalculate(self):
        """
        Rebuild the raw step rows, the start and end keys and the summary data from the current step data.

        Rows with a NaN or infinite key are dropped, the remaining rows are
        sorted by key and rows sharing a key are merged by summing
        direction * weight. The result is stored in ``self._step_data`` (one
        row per key with direction 1), ``self._start`` / ``self._end`` and
        ``self._all_data`` (key, delta, cumulative value).

        A ValueError or TypeError raised while doing so is reported with a
        printed message instead of being propagated.
        """
        try:
            # One finite-value mask drops NaN and +/-inf keys together. It
            # replaces comparisons with np.NINF / np.PINF, which were removed
            # in NumPy 2.0.
            finite_keys = np.isfinite(self._step_data[:,
                                                      DataModel.START.value])
            self._step_data = self._step_data[finite_keys]
            self._step_data = self._step_data[np.argsort(
                self._step_data[:, DataModel.START.value])]

            #great numpy group by library!
            all_keys, all_values = group_by(
                self._step_data[:, DataModel.START.value]).sum(
                    self._step_data[:, DataModel.DIRECTION.value] *
                    self._step_data[:, DataModel.WEIGHT.value])

            #this is the raw step definition data for the application of basis functions
            self._step_data = np.empty((len(all_keys), 3))
            self._step_data[:, DataModel.START.value] = all_keys
            self._step_data[:, DataModel.DIRECTION.value] = 1.0
            self._step_data[:, DataModel.WEIGHT.value] = all_values

            # Skip an epoch-start sentinel when picking the first real key.
            # NOTE(review): the sentinel is only skipped with more than two
            # keys; confirm that two-key data should keep it.
            start_key = np.amin(all_keys)
            if start_key == get_epoch_start(False) and len(all_keys) > 2:
                start_key = all_keys[1]
            else:
                start_key = all_keys[0]

            # Same rule for an epoch-end sentinel at the other end.
            end_key = np.amax(all_keys)
            if end_key == get_epoch_end(False) and len(all_keys) > 2:
                end_key = all_keys[-2]
            else:
                end_key = all_keys[-1]

            #The real value start and end points for the entire series of steps
            self._start = start_key
            self._end = end_key

            #this is the computed summary describing the steps data for fast access
            all_data = np.empty((all_keys.shape[0], 3))
            all_data[:, DataModel.START.value] = all_keys
            all_data[:, DataModel.DIRECTION.value] = all_values
            all_data[:, DataModel.WEIGHT.value] = np.cumsum(
                np.asarray(all_values), axis=0)

            self._all_data = all_data

        except (ValueError, TypeError):
            print(
                'Empty steps objects can not perform operations, please load some data and try again'
            )
示例#11
0
    def plot(self,
             method=None,
             smooth_factor=None,
             smooth_basis=None,
             interval=0.01,
             ax=None,
             where='post',
             **kargs):
        """
        Plot the steps function using different parameters and methods.

        Parameters
        ===========
        method : {'function','smooth','pretty','smooth_function'}, Optional
            Specify how the steps should be calculated to generate the plot and the type of plot style. Any other value, including the default, draws a standard step plot from the step keys and values.

        smooth_factor : int, float, Optional
            If using the method='smooth' or 'smooth_function' option, set the strength of the smoothing to apply.

        smooth_basis : Basis, Optional
            The `:class: Basis` to use when calculating the smooth steps function.

        interval : int, float, Pandas.Timedelta, Optional
            The increment size between the key locations used whenever the steps function is evaluated over a generated range.

        ax : Matplotlib.Axes, Optional
            The axes to plot this chart onto if already defined, otherwise a new figure is created.

        where : {'pre', 'post'}, Optional
            How to draw the step plot, this parameter is the same as the Matplotlib *where* parameter used in the Axes.step plotting function.

        **kargs :
            Matplotlib key-value arguments. ``figsize`` is only used when a new figure is created.


        Returns
        =========
        Matplotlib.Axes
            A reference to the plot axes object to allow further plotting on the same axes.

        Examples
        ==========

        .. plot::
            :context: close-figs

            s1 = Step(5,10,3)
            s2 = Step(6,weight=2)
            st = s1 + s2
            ax = s1.plot(color='r',figsize=(8,4))
            s2.plot(ax=ax,method='function')
            s2.plot(ax=ax,method='smooth')
            st.plot(ax=ax)
            st.smooth_plot(ax=ax)

            ax.set_title('Steps Plot')

        """

        def plot_range():
            # Keys from first() to last() at the requested interval.
            return get_plot_range(self.first(),
                                  self.last(),
                                  interval,
                                  use_datetime=self.using_datetime())

        def smoothed(keys):
            return self.smooth_step(keys,
                                    smooth_factor=smooth_factor,
                                    smooth_basis=smooth_basis)

        def display_keys(keys, numeric_offset):
            # Convert keys for display and pick the matching lead-in offset.
            if self.using_datetime():
                return prepare_datetime(keys), pd.Timedelta(minutes=1)
            return keys, numeric_offset

        def add_lead_in(keys, values, offset):
            # small offset to ensure we plot the initial step transition
            if keys[0] == get_epoch_start(self.using_datetime()):
                keys[0] = keys[1] - offset
            elif not reverse_step:
                keys = np.insert(keys, 0, keys[0] - offset)
                values = np.insert(values, 0, 0)
                keys[0] = keys[0] - offset
            return keys, values

        # figsize only applies to a figure created here; always remove it so
        # it is never forwarded to the matplotlib drawing calls.
        plot_size = kargs.pop('figsize', None)
        if ax is None:
            if plot_size is None:
                plot_size = get_default_plot_size()

            _, ax = plt.subplots(figsize=plot_size)

        # Work on a copy: step_keys() can return a slice of the internal data
        # and the code below overwrites np_keys[0].
        np_keys = np.copy(self.step_keys())
        np_values = self.step_values()

        reverse_step = False

        if len(np_keys) < 3:
            if len(np_keys) == 0:
                ax.axhline(
                    self(get_epoch_start(self.using_datetime()))[0], **kargs)
                return ax

            # Too few keys for a useful outline, so evaluate the function over
            # a generated range instead.
            reverse_step = np_keys[0] == get_epoch_start(False)
            np_keys = plot_range()
            np_values = self.step(np_keys)

        if method == 'pretty':
            # Both are fixed before any lead-in key is inserted below.
            end_index = len(np_keys)
            start_index = 1

            # The converted keys used to be assigned to `offset`, which left
            # np_keys unconverted and lost the offset.
            np_keys, offset = display_keys(np_keys, 0.0000000001)

            if np_keys[0] == get_epoch_start(self.using_datetime()):
                np_keys[0] = np_keys[1] - offset

                np_keys = np.insert(np_keys, 0, np_keys[0] - offset)
                np_values = np.insert(np_values, 0, 0)
                np_keys[0] = np_keys[0] - offset

            step0_k = np_keys[0]
            step0_v = np_values[0]

            for i, k in enumerate(np_keys):
                v = np_values[i]

                ax.hlines(y=step0_v, xmin=step0_k, xmax=k, **kargs)
                ax.vlines(x=k, ymin=step0_v, ymax=v, linestyles=':', **kargs)

                if start_index <= i < end_index:
                    # Open marker at the old value (not for the first step),
                    # filled marker at the new value.
                    if i != start_index:
                        ax.plot(k,
                                step0_v,
                                marker='o',
                                fillstyle='none',
                                **kargs)
                    ax.plot(k, v, marker='o', fillstyle='full', **kargs)
                elif i == end_index:
                    ax.plot(k, step0_v, marker='o', fillstyle='none', **kargs)

                step0_k = k
                step0_v = v

        elif method == 'function':
            tsx = plot_range()
            ax.step(tsx, self.step(tsx), where=where, **kargs)
        elif method == 'smooth_function':
            tsx = plot_range()
            ax.plot(tsx, smoothed(tsx), **kargs)
        elif method == 'smooth':
            if np_keys.shape[0] < 20:
                tsx = plot_range()
                ax.plot(tsx, smoothed(tsx), **kargs)
            else:
                np_keys, offset = display_keys(np_keys, 0.000000000001)
                np_keys, np_values = add_lead_in(np_keys, np_values, offset)
                ax.plot(np_keys, smoothed(np_keys), **kargs)
        else:
            np_keys, offset = display_keys(np_keys, 0.0000000001)
            np_keys, np_values = add_lead_in(np_keys, np_values, offset)
            ax.step(np_keys, np_values, where=where, **kargs)

        return ax
示例#12
0
    def plot_rolling_step(self,
                          rolling_function=None,
                          window=5,
                          pre_mid_post='mid',
                          ax=None,
                          **kargs):
        """
        Plot the result of applying a reduction function to a rolling window across the step values.

        Parameters
        ==============
        rolling_function : Numpy.ufunc, Optional
            A numpy reduction function to apply to the rolling window across the steps data, for example np.mean, np.max.

        window : int, Optional
            The size of the rolling window to apply across the steps data.

        pre_mid_post : {'pre','mid','post'}, Optional
            Where to centre the reduction location within the rolling window. Using 'mid' will associate the reduced value with the centre key of the window.

        ax : Matplotlib.Axes, Optional
            The plot axis to create the plot on if being created externally, otherwise a new figure is created.

        **kargs :
            Matplotlib key-value parameters to pass to the plot. ``figsize`` is only used when a new figure is created.

        Returns
        ========
        Matplotlib.Axes

        See Also
        ==============
        ecdf_plot
        pacf_plot
        histogram_plot
        summary

        """

        # figsize only applies to a figure created here; always remove it so
        # it is never forwarded to ax.plot.
        plot_size = kargs.pop('figsize', None)
        if ax is None:
            if plot_size is None:
                plot_size = get_default_plot_size()

            _, ax = plt.subplots(figsize=plot_size)

        # Work on a copy: step_keys() can return a slice of the internal data,
        # and shifting np_keys[0] below would otherwise move the stored first
        # key a little further on every call.
        np_keys = np.copy(self.step_keys())

        # small offset to ensure we plot the initial step transition
        if self.using_datetime():
            offset = pd.Timedelta(minutes=1)
            np_keys = prepare_datetime(np_keys)
        else:
            offset = 0.0000001

        if np_keys[0] == get_epoch_start(self.using_datetime()):
            np_keys[0] = np_keys[1] - offset
        else:
            np_keys[0] = np_keys[0] - offset

        x, y = self.rolling_function_step(np_keys,
                                          rolling_function=rolling_function,
                                          window=window,
                                          pre_mid_post=pre_mid_post)
        ax.plot(x, y, **kargs)

        return ax