示例#1
0
文件: discrete.py 项目: tcsvn/pyadlml
def hist_activities(y, scale=None, color=None, figsize=(9, 3)):
    """
    Draw a horizontal bar chart of how often each label occurs in *y*.

    Parameters
    ----------
    y : np array
        Array of string labels.
    scale : None or 'log', default: None
        When set to ``'log'`` the count axis uses a logarithmic scale.
    color : optional
        Bar color. When not set, the primary theming color is used.
    figsize : (float, float) or None, default: (9, 3)
        Size handed to ``plt.subplots``. When ``None``, the size is derived
        from the number of bars.

    Returns
    -------
    None
        The figure is created through pyplot but not returned.
    """
    bar_color = color if color is not None else get_primary_color()

    # count every label, then order the bars ascending by their count
    counts = pd.Series(data=y).value_counts().sort_values(ascending=True)

    if figsize is None:
        figsize = _num_bars_2_figsize(len(counts))
    fig, ax = plt.subplots(1, 1, figsize=figsize)

    if scale == 'log':
        plt.xscale('log')

    ax.barh(counts.index,
            counts.values,
            orientation='horizontal',
            color=bar_color)
    plt.xlabel('counts')
    fig.suptitle('Label occurence')
示例#2
0
def hist_counts(df_devs=None,
                lst_devs=None,
                df_tc=None,
                figsize=None,
                y_scale=None,
                color=None,
                order='count',
                file_path=None):
    """
    Plot a horizontal bar chart displaying how often devices are triggered.

    Parameters
    ----------
    df_devs : pd.DataFrame, optional
        recorded devices from a dataset. Fore more information refer to the
        :ref:`user guide<device_dataframe>`.
    lst_devs : lst of str, optional
        A list of devices that are included in the statistic. It is forwarded
        to the trigger count computation when *df_tc* is not given.
    df_tc : pd.DataFrame, optional
        Precomputed trigger counts. If given, *df_devs* and *lst_devs* are
        ignored. The device column and the column ``trigger_count`` are read.
    y_scale : {"log", "linear"}, default: None
        The scale of the count axis. ``None`` behaves like ``"linear"``.
        Because the bars are horizontal the scale is applied to the x-axis.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        from the number of devices.
    color : str, optional
        sets the primary color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    order : {'count', 'alphabetic', 'alphabetically', 'room'}, default='count'
        determines the order in which the devices are listed. ``'alphabetic'``
        and ``'alphabetically'`` are synonyms. ``'room'`` is not implemented.
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_device_bar_count
    >>> plot_device_bar_count(data.df_devices)

    .. image:: ../_static/images/plots/dev_bar_trigger.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing.

    Raises
    ------
    NotImplementedError
        If *order* is ``'room'``.
    """
    assert not (df_devs is None and df_tc is None)
    # None is the declared default and therefore has to be a legal value
    assert y_scale in [None, 'log', 'linear']
    # accept the spelling used in the documentation as well as the short one
    assert order in ['alphabetic', 'alphabetically', 'count', 'room']

    title = 'Device triggers'
    x_label = 'count'
    df_col = 'trigger_count'

    df = (devices_trigger_count(df_devs.copy(), lst_devs=lst_devs)
          if df_tc is None else df_tc)
    num_dev = len(df)
    figsize = (_num_bars_2_figsize(num_dev) if figsize is None else figsize)
    color = (get_primary_color() if color is None else color)

    if order in ('alphabetic', 'alphabetically'):
        df = df.sort_values(by=[DEVICE], ascending=True)
    elif order == 'count':
        df = df.sort_values(by=[df_col])
    else:
        # NotImplemented is a non-callable singleton; the exception class
        # is NotImplementedError
        raise NotImplementedError('the room order is going to be implemented')

    # plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title(title)
    plt.xlabel(x_label)
    ax.barh(df[DEVICE], df[df_col], color=color)

    if y_scale == 'log':
        ax.set_xscale('log')

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
示例#3
0
def hist_on_off(df_devs=None,
                lst_devs=None,
                df_onoff=None,
                figsize=None,
                color=None,
                color_sec=None,
                order='frac_on',
                file_path=None):
    """
    Plot one stacked horizontal bar per device showing its off/on fraction.

    Parameters
    ----------
    df_devs : pd.DataFrame, optional
        recorded devices from a dataset. Fore more information refer to the
        :ref:`user guide<device_dataframe>`.
    lst_devs : lst of str, optional
        A list of devices that are included in the statistic. It is forwarded
        to the on/off statistic when *df_onoff* is not given.
    df_onoff : pd.DataFrame, optional
        Precomputed on/off statistics. If given, they are used instead of
        computing them from *df_devs*. The device column and the columns
        ``frac_on`` and ``frac_off`` are read.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        from the number of devices.
    color : str, optional
        color of the *off* segments. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    color_sec : str, optional
        color of the *on* segments. When not set, the secondary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    order : {'frac_on', 'name', 'area'}, default='frac_on'
        determines the order in which the devices are listed. ``'area'``
        is not implemented.
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_device_on_off
    >>> plot_device_on_off(data.df_devices)

    .. image:: ../_static/images/plots/dev_on_off.png
       :height: 300px
       :width: 500 px
       :scale: 100 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing.

    Raises
    ------
    NotImplementedError
        If *order* is ``'area'``.
    """
    assert df_devs is not None or df_onoff is not None
    assert order in ['frac_on', 'name', 'area']

    off_color = color if color is not None else get_primary_color()
    on_color = color_sec if color_sec is not None else get_secondary_color()

    stats = df_onoff
    if stats is None:
        stats = devices_on_off_stats(df_devs, lst_devs=lst_devs)

    if figsize is None:
        figsize = _num_bars_2_figsize(len(stats))

    if order == 'frac_on':
        stats = stats.sort_values(by='frac_on', axis=0)
    elif order == 'name':
        stats = stats.sort_values(by=DEVICE, axis=0)
    else:
        raise NotImplementedError(
            'room order will be implemented in the future')

    fig, ax = plt.subplots(figsize=figsize)

    if lst_devs is not None:
        # zero-width, fully transparent bars are drawn before the real ones
        # NOTE(review): presumably this registers every listed device on the
        # y-axis - confirm
        stats['tmp'] = 0
        bar_labels = stats[DEVICE]
        plt.barh(bar_labels, stats['tmp'].values, alpha=0.0)
    else:
        bar_labels = list(stats[DEVICE])

    # the "on" segment is stacked to the right of the "off" segment
    plt.barh(bar_labels,
             stats['frac_off'].values,
             label='off',
             color=off_color)
    plt.barh(bar_labels,
             stats['frac_on'].values,
             left=stats['frac_off'],
             label='on',
             color=on_color)

    plt.title('Devices fraction on/off')
    plt.xlabel('Percentage in binary states')
    plt.ylabel('Devices')

    def _greater_fraction(frac_off):
        # size of the larger one of the two segments
        if frac_off >= 0.5:
            return frac_off
        return 1 - frac_off

    def _greater_center(frac_off):
        # horizontal middle of the larger one of the two segments
        if frac_off >= 0.5:
            return frac_off / 2
        return (1 - frac_off) / 2 + frac_off

    widths = stats['frac_off'].apply(_greater_fraction)
    xcenters = stats['frac_off'].apply(_greater_center)

    # write the greater fraction into the middle of its segment
    for row, (center, width) in enumerate(zip(xcenters, widths)):
        ax.text(center,
                row,
                '{:.4f}'.format(width),
                ha='center',
                va='center',
                color='black' if center > 0.5 else 'white')

    # the legend moves to the right when the number of the last (topmost)
    # bar is centered in the left half
    legend_left = not (len(xcenters) > 0 and xcenters.iloc[-1] < 0.5)
    if legend_left:
        anchor, location = (0, 1), 'upper left'
    else:
        anchor, location = (1, 1), 'upper right'
    ax.legend(ncol=2, bbox_to_anchor=anchor, loc=location, fontsize='small')

    # hide the top and right border of the axes
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    if file_path is None:
        return fig
    savefig(fig, file_path)
    return None
示例#4
0
def hist_trigger_time_diff(df_devs=None,
                           x=None,
                           n_bins=50,
                           figsize=(10, 6),
                           color=None,
                           file_path=None):
    """
    Plot a histogram of the time differences between succeeding device
    triggers on a logarithmic axis, overlaid with the cumulative percentage.

    Parameters
    ----------
    df_devs : pd.DataFrame, optional
        recorded devices from a dataset. Fore more information refer to the
        :ref:`user guide<device_dataframe>`.
    x : ndarray, optional
        array of time deltas used to plot the histogram. If given, *df_devs*
        is ignored. The values have to be strictly positive because their
        base-10 logarithm determines the bin edges.
    n_bins : int, default=50
        the number of logarithmically spaced bin edges of the histogram.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    figsize : (float, float), default: (10, 6)
        width, height in inches.
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plot import plot_trigger_time_dev
    >>> plot_trigger_time_dev_todo(data.df_devices)

    .. image:: ../_static/images/plots/dev_hist_trigger_td.png
       :height: 300px
       :width: 500 px
       :scale: 100 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing.
    """
    assert not (df_devs is None and x is None)
    title = 'Time difference between succeeding device'
    ylabel = 'count'
    ax2label = 'cummulative percentage'
    ax1label = 'timedeltas count '
    xlabel = 'log seconds'
    color = (get_primary_color() if color is None else color)
    color2 = get_secondary_color()

    X = trigger_time_diff(df_devs.copy()) if x is None else x

    # logarithmically spaced bin edges from the smallest to the largest value
    log_X = np.log10(X)
    bins = np.logspace(np.min(log_X), np.max(log_X), n_bins)

    # cumulative share of the samples up to every bin edge; the leading 0
    # makes the curve have one point per edge
    hist, _ = np.histogram(X, bins=bins)
    cum_percentage = hist.cumsum() / hist.sum()
    cum_percentage = np.concatenate(([0], cum_percentage))

    # plots
    fig, ax = plt.subplots(figsize=figsize)
    plt.xscale('log')
    ax.hist(X, bins=bins, label=ax1label, color=color)
    ax.set_ylabel(ylabel)
    ax.set_xlabel(xlabel)

    # create axis for line; the color is passed by keyword only, combining
    # it with a color format string makes matplotlib emit a warning
    ax2 = ax.twinx()
    ax2.plot(bins, cum_percentage, label=ax2label, color=color2)
    ax2.set_ylabel('%')
    ax2.set_xscale('log')

    # identity-mapped top axis that repeats the values in a time format
    ax_top = ax.secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
    ax_top.xaxis.set_major_formatter(
        ticker.FuncFormatter(func_formatter_seconds2time))

    # plot single legend for multiple axis
    h1, l1 = ax.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    ax.legend(h1 + h2, l1 + l2, loc='center right')

    plt.title(title, y=1.08)

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
示例#5
0
def ridge_line(df_acts=None,
               lst_acts=None,
               df_act_dist=None,
               idle=False,
               n=1000,
               ylim_upper=None,
               color=None,
               figsize=None,
               file_path=None):
    """
    Plots the activity density distribution over one day.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. It is
        forwarded to the distribution computation when *df_act_dist* is
        not given.
    df_act_dist : pd.DataFrame, optional
        A precomputed activity density distribution with one column per
        activity. If given, *df_acts*, *lst_acts*, *idle* and *n* are ignored.
    n : int, default=1000
        The number of monte-carlo samples to draw.
    ylim_upper: float, optional
        The upper limit of the y-axis. Set this if the value that is
        automatically determined from the number of activities is not
        satisfying.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        from the number of activities.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_ridgeline
    >>> plot_activity_ridgeline(data.df_activities)

    .. image:: ../_static/images/plots/act_ridge_line.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing.

    Raises
    ------
    ValueError
        If the distribution is empty or has no activity column.
    """
    assert not (df_acts is None and df_act_dist is None)

    title = 'Activity distribution over one day'
    xlabel = 'day'
    color = (get_primary_color() if color is None else color)

    if df_act_dist is None:
        if idle:
            df_acts = add_idle(df_acts)
        df = activities_dist(df_acts.copy(), lst_acts=lst_acts, n=n)
        if df.empty:
            raise ValueError(
                "no activity was recorded and no activity list was given.")
    else:
        df = df_act_dist

    seconds_per_day = 60 * 60 * 24

    def date_2_second(date):
        """ maps time onto seconds of a day

        Parameters
        ----------
        date : np.datetime64
            all the dates are on the day 1990-01-01

        Returns
        -------
        int
            seconds elapsed since midnight or -1 for a missing value
        """
        if pd.isnull(date):
            return -1
        val = (date - np.datetime64('1990-01-01')) / np.timedelta64(1, 's')
        assert 0 <= val <= seconds_per_day
        return int(val)

    df = df.apply(np.vectorize(date_2_second))

    acts = tuple(df.columns)
    if not acts:
        raise ValueError("the activity distribution contains no activities.")
    # Sort every column's values ascending. The sort is done on the raw
    # values: assigning a sorted Series back to its column would be aligned
    # on the index again and leave the order unchanged.
    data = tuple(np.sort(df[col].values) for col in acts)
    num_act = len(acts)

    # infer visual properties
    figsize = (_num_items_2_ridge_figsize(num_act)
               if figsize is None else figsize)
    ylim_upper = (_num_items_2_ridge_ylimit(num_act)
                  if ylim_upper is None else ylim_upper)

    # plot the ridgeline
    fig, ax = plt.subplots(figsize=figsize)
    ridgeline(data,
              labels=acts,
              overlap=.85,
              fill=color,
              n_points=100,
              dist_scale=0.13)
    plt.title(title)

    for side in ('left', 'right', 'top'):
        plt.gca().spines[side].set_visible(False)
    plt.ylim((0, ylim_upper))
    plt.xlabel(xlabel)

    # the x-axis spans one day in seconds, k is the length of one hour
    a = 0
    b = seconds_per_day
    k = (b - a) / 24

    def hour_label(x, pos):
        # every tick lies half a second before a full hour, hence the + 1
        return '{:02d}:00'.format(int(x / k) + 1)

    plt.xlim((a, b))
    # one tick for each of the hours 01:00 to 23:00
    tcks_pos = np.arange(0, 23) * k + (-0.5 + k)

    x_locator = ticker.FixedLocator(tcks_pos)
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(hour_label))
    ax.xaxis.set_major_locator(x_locator)
    fig.autofmt_xdate(rotation=45)

    plt.grid(zorder=0)

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
示例#6
0
def hist_cum_duration(df_acts=None,
                      lst_acts=None,
                      df_dur=None,
                      y_scale=None,
                      idle=False,
                      figsize=None,
                      color=None,
                      file_path=None):
    """
    Plots the cumulative duration for each activity in a bar plot.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. It is
        forwarded to the duration computation when *df_dur* is not given.
    df_dur : pd.DataFrame, optional
        Precomputed activity durations. If given, *df_acts*, *lst_acts* and
        *idle* are ignored. The columns ``activity`` and ``seconds`` are read.
    y_scale : {"log", "linear"}, default: None
        The scale of the duration axis. ``None`` behaves like ``"linear"``.
        Because the bars are horizontal the scale is applied to the x-axis.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        from the number of activities.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plots import plot_activity_bar_duration
    >>> plot_activity_bar_duration(data.df_activities)

    .. image:: ../_static/images/plots/act_bar_dur.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    # 'linear' is a documented value and therefore has to be accepted
    assert y_scale in [None, 'log', 'linear']
    assert not (df_acts is None and df_dur is None)

    title = 'Cummulative activity durations'
    xlabel = 'seconds'
    freq = 'seconds'
    color = (get_primary_color() if color is None else color)

    if df_dur is None:
        if idle:
            df_acts = add_idle(df_acts.copy())
        df = activity_durations(df_acts, lst_acts=lst_acts, time_unit=freq)
    else:
        df = df_dur
    df = df.sort_values(by=[freq], axis=0)

    num_act = len(df)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    # plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title(title)
    plt.xlabel(xlabel)
    ax.barh(df['activity'], df[freq], color=color)
    if y_scale == 'log':
        ax.set_xscale('log')

    # identity-mapped secondary axis that repeats the values in a time
    # format like 1s, 1m, 1d
    ax_top = ax.secondary_xaxis('top', functions=(lambda x: x, lambda x: x))
    ax_top.set_xlabel('time')
    ax_top.xaxis.set_major_formatter(
        ticker.FuncFormatter(func_formatter_seconds2time))

    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig
示例#7
0
def hist_counts(df_acts=None,
                lst_acts=None,
                df_ac=None,
                y_scale="linear",
                idle=False,
                figsize=None,
                color=None,
                file_path=None):
    """
    Plot a bar chart displaying how often activities are occurring.

    Parameters
    ----------
    df_acts : pd.DataFrame, optional
        recorded activities from a dataset. Fore more information refer to the
        :ref:`user guide<activity_dataframe>`.
    lst_acts : lst of str, optional
        A list of activities that are included in the statistic. It is
        forwarded to the count computation when *df_ac* is not given.
    df_ac : pd.DataFrame, optional
        Precomputed activity counts. If given, *df_acts*, *lst_acts* and
        *idle* are ignored. After the first index level is turned into a
        column, the columns ``activity`` and ``occurrence`` are read. The
        passed dataframe is not modified.
    idle : bool, default: False
        Determines whether gaps between activities should be assigned
        the activity *idle* or be ignored.
    y_scale : {"log", "linear"}, default: linear
        The scale of the count axis. ``None`` behaves like ``"linear"``.
        Because the bars are horizontal the scale is applied to the x-axis.
    figsize : (float, float), default: None
        width, height in inches. If not provided, the figsize is inferred
        from the number of activities.
    color : str, optional
        sets the color of the plot. When not set, the primary theming color is used.
        Learn more about theming in the :ref:`user guide <theming>`
    file_path : str, optional
        If set, saves the plot under the given file path and return *None* instead
        of returning the figure.

    Examples
    --------
    >>> from pyadlml.plot import plot_activity_bar_count
    >>> plot_activity_bar_count(data.df_activities, idle=True);

    .. image:: ../_static/images/plots/act_bar_cnt.png
       :height: 300px
       :width: 500 px
       :scale: 90 %
       :alt: alternate text
       :align: center

    Returns
    -------
    res : fig or None
        Either a figure if file_path is not specified or nothing
    """
    assert not (df_acts is None and df_ac is None)
    # 'linear' is the declared default and therefore has to be a legal value
    assert y_scale in [None, 'log', 'linear']

    title = 'Activity occurrences'
    col_label = 'occurrence'
    xlabel = 'counts'
    color = (get_primary_color() if color is None else color)

    # create the statistics if they don't exist
    if df_ac is None:
        df_acts = df_acts.copy()
        if idle:
            df_acts = add_idle(df_acts)
        df = activities_count(df_acts, lst_acts=lst_acts)
    else:
        df = df_ac

    # prepare dataframe for plotting; reset_index returns a new frame so a
    # dataframe passed in by the caller is left untouched
    df = df.reset_index(level=0)
    df = df.sort_values(by=[col_label], axis=0)

    # define plot modalities
    num_act = len(df)
    figsize = (_num_bars_2_figsize(num_act) if figsize is None else figsize)

    # create plot
    fig, ax = plt.subplots(figsize=figsize)
    plt.title(title)
    plt.xlabel(xlabel)
    ax.barh(df['activity'], df[col_label], color=color)

    if y_scale == 'log':
        ax.set_xscale('log')

    # save or return fig
    if file_path is not None:
        savefig(fig, file_path)
        return
    else:
        return fig