Пример #1
0
def plot_all_series(data, eps=0.1):
    """Draw every element of ``data`` onto one shared axis and show the figure.

    The axis is prepared from the union of ``util.days(d.data.index)`` over
    all elements.  Each element's ``plot`` method is then called with the
    first date of that union, the axis, and a two-value row
    ``[offset, width]``: the offsets are ``len(data)`` evenly spaced values
    starting at ``eps - 0.5`` and stopping short of ``0.5 - eps``; the width
    is ``(1 - 2 * eps) / len(data)`` for every element.
    """
    day_indexes = [util.days(series.data.index) for series in data]
    dates = reduce(pd.DatetimeIndex.union, day_indexes)
    ax = prepare_axis(dates)

    count = len(data)
    offsets = np.linspace(eps - 0.5, 0.5 - eps, count, endpoint=False)
    widths = [(1.0 - 2 * eps) / count] * count
    slots = np.column_stack([offsets, widths])

    first_date = dates[0]
    for slot, series in zip(slots, data):
        series.plot(first_date, ax, slot)
    plt.show()
Пример #2
0
def plot_all_series(data, eps=0.1):
    """Plot all elements of ``data`` on a common axis, then display it.

    Every element is given its own ``[start, width]`` row, where the starts
    come from ``np.linspace(eps - 0.5, 0.5 - eps, n, endpoint=False)`` and the
    width is the constant ``(1 - 2 * eps) / n`` with ``n = len(data)``.  The
    row is passed to the element's ``plot`` method together with the earliest
    entry of the combined day index and the prepared axis.
    """
    per_series_days = [util.days(item.data.index) for item in data]
    all_days = reduce(pd.DatetimeIndex.union, per_series_days)
    axis = prepare_axis(all_days)

    total = len(data)
    starts = np.linspace(eps - 0.5, 0.5 - eps, total, endpoint=False)
    width = [(1.0 - 2 * eps) / total] * total
    bands = np.column_stack([starts, width])

    # bands has exactly one row per element of data, so pair them by position.
    for band, item in zip(bands, data):
        item.plot(all_days[0], axis, band)

    plt.show()
Пример #3
0
    def from_sojourns(cls, sojourns, subj):
        """Build a per-day summary table from ``sojourns`` and wrap it in ``cls``.

        The table starts as ``cls.prepare_output()`` reindexed to all but the
        last entry of ``util.days(sojourns.raw_data.index)``.  Only metric
        columns that already exist in that prepared frame are filled in; the
        bookkeeping columns (``subject``, ``day``, ``sleep_ranges``, step and
        count totals, ``counts_per_min``, ``break_rate``) are always written.

        NOTE(review): the first parameter is ``cls`` and the body calls
        ``cls.prepare_output()`` / ``cls(out)``, so this is presumably
        decorated with ``@classmethod`` outside the visible lines -- confirm.

        Args:
            sojourns: object whose ``data`` and ``raw_data`` attributes are
                read here; ``raw_data`` must have an index and ``columns``
                (presumably both are time-indexed DataFrames -- TODO confirm).
            subj: value stored unchanged in the ``subject`` column.

        Returns:
            ``cls(out)``, where ``out`` is the filled-in summary frame.
        """
        # FIXME this whole method is horrifying
        # Bout-length thresholds 10, 20, ..., 60; used below as minutes via
        # pd.Timedelta(dur, 'm').
        durs = range(10, 61, 10)
        # presumably the day boundaries spanned by the raw data -- verify
        # against util.days
        day_idx = util.days(sojourns.raw_data.index)
        # FIXME does this still work if we pass in the index?
        # One output row per entry of day_idx except the last one.
        out = cls.prepare_output().reindex(index=day_idx[:-1])
        # Snapshot of the prepared column names, taken before any column is
        # added below; it decides which metrics get computed.
        out_cols = set(out.columns)

        out['subject'] = subj
        out['day'] = util.WEEKDAYS[out.index.weekday]
        # NOTE(review): under Python 3 ``map`` returns an iterator rather than
        # a list; this assignment looks written for Python 2 -- confirm the
        # target interpreter.
        out['sleep_ranges'] = map(sleep_ranges,
                                  util.slice_data(sojourns.data, day_idx))
        out['total_counts'] = [total(day, 'counts') for day in
                                   util.slice_data(sojourns.raw_data, day_idx)]
        # Step totals are optional: only written when the raw data carries
        # the corresponding column.
        if 'steps' in sojourns.raw_data.columns:
            out['AG_steps'] = [total(day, 'steps') for day in
                                   util.slice_data(sojourns.raw_data, day_idx)]
        if 'AP.steps' in sojourns.raw_data.columns:
            # The 'AP.steps' total is doubled; the reason is not visible here
            # (presumably the source counts strides, not steps -- TODO confirm).
            out['AP_steps'] = [2*total(day, 'AP.steps') for day in
                                   util.slice_data(sojourns.raw_data, day_idx)]
        for classifier in util.classifiers:
            grouped = util.group_by_classifier(sojourns.data, classifier)
            # NOTE(review): ``sliced`` is iterated several times below, so
            # util.slice_data must return something re-iterable (not a
            # one-shot generator) -- confirm.
            sliced = util.slice_data(grouped, day_idx)
            hours = util.clock_hours(grouped.index)
            # Total of delta(day) over rows where the classifier column is
            # set, expressed in minutes.
            key = 'min_%s' % classifier.name
            if key in out_cols:
                out[key] = [delta(day)[day[classifier.cname].notnull()].sum() /
                                pd.Timedelta(1, 'm')
                            for day in sliced]
            # Number of distinct values in the classifier column per day.
            key = '%s_periods' % classifier.name
            if key in out_cols:
                out[key] = [day[classifier.cname].nunique() for day in sliced]
            key = 'mean_%s_len' % classifier.name
            # Weight by minute, not by bout.
            # Each row's delta is divided by the summed delta of the rows
            # where the classifier column is set, multiplied by that row's
            # 'bout_Dur', and the products are summed; result in minutes.
            if key in out_cols:
                out[key] = [(delta(day) / delta(day).where(
                                     day[classifier.cname].notnull()).sum() *
                                 day['bout_Dur']).sum() / pd.Timedelta(1, 'm')
                            for day in sliced]
            # Include bouts that extend into neighboring days
            # Median of the first 'bout_Dur' of each classifier-column group,
            # in minutes.
            key = 'median_%s_len' % classifier.name
            if key in out_cols:
                out[key] = [day.groupby(classifier.cname).first()['bout_Dur']
                                .median() / pd.Timedelta(1, 'm')
                            for day in sliced]
            # Incorrectly allow intensity to bleed across midnight because the
            # current architecture doesn't allow us to do this right.
            # NOTE(review): unlike 'mean_%s_len' above there is no outer
            # ``.sum()`` here, so each list element is delta(day) scaled by
            # two scalars (a per-row result, not one number per day) -- check
            # whether a reduction is missing.
            key = 'mean_%s_intensity' % classifier.name
            if key in out_cols:
                out[key] = [delta(day) / delta(day).where(
                                    day[classifier.cname].notnull()).sum() *
                                day['counts'].sum()
                            for day in sliced]
            # Unused
            # This is wrong; it can't be computed from processed data
#            key = 'median_%s_intensity' % classifier.name
#            if key in out_cols:
#                out[key] = [day.ix[day[classifier.cname].notnull(),'counts']
#                                .median()
#                            for day in sliced]
            # Per-day minutes spent in bouts of at least ``dur`` minutes, for
            # each threshold in ``durs``.
            for dur in durs:
                key = '%s_length_%d' % (classifier.name, dur)
                if key in out_cols:
                    out[key] = [delta(day)[day[classifier.cname].notnull() &
                                               (day['bout_Dur'] >=
                                                    pd.Timedelta(dur, 'm'))] \
                                    .sum() / pd.Timedelta(1, 'm')
                                for day in sliced]
            # Hour-of-day ("circadian") breakdown: each hourly slice writes a
            # single cell in the row of the date it falls on.
            # NOTE(review): ``.ix`` was deprecated in pandas 0.20 and removed
            # in 1.0; these label-based writes would need ``.loc`` on a
            # current pandas -- confirm the pinned version.
            for hh, hour in zip(hours, util.slice_data(grouped, hours)):
                # unlike sliced, hour can be empty
                key = '%s_circadian_%d' % (classifier.name, hh.hour)
                if key in out_cols:
                    out.ix[pd.Timestamp(hh.date(), tz=util.tz),key] = \
                        delta(hour)[hour[classifier.cname].notnull()].sum() / \
                            pd.Timedelta(1, 'm')
                for dur in durs:
                    key = '%s_circadian_%d_length_%d' % (classifier.name,
                                                         hh.hour, dur)
                    if key in out_cols:
                        out.ix[pd.Timestamp(hh.date(), tz=util.tz),key] = \
                            delta(hour)[hour[classifier.cname].notnull() &
                                           (hour['bout_Dur'] >=
                                                pd.Timedelta(dur, 'm'))
                                       ].sum() / pd.Timedelta(1, 'm')
            # NOTE(review): these two derived columns sit inside the
            # classifier loop, so they are recomputed on every iteration and
            # only the last pass matters; they read 'min_awake',
            # 'sedentary_periods' and 'min_sedentary', which must exist in
            # ``out``.  Looks hoistable to after the loop -- confirm.
            out['counts_per_min'] = out['total_counts'] / out['min_awake']
            out['break_rate'] = (60*out['sedentary_periods'] /
                out['min_sedentary'])

        return cls(out)