Пример #1
0
    def __resampleData(self, data: pd.Series, resampleFreq: str,
                       aggStrategy: str) -> pd.Series:
        """Down-sample a datetime-indexed series by flooring and aggregating.

        Every timestamp in the index is floored to the bucket selected by
        ``resampleFreq`` and the samples sharing a bucket are then combined
        according to ``aggStrategy``. The input series is never modified.

        Args:
            data: Series whose index holds datetimes (``.dt.floor`` is
                applied to it).
            resampleFreq: Case-insensitive bucket code: ``'s'`` second,
                ``'m'`` minute, ``'b'`` 15-minute block, ``'h'`` hour,
                ``'d'`` day.
            aggStrategy: Case-insensitive strategy: ``'snap'`` keeps the
                first value of each bucket, ``'average'`` takes the mean,
                ``'raw'`` disables resampling.

        Returns:
            ``data`` itself, untouched, when it is empty, when
            ``resampleFreq`` is missing or not a known code, or when
            ``aggStrategy`` is missing or ``'raw'``. Otherwise a new series
            carrying the original series name and index name. For any
            strategy other than ``'snap'``/``'average'`` the timestamps are
            floored but the samples are not aggregated.
        """
        if len(data) == 0:
            return data
        if pd.isna(resampleFreq):
            return data

        # resampleFreq code -> pandas floor frequency ('b' = 15 minute block).
        # Lower-case 'h'/'s' are used because the upper-case aliases are
        # deprecated in recent pandas releases.
        floorFreqs = {'d': 'D', 'h': 'h', 'm': 'min', 's': 's', 'b': '15min'}
        freqKey = resampleFreq.lower()
        if freqKey not in floorFreqs:
            return data

        if pd.isna(aggStrategy):
            return data
        strategy = aggStrategy.lower()
        if strategy == 'raw':
            return data

        # Work on relabelled copies so the caller's series keeps its own
        # name and index name (rename/rename_axis return new objects).
        frame = data.rename('vals').rename_axis('times').reset_index()

        # Snap every timestamp down to the start of its bucket.
        frame['times'] = frame['times'].dt.floor(floorFreqs[freqKey])

        # Aggregate the samples that now share a timestamp.
        if strategy == 'snap':
            frame = frame.groupby('times', as_index=False).first()
        elif strategy == 'average':
            frame = frame.groupby('times', as_index=False).mean()

        # Build the index from the column itself (not ``.values``) so that a
        # timezone-aware index keeps its timezone, and restore the labels.
        return pd.Series(frame['vals'].values,
                         index=pd.Index(frame['times'], name=data.index.name),
                         name=data.name)