def handler(grouped):
    """Build lagged "to date" price statistics for one group of sales.

    ``grouped`` must expose ``saledate``, ``SalePrice`` and ``SalesID``
    columns and a ``set_index`` method (presumably a pandas DataFrame
    holding a single group/basket of sales -- confirm against the caller).

    The prices are indexed and ordered by ``saledate``.  Every statistic
    except ``SaleCount`` is shifted down by one row, so the value stored
    against a sale is computed from the rows that precede it only.

    Returns the frame produced by ``concat(..., axis=1)`` holding the
    ``MeanToDate``, ``MedianToDate``, ``MaxToDate``, ``MinToDate``,
    ``PrevSale`` and ``SaleCount`` metrics, plus a ``SalesID`` column that
    callers can use to join the result back onto the original rows.
    """
    by_date = grouped.set_index('saledate')

    # Date-ordered series of sale prices for this group.
    prices = by_date['SalePrice'].sort_index()

    def lagged(stat):
        # Move every value one row later, then forward-fill remaining gaps.
        return stat.shift(1).fillna(method='ffill')

    metrics = {
        # expanding (cumulative) mean of the earlier prices
        'MeanToDate': lagged(expanding_mean(prices)),
        # expanding (cumulative) median of the earlier prices
        'MedianToDate': lagged(expanding_median(prices)),
        # running maximum of the earlier prices
        'MaxToDate': lagged(prices.cummax()),
        # running minimum of the earlier prices
        'MinToDate': lagged(prices.cummin()),
        # price recorded on the previous row
        'PrevSale': lagged(prices),
        # expanding count; deliberately not lagged, so the current row is included
        'SaleCount': expanding_count(prices),
    }
    summary = concat(metrics, axis=1)

    # Re-attach SalesID (same index, same ordering); it is needed for the join.
    sale_ids = by_date['SalesID'].sort_index()
    summary['SalesID'] = sale_ids
    return summary
def test_expanding_count(self):
    """Check ``expanding_count`` against a full-length ``rolling_count``.

    A rolling count whose window equals the length of ``self.series`` is
    used as the reference value for the expanding count of that series.
    """
    series = self.series
    actual = mom.expanding_count(series)
    # Window equal to the series length, so it spans every row seen so far.
    expected = mom.rolling_count(series, len(series))
    assert_almost_equal(actual, expected)