def test_cum_mean(es):
    """Check CumMean over the log ``value`` variable against fixed values.

    The feature is built from ``es['log']['value']`` and
    ``es['log']['session_id']`` and calculated for instance ids 0..14
    with ``time_last=None``.
    """
    value_variable = es['log']['value']
    feature = CumMean(value_variable, es['log']['session_id'])

    backend = PandasBackend(es, [feature])
    frame = backend.calculate_all_features(instance_ids=range(15),
                                           time_last=None)

    actual = frame[feature.get_name()].values
    assert len(actual) == 15

    expected = [0, 2.5, 5, 7.5, 10,
                0, .5, 1, 1.5,
                0, 0, 2.5, 0, 3.5, 7]
    # Both sequences hold 15 entries, so pairing them positionally covers
    # every expected value.
    for want, got in zip(expected, actual):
        assert want == got
def test_cum_mean_where(es):
    """Check CumMean with a ``where`` feature against fixed values.

    The ``where`` argument is ``GreaterThan(value, 3)``; the second
    positional argument is a ``Feature`` built from
    ``es['sessions']['customer_id']`` and ``es['log']``.

    NOTE(review): a function with this same name is defined again later
    in this file; the later definition replaces this one at import time,
    so this copy would not be collected by pytest. Confirm which copy is
    meant to stay.
    """
    value_variable = es['log']['value']
    above_three = GreaterThan(value_variable, 3)
    customer_feature = Feature(es['sessions']['customer_id'], es['log'])
    feature = CumMean(value_variable, customer_feature, where=above_three)

    backend = PandasBackend(es, [feature])
    frame = backend.calculate_all_features(instance_ids=range(15),
                                           time_last=None)

    actual = frame[feature.get_name()].values
    assert len(actual) == 15

    expected = [
        0, 5, 7.5, 10, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
        0, 5, 5, 6, 26. / 3,
    ]
    for position, want in enumerate(expected):
        got = actual[position]
        # NaN never compares equal to itself, so an expected NaN is
        # checked with isnan instead of ==.
        if np.isnan(want):
            assert (np.isnan(got))
        else:
            assert want == got
def test_cum_mean_use_previous_and_where(es):
    """Check CumMean with both ``where`` and ``use_previous`` set.

    ``where`` is ``GreaterThan(value, 3)`` and ``use_previous`` is
    ``Timedelta(2, 'observations', entity=es['log'])``. The feature is
    calculated for instance ids 0..14 with ``time_last=None``.
    """
    value_variable = es['log']['value']
    above_three = GreaterThan(value_variable, 3)
    # TODO: should this be cummean? (open question from the original author)
    customer_feature = Feature(es['sessions']['customer_id'], es['log'])
    window = Timedelta(2, 'observations', entity=es['log'])
    feature = CumMean(value_variable, customer_feature,
                      where=above_three,
                      use_previous=window)

    backend = PandasBackend(es, [feature])
    frame = backend.calculate_all_features(instance_ids=range(15),
                                           time_last=None)

    actual = frame[feature.get_name()].values
    assert len(actual) == 15

    expected = [0, 5, 7.5, 12.5, 17.5, 17.5, 17.5, 17.5, 17.5, 17.5,
                0, 5, 5, 6, 10.5]
    # Both sequences hold 15 entries, so pairing them positionally covers
    # every expected value.
    for want, got in zip(expected, actual):
        assert want == got
def test_cum_mean_where(es):
    """Check CumMean with a ``where`` feature against fixed values.

    The ``where`` argument is ``GreaterThan(value, 3)``; the second
    positional argument is a ``Feature`` built from
    ``es['sessions']['customer_id']`` and ``es['log']``.

    NOTE(review): this redefines a test of the same name that appears
    earlier in this file, replacing it at import time. Confirm whether
    one of the two copies should be removed or renamed.
    """
    value_variable = es['log']['value']
    where_feature = GreaterThan(value_variable, 3)
    group_feature = Feature(es['sessions']['customer_id'], es['log'])
    feature = CumMean(value_variable, group_feature, where=where_feature)

    backend = PandasBackend(es, [feature])
    frame = backend.calculate_all_features(instance_ids=range(15),
                                           time_last=None)

    column = frame[feature.get_name()].values
    assert len(column) == 15

    expected = [0, 5, 7.5, 10, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5,
                0, 5, 5, 6, 26. / 3]
    for index, want in enumerate(expected):
        # NaN never compares equal to itself, so an expected NaN is
        # checked with isnan instead of ==.
        if np.isnan(want):
            assert (np.isnan(column[index]))
            continue
        assert want == column[index]