def test_cum_mean(es):
    """Check CumMean values for log instances 0 through 14.

    Builds ``CumMean`` from the log entity's ``value`` variable with
    ``session_id`` as its second argument (presumably the grouping key),
    computes it through ``PandasBackend`` with ``time_last=None`` and
    compares every result against a hard-coded expectation.

    NOTE(review): a test with this same name is defined again later in
    this module; that later ``def`` rebinds the name, so this copy is not
    the one left bound at module level.
    """
    feature = CumMean(es['log']['value'], es['log']['session_id'])
    backend = PandasBackend(es, [feature])
    result = backend.calculate_all_features(instance_ids=range(15),
                                            time_last=None)
    actual = result[feature.get_name()].values
    # The length check runs first, so zip() below pairs all 15 entries.
    assert len(actual) == 15
    expected = [0, 2.5, 5, 7.5, 10, 0, .5, 1, 1.5, 0, 0, 2.5, 0, 3.5, 7]
    for want, got in zip(expected, actual):
        assert want == got
def test_cum_mean(es):
    """Verify the CumMean output for the first fifteen log instance ids.

    The feature is constructed from ``es['log']['value']`` and
    ``es['log']['session_id']`` and evaluated by ``PandasBackend`` for
    ``instance_ids=range(15)`` with no ``time_last`` cutoff; each computed
    value must equal the corresponding literal below.

    NOTE(review): an identically named test appears earlier in this
    module; this definition rebinds the name and replaces that one.
    """
    log = es['log']
    value_variable = log['value']
    cum_mean_feature = CumMean(value_variable, es['log']['session_id'])
    calculator = PandasBackend(es, [cum_mean_feature])
    frame = calculator.calculate_all_features(
        instance_ids=range(15),
        time_last=None,
    )
    computed = frame[cum_mean_feature.get_name()].values
    assert len(computed) == 15

    expected_values = [
        0, 2.5, 5, 7.5, 10,
        0, .5, 1, 1.5, 0,
        0, 2.5, 0, 3.5, 7,
    ]
    for position in range(len(expected_values)):
        assert expected_values[position] == computed[position]
def test_cum_mean_where(es):
    """Check CumMean with a ``where`` condition of ``value > 3``.

    The second argument to ``CumMean`` is a ``Feature`` built from the
    sessions entity's ``customer_id`` and the log entity. Results for log
    instances 0 through 14 are compared with hard-coded expectations; a
    NaN expectation is matched by a NaN result rather than by equality.

    NOTE(review): a test with this same name is defined again later in
    this module; that later ``def`` rebinds the name, so this copy is not
    the one left bound at module level.
    """
    value = es['log']['value']
    above_three = GreaterThan(value, 3)
    customer_feature = Feature(es['sessions']['customer_id'], es['log'])
    feature = CumMean(value, customer_feature, where=above_three)
    backend = PandasBackend(es, [feature])
    result = backend.calculate_all_features(instance_ids=range(15),
                                            time_last=None)
    actual = result[feature.get_name()].values
    # The length check runs first, so zip() below pairs all 15 entries.
    assert len(actual) == 15
    expected = [
        0, 5, 7.5, 10, 12.5, 12.5, 12.5, 12.5, 12.5, 12.5, 0, 5, 5, 6, 26. / 3
    ]

    for want, got in zip(expected, actual):
        if np.isnan(want):
            # NaN never compares equal to itself, so test for NaN instead.
            assert (np.isnan(got))
            continue
        assert want == got
def test_cum_mean_use_previous_and_where(es):
    """Check CumMean with both ``where`` and ``use_previous`` supplied.

    ``where`` is ``value > 3`` and ``use_previous`` is a ``Timedelta`` of
    2 ``'observations'`` on the log entity. The feature is computed for
    log instances 0 through 14 with ``time_last=None`` and compared
    element by element with hard-coded expectations.

    NOTE(review): a test with this same name is defined again later in
    this module; that later ``def`` rebinds the name, so this copy is not
    the one left bound at module level.
    """
    value = es['log']['value']
    above_three = GreaterThan(value, 3)
    # todo should this be cummean?
    customer_feature = Feature(es['sessions']['customer_id'], es['log'])
    window = Timedelta(2, 'observations', entity=es['log'])
    feature = CumMean(value, customer_feature,
                      where=above_three,
                      use_previous=window)
    backend = PandasBackend(es, [feature])
    result = backend.calculate_all_features(instance_ids=range(15),
                                            time_last=None)

    expected = [0, 5, 7.5, 12.5, 17.5, 17.5, 17.5, 17.5, 17.5, 17.5,
                0, 5, 5, 6, 10.5]
    actual = result[feature.get_name()].values
    # The length check runs first, so zip() below pairs all 15 entries.
    assert len(actual) == 15
    for want, got in zip(expected, actual):
        assert want == got
def test_cum_mean_use_previous_and_where(es):
    """Verify CumMean when a ``where`` filter and ``use_previous`` combine.

    The feature takes the log ``value`` variable, a ``Feature`` built from
    the sessions ``customer_id`` and the log entity, a ``GreaterThan(value,
    3)`` condition, and a two-``'observations'`` ``Timedelta`` on the log
    entity. Its values for ``instance_ids=range(15)`` must equal the
    literals below.

    NOTE(review): an identically named test appears earlier in this
    module; this definition rebinds the name and replaces that one.
    """
    log = es['log']
    value_variable = log['value']
    condition = GreaterThan(value_variable, 3)
    # todo should this be cummean?
    customer_id_feature = Feature(es['sessions']['customer_id'], es['log'])
    cum_mean_feature = CumMean(
        value_variable,
        customer_id_feature,
        where=condition,
        use_previous=Timedelta(2, 'observations', entity=es['log']),
    )
    calculator = PandasBackend(es, [cum_mean_feature])
    frame = calculator.calculate_all_features(
        instance_ids=range(15),
        time_last=None,
    )

    expected_values = [
        0, 5, 7.5, 12.5, 17.5,
        17.5, 17.5, 17.5, 17.5, 17.5,
        0, 5, 5, 6, 10.5,
    ]
    computed = frame[cum_mean_feature.get_name()].values
    assert len(computed) == 15
    for position in range(len(expected_values)):
        assert expected_values[position] == computed[position]
def test_cum_mean_where(es):
    """Verify CumMean restricted by a ``GreaterThan(value, 3)`` condition.

    ``CumMean`` receives the log ``value`` variable, a ``Feature`` built
    from the sessions ``customer_id`` and the log entity, and the
    condition as ``where``. Values for ``instance_ids=range(15)`` are
    checked against the literals below; where the expectation is NaN the
    computed value must be NaN as well.

    NOTE(review): an identically named test appears earlier in this
    module; this definition rebinds the name and replaces that one.
    """
    log = es['log']
    value_variable = log['value']
    condition = GreaterThan(value_variable, 3)
    customer_id_feature = Feature(es['sessions']['customer_id'], es['log'])
    cum_mean_feature = CumMean(
        value_variable,
        customer_id_feature,
        where=condition,
    )
    calculator = PandasBackend(es, [cum_mean_feature])
    frame = calculator.calculate_all_features(
        instance_ids=range(15),
        time_last=None,
    )
    computed = frame[cum_mean_feature.get_name()].values
    assert len(computed) == 15

    expected_values = [
        0, 5, 7.5, 10, 12.5,
        12.5, 12.5, 12.5, 12.5, 12.5,
        0, 5, 5, 6, 26. / 3,
    ]
    for position in range(len(expected_values)):
        target = expected_values[position]
        if np.isnan(target):
            # Equality cannot match NaN, so require a NaN result instead.
            assert (np.isnan(computed[position]))
        else:
            assert target == computed[position]