Example #1
def test_dependent_percentile(es):
    v = Feature(es['log']['value'])
    p = Percentile(v)
    p2 = Percentile(p - 1)
    pandas_backend = PandasBackend(es, [p, p2])
    df = pandas_backend.calculate_all_features(range(10, 17), None)
    true = es['log'].df[v.get_name()].rank(pct=True)
    true = true.loc[range(10, 17)]
    for t, a in zip(true.values, df[p.get_name()].values):
        assert (pd.isnull(t) and pd.isnull(a)) or t == a
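These snippets come from the Featuretools test suite and rely on an older Featuretools API plus an `es` entityset fixture, so they are not runnable as shown. The expected values are always built with pandas `rank(pct=True)`, and NaN inputs stay NaN in the output, which is why the asserts treat "both null" as a match. A minimal standalone sketch of that reference computation on toy data (not the fixture):

import numpy as np
import pandas as pd

# Toy series with a missing value: rank(pct=True) is the reference the
# tests compare Percentile against; the NaN stays NaN in the result.
values = pd.Series([10.0, 3.0, np.nan, 7.0])
print(values.rank(pct=True).tolist())  # [1.0, 0.333..., nan, 0.666...]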
Example #2
def test_percentile_agg_percentile(es):
    v = Feature(es['log']['value'])
    p = Percentile(v)
    agg = Sum(p, es['sessions'])
    pagg = Percentile(agg)
    pandas_backend = PandasBackend(es, [pagg])
    df = pandas_backend.calculate_all_features([0, 1], None)

    log_vals = es['log'].df[[v.get_name(), 'session_id']]
    log_vals['percentile'] = log_vals[v.get_name()].rank(pct=True)
    true_p = log_vals.groupby('session_id')['percentile'].sum().fillna(0)
    true_p = true_p.rank(pct=True)[[0, 1]]

    for t, a in zip(true_p.values, df[pagg.get_name()].values):
        assert (pd.isnull(t) and pd.isnull(a)) or t == a
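The stacked features here (row-level Percentile, summed per session, then ranked again) mirror a plain pandas chain. A small sketch on toy data (hypothetical values, not the fixture):

import pandas as pd

# Percentile -> Sum per session -> Percentile of the sums,
# the same chain the test rebuilds with rank/groupby/rank.
log = pd.DataFrame({'session_id': [0, 0, 1, 1, 1],
                    'value': [5.0, 1.0, 3.0, 4.0, 2.0]})
log['percentile'] = log['value'].rank(pct=True)
per_session = log.groupby('session_id')['percentile'].sum()
print(per_session.rank(pct=True))  # session 0 -> 0.5, session 1 -> 1.0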
Example #3
def test_percentile_with_cutoff(es):
    v = Feature(es['log']['value'])
    p = Percentile(v)
    pandas_backend = PandasBackend(es, [p])
    df = pandas_backend.calculate_all_features(
        [2], pd.Timestamp('2011/04/09 10:30:13'))
    assert df[p.get_name()].tolist()[0] == 1.0
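With a cutoff time, only rows whose time index falls before the cutoff enter the calculation (the later examples build their expected values with `datetime < cutoff`), so the percentile is taken over that truncated window. A sketch with hypothetical toy rows (the real fixture data may differ) showing how the value of interest can come out at 1.0:

import pandas as pd

# Rows at or after the cutoff are dropped before ranking; if the remaining
# row of interest holds the largest value seen so far, its percentile is 1.0.
log = pd.DataFrame({'value': [0.0, 5.0, 20.0, 100.0],
                    'datetime': pd.to_datetime(['2011-04-09 10:30:00',
                                                '2011-04-09 10:30:06',
                                                '2011-04-09 10:30:12',
                                                '2011-04-09 10:31:00'])})
window = log[log['datetime'] < pd.Timestamp('2011/04/09 10:30:13')]
print(window['value'].rank(pct=True).iloc[-1])  # 1.0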
Example #4
def test_approximate_dfeat_of_need_all_values(entityset):
    es = entityset
    p = Percentile(es['log']['value'])
    agg_feat = Sum(p, es['sessions'])
    agg_feat2 = Sum(agg_feat, es['customers'])
    dfeat = DirectFeature(agg_feat2, es['sessions'])

    feature_matrix = calculate_feature_matrix([dfeat, agg_feat],
                                              entityset,
                                              instance_ids=[0, 2],
                                              approximate=Timedelta(10, 's'),
                                              cutoff_time_in_index=True,
                                              cutoff_time=[datetime(2011, 4, 9, 10, 31, 19),
                                                           datetime(2011, 4, 9, 11, 0, 0)])
    log_df = es['log'].df
    instances = [0, 2]
    cutoffs = [pd.Timestamp('2011-04-09 10:31:19'), pd.Timestamp('2011-04-09 11:00:00')]
    approxes = [pd.Timestamp('2011-04-09 10:31:10'), pd.Timestamp('2011-04-09 11:00:00')]
    true_vals = []
    true_vals_approx = []
    for instance, cutoff, approx in zip(instances, cutoffs, approxes):
        log_data_cutoff = log_df[log_df['datetime'] < cutoff]
        log_data_cutoff['percentile'] = log_data_cutoff['value'].rank(pct=True)
        true_agg = log_data_cutoff.loc[log_data_cutoff['session_id'] == instance, 'percentile'].fillna(0).sum()
        true_vals.append(round(true_agg, 3))

        log_data_approx = log_df[log_df['datetime'] < approx]
        log_data_approx['percentile'] = log_data_approx['value'].rank(pct=True)
        true_agg_approx = log_data_approx.loc[log_data_approx['session_id'].isin([0, 1, 2]), 'percentile'].fillna(0).sum()
        true_vals_approx.append(round(true_agg_approx, 3))
    lapprox = [round(x, 3) for x in feature_matrix[dfeat.get_name()].tolist()]
    test_list = [round(x, 3) for x in feature_matrix[agg_feat.get_name()].tolist()]
    assert lapprox == true_vals_approx
    assert test_list == true_vals
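`approximate=Timedelta(10, 's')` lets the direct feature be computed at coarser, binned times rather than at each exact cutoff. The hard-coded `approxes` above suggest the bins behave like a floor onto a 10-second grid, which plain pandas can reproduce (a sketch of the expected bin times, not the library's internal code):

import pandas as pd

# The test's cutoffs and the 10-second bins they fall into; Timestamp.floor
# reproduces the hard-coded `approxes` used for the expected values.
cutoffs = [pd.Timestamp('2011-04-09 10:31:19'), pd.Timestamp('2011-04-09 11:00:00')]
print([c.floor('10s') for c in cutoffs])
# [Timestamp('2011-04-09 10:31:10'), Timestamp('2011-04-09 11:00:00')]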
Example #5
def test_uses_full_entity_feat_of_approximate(entityset):
    es = entityset
    agg_feat = Sum(es['log']['value'], es['sessions'])
    agg_feat2 = Sum(agg_feat, es['customers'])
    agg_feat3 = Min(agg_feat, es['customers'])
    dfeat = DirectFeature(agg_feat2, es['sessions'])
    dfeat2 = DirectFeature(agg_feat3, es['sessions'])
    p = Percentile(dfeat)

    # only dfeat2 should be approximated
    # because Percentile needs all values

    feature_matrix_only_dfeat2 = calculate_feature_matrix(
        [dfeat2],
        instance_ids=[0, 2],
        approximate=Timedelta(10, 's'),
        cutoff_time_in_index=True,
        cutoff_time=[
            datetime(2011, 4, 9, 10, 31, 19),
            datetime(2011, 4, 9, 11, 0, 0)
        ])
    assert feature_matrix_only_dfeat2[dfeat2.get_name()].tolist() == [1, 0]

    feature_matrix_approx = calculate_feature_matrix(
        [p, dfeat, dfeat2, agg_feat],
        instance_ids=[0, 2],
        approximate=Timedelta(10, 's'),
        cutoff_time_in_index=True,
        cutoff_time=[
            datetime(2011, 4, 9, 10, 31, 19),
            datetime(2011, 4, 9, 11, 0, 0)
        ])
    assert (feature_matrix_only_dfeat2[dfeat2.get_name()].tolist() ==
            feature_matrix_approx[dfeat2.get_name()].tolist())

    feature_matrix_small_approx = calculate_feature_matrix(
        [p, dfeat, dfeat2, agg_feat],
        instance_ids=[0, 2],
        approximate=Timedelta(10, 'ms'),
        cutoff_time_in_index=True,
        cutoff_time=[
            datetime(2011, 4, 9, 10, 31, 19),
            datetime(2011, 4, 9, 11, 0, 0)
        ])

    feature_matrix_no_approx = calculate_feature_matrix(
        [p, dfeat, dfeat2, agg_feat],
        instance_ids=[0, 2],
        cutoff_time_in_index=True,
        cutoff_time=[
            datetime(2011, 4, 9, 10, 31, 19),
            datetime(2011, 4, 9, 11, 0, 0)
        ])
    for f in [p, dfeat, agg_feat]:
        for fm1, fm2 in combinations([
                feature_matrix_approx, feature_matrix_small_approx,
                feature_matrix_no_approx
        ], 2):
            assert fm1[f.get_name()].tolist() == fm2[f.get_name()].tolist()
Example #6
def test_percentile(es):
    v = Feature(es['log']['value'])
    p = Percentile(v)
    pandas_backend = PandasBackend(es, [v, p])
    df = pandas_backend.calculate_all_features(range(17), None)
    true = df[v.get_name()].rank(pct=True)
    for t, a in zip(true.values, df[p.get_name()].values):
        assert (pd.isnull(t) and pd.isnull(a)) or t == a
Example #7
def test_direct_percentile(es):
    v = Feature(es['customers']['age'])
    p = Percentile(v)
    d = Feature(p, es['sessions'])
    pandas_backend = PandasBackend(es, [d])
    df = pandas_backend.calculate_all_features([0, 1], None)

    cust_vals = es['customers'].df[[v.get_name()]]
    cust_vals['percentile'] = cust_vals[v.get_name()].rank(pct=True)
    true_p = cust_vals['percentile'].loc[[0, 0]]
    for t, a in zip(true_p.values, df[d.get_name()].values):
        assert (pd.isnull(t) and pd.isnull(a)) or t == a
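A direct feature broadcasts a parent-entity value onto each child row; the `.loc[[0, 0]]` above indicates that both sessions being calculated point at customer 0. A toy pandas sketch of that parent-to-child mapping (hypothetical frames, not the fixture):

import pandas as pd

# Rank on the parent table, then map the result onto child rows through
# the foreign key: the manual equivalent of Feature(p, es['sessions']).
customers = pd.DataFrame({'age': [33, 25, 56]}, index=[0, 1, 2])
customers['age_percentile'] = customers['age'].rank(pct=True)
sessions = pd.DataFrame({'customer_id': [0, 0]}, index=[0, 1])
print(sessions['customer_id'].map(customers['age_percentile']).tolist())
# [0.666..., 0.666...]: both sessions inherit customer 0's percentile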
Example #8
def test_direct_agg_percentile(es):
    v = Feature(es['log']['value'])
    p = Percentile(v)
    agg = Sum(p, es['customers'])
    d = Feature(agg, es['sessions'])
    pandas_backend = PandasBackend(es, [d])
    df = pandas_backend.calculate_all_features([0, 1], None)

    log_vals = es['log'].df[[v.get_name(), 'session_id']]
    log_vals['percentile'] = log_vals[v.get_name()].rank(pct=True)
    log_vals['customer_id'] = [0] * 10 + [1] * 5 + [2] * 2
    true_p = log_vals.groupby('customer_id')['percentile'].sum().fillna(0)
    true_p = true_p[[0, 0]]
    for t, a in zip(true_p.values, df[d.get_name()].values):
        assert (pd.isnull(t) and pd.isnull(a)) or round(t, 3) == round(a, 3)
Example #9
def test_two_kinds_of_dependents(es):
    v = Feature(es['log']['value'])
    product = Feature(es['log']['product_id'])
    agg = Sum(v, es['customers'], where=product == 'coke zero')
    p = Percentile(agg)
    g = Absolute(agg)
    agg2 = Sum(v, es['sessions'], where=product == 'coke zero')
    # Adding this feature exercises line 218 in pandas_backend,
    # where columns already present in the output entity_frames are
    # dropped from result_frame in preparation for pd.concat.
    # In a prior version this failed because result_frame itself was
    # mutated instead of creating a new variable _result_frame: when
    # len(output_frames) > 1, the second iteration was missing columns
    # that had been removed in the first.
    agg3 = Sum(agg2, es['customers'])
    pandas_backend = PandasBackend(es, [p, g, agg3])
    df = pandas_backend.calculate_all_features([0, 1], None)
    assert df[p.get_name()].tolist() == [0.5, 1.0]
    assert df[g.get_name()].tolist() == [15, 26]
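The `where=product == 'coke zero'` clause turns the Sum into a conditional aggregation: only rows matching the condition contribute to each group's total. A minimal pandas sketch of the same filtered group sum on toy data (product names other than 'coke zero' are made up):

import pandas as pd

# Filter to the rows that satisfy the where-condition, then aggregate per
# group: the manual equivalent of Sum(v, ..., where=product == 'coke zero').
log = pd.DataFrame({'customer_id': [0, 0, 1, 1],
                    'product_id': ['coke zero', 'toothpaste', 'coke zero', 'coke zero'],
                    'value': [5.0, 2.0, 1.0, 3.0]})
masked = log[log['product_id'] == 'coke zero']
print(masked.groupby('customer_id')['value'].sum())  # 0 -> 5.0, 1 -> 4.0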