Пример #1
0
def test_accepts_cutoff_time_compose(dataframes, relationships):
    """Check that ``dfs`` accepts Compose-generated labels as ``cutoff_time``.

    Labels are searched over the first element of ``dataframes["transactions"]``
    with a ``cp.LabelMaker``, their ``time`` column is converted to numeric and
    ``card_id`` is renamed to ``id`` before they are passed to ``dfs`` with
    ``cards`` as the target dataframe.
    """

    # The function name is deliberately left as-is: the function object is
    # handed to the label maker, and its name may surface in the labels.
    def fraud_occured(df):
        return df["fraud"].any()

    label_maker = cp.LabelMaker(
        labeling_function=fraud_occured,
        window_size=1,
        time_index="transaction_time",
        target_dataframe_name="card_id",
    )

    transactions = to_pandas(dataframes["transactions"][0])
    label_times = label_maker.search(
        transactions,
        num_examples_per_instance=-1,
    )

    # Adapt the labels in place: numeric times and an ``id`` key column.
    label_times["time"] = pd.to_numeric(label_times["time"])
    label_times.rename(columns={"card_id": "id"}, inplace=True)

    feature_matrix, features = dfs(
        cutoff_time=label_times,
        target_dataframe_name="cards",
        dataframes=dataframes,
        relationships=relationships,
    )
    feature_matrix = to_pandas(feature_matrix, index="id")

    row_count = len(feature_matrix.index)
    assert row_count == 6
    # One column more than the number of features is expected (presumably the
    # label column carried over from the cutoff times -- confirm).
    column_count = len(feature_matrix.columns)
    assert column_count == len(features) + 1
Пример #2
0
def test_accepts_cutoff_time_compose(entities, relationships):
    """Check that ``dfs`` accepts Compose-generated labels as ``cutoff_time``.

    Labels are searched over the first element of ``entities['transactions']``
    (computed first when it is a Dask dataframe), their ``time`` column is
    converted to numeric and ``card_id`` is renamed to ``id`` before they are
    passed to ``dfs`` with ``cards`` as the target entity.
    """

    # The function name is deliberately left as-is: the function object is
    # handed to the label maker, and its name may surface in the labels.
    def fraud_occured(df):
        return df['fraud'].any()

    label_maker = cp.LabelMaker(
        labeling_function=fraud_occured,
        window_size=1,
        time_index='transaction_time',
        target_entity='card_id',
    )

    transactions = entities['transactions'][0]
    if isinstance(transactions, dd.DataFrame):
        # The label search below is given a computed (non-Dask) frame.
        transactions = transactions.compute()

    label_times = label_maker.search(
        transactions,
        num_examples_per_instance=-1,
    )

    # Adapt the labels in place: numeric times and an ``id`` key column.
    label_times['time'] = pd.to_numeric(label_times['time'])
    label_times.rename(columns={'card_id': 'id'}, inplace=True)

    feature_matrix, features = dfs(
        cutoff_time=label_times,
        target_entity="cards",
        entities=entities,
        relationships=relationships,
    )
    if isinstance(feature_matrix, dd.DataFrame):
        computed = feature_matrix.compute()
        feature_matrix = computed.set_index('id')

    row_count = len(feature_matrix.index)
    assert row_count == 6
    # One column more than the number of features is expected (presumably the
    # label column carried over from the cutoff times -- confirm).
    column_count = len(feature_matrix.columns)
    assert column_count == len(features) + 1
Пример #3
0
def lt(es):
    """Return labels searched over ``es['log'].df`` with ``cutoff_time`` renamed.

    Each label is ``True`` when the sum of ``value`` in the frame handed to
    the labeling function exceeds 10. The label maker is configured with
    ``id`` as target entity, ``datetime`` as time index and a ``'1m'`` window.
    """

    # Name kept unchanged: the function object is handed to the label maker,
    # and its name may surface in the resulting labels.
    def label_func(df):
        return df['value'].sum() > 10

    label_maker = cp.LabelMaker(
        labeling_function=label_func,
        window_size='1m',
        time_index='datetime',
        target_entity='id',
    )

    log_frame = to_pandas(es['log'].df)
    found = label_maker.search(log_frame, num_examples_per_instance=-1)
    # Expose the cutoff column under the name ``time``.
    return found.rename(columns={'cutoff_time': 'time'})
Пример #4
0
def lt(es):
    """Return labels searched over ``es["log"]`` with ``cutoff_time`` renamed.

    Each label is ``True`` when the sum of ``value`` in the frame handed to
    the labeling function exceeds 10. The label maker is configured with
    ``id`` as target dataframe name, ``datetime`` as time index and a
    ``"1m"`` window.
    """

    # Name kept unchanged: the function object is handed to the label maker,
    # and its name may surface in the resulting labels.
    def label_func(df):
        return df["value"].sum() > 10

    label_maker = cp.LabelMaker(
        labeling_function=label_func,
        window_size="1m",
        time_index="datetime",
        target_dataframe_name="id",
    )

    log_frame = to_pandas(es["log"])
    found = label_maker.search(log_frame, num_examples_per_instance=-1)
    # Expose the cutoff column under the name ``time``.
    return found.rename(columns={"cutoff_time": "time"})
Пример #5
0
def total_spent(transactions, total_spent_fn):
    """Search ``transactions`` for one label per instance using ``total_spent_fn``.

    The label maker targets ``customer_id`` with ``time`` as the time index;
    the search runs with ``verbose=False`` and its result is returned as-is.
    """
    label_maker = cp.LabelMaker(
        labeling_function=total_spent_fn,
        time_index='time',
        target_entity='customer_id',
    )
    return label_maker.search(
        transactions,
        verbose=False,
        num_examples_per_instance=1,
    )