Python LabelMaker示例，composeml.LabelMaker Python示例

示例#1

0

显示文件

def test_search_with_undefined_labels(transactions, total_spent_fn):
    """Search using a per-instance example mapping and a ``% 3`` labeling wrapper.

    The label times returned by ``search`` are rendered through ``to_csv``
    and compared with a fixed list of expected CSV rows.
    """

    def total_spent(ds):
        # Delegate to the fixture's labeling function, then reduce modulo 3.
        spent = total_spent_fn(ds)
        return spent % 3

    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent,
    )

    # Only keys 1 and 2 are listed (presumably customer ids -- confirm
    # against the LabelMaker documentation).
    examples_per_instance = {1: 1, 2: 1}
    found = maker.search(
        transactions,
        num_examples_per_instance=examples_per_instance,
        gap=1,
    )
    rendered = to_csv(found, index=False)

    expected_rows = [
        'customer_id,time,total_spent',
        '0,2019-01-01 08:00:00,2',
        '0,2019-01-01 08:30:00,1',
        '1,2019-01-01 09:30:00,2',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,1',
        '2,2019-01-01 11:30:00,2',
        '3,2019-01-01 12:30:00,1',
    ]

    assert rendered == expected_rows

示例#2

0

显示文件

def test_label_type(transactions, total_spent_fn):
    """Check the reported target type before and after binning the labels."""
    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )
    label_times = maker.search(transactions, num_examples_per_instance=1)

    target = 'total_spent'
    assert label_times.target_types[target] == 'continuous'

    # Binning into 2 bins is expected to flip the reported type.
    binned = label_times.bin(2)
    assert binned.target_types[target] == 'discrete'

示例#3

0

显示文件

文件： test_label_maker.py 项目： anhuaxiang/compose

def test_search_offset_mix_2(transactions, total_spent_fn):
    """
    Test offset mix with window_size (absolute, '30min') and minimum_data
    (relative, 2). No gap is passed to ``search`` here.
    """
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': total_spent_fn,
        'window_size': '30min',
    }
    maker = LabelMaker(**maker_settings)

    found = maker.search(
        transactions,
        num_examples_per_instance=2,
        minimum_data=2,
        verbose=False,
    )
    rendered = to_csv(found, index=False)

    expected_rows = [
        'customer_id,cutoff_time,total_spent',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 11:30:00,1',
        '2,2019-01-01 12:00:00,1',
    ]

    assert rendered == expected_rows

示例#4

0

显示文件

文件： test_label_maker.py 项目： anhuaxiang/compose

def test_search_offset_mix_7(transactions, total_spent_fn):
    """
    Test search with window_size (relative, 10) and an unbounded number of
    examples per instance. minimum_data and gap are not passed here.
    """
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': total_spent_fn,
        'window_size': 10,
    }
    maker = LabelMaker(**maker_settings)

    unlimited = float('inf')
    found = maker.search(
        transactions,
        num_examples_per_instance=unlimited,
        verbose=False,
    )
    rendered = to_csv(found, index=False)

    expected_rows = [
        'customer_id,cutoff_time,total_spent',
        '0,2019-01-01 08:00:00,2',
        '1,2019-01-01 09:00:00,3',
        '2,2019-01-01 10:30:00,4',
        '3,2019-01-01 12:30:00,1',
    ]

    assert rendered == expected_rows

示例#5

0

显示文件

def test_search_offset_mix_3(transactions, total_spent_fn):
    """
    Test offset mix with window_size (absolute, '8h'), minimum_data
    (absolute timestamp string), and gap (relative, 1).
    """
    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
        window_size='8h',
    )

    search_options = {
        'num_examples_per_instance': -1,
        'minimum_data': '2019-01-01 08:00:00',
        'gap': 1,
    }
    found = maker.search(transactions, **search_options)
    rendered = to_csv(found, index=False)

    expected_rows = [
        'customer_id,time,total_spent',
        '0,2019-01-01 08:00:00,2',
        '0,2019-01-01 08:30:00,1',
        '1,2019-01-01 09:00:00,3',
        '1,2019-01-01 09:30:00,2',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,4',
        '2,2019-01-01 11:00:00,3',
        '2,2019-01-01 11:30:00,2',
        '2,2019-01-01 12:00:00,1',
        '3,2019-01-01 12:30:00,1',
    ]

    assert rendered == expected_rows

示例#6

0

显示文件

def test_column_based_windows(transactions, total_spent_fn):
    """Search with ``window_size`` set to the name of an added column.

    A ``session_id`` column is attached to the transactions and its name is
    passed as ``window_size``; the rendered CSV rows are compared with a
    fixed expected listing.
    """
    sessions = [1, 2, 3, 3, 4, 5, 5, 5, 6, 7]
    frame = transactions.assign(session_id=sessions)

    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        window_size='session_id',
        labeling_function=total_spent_fn,
    )

    label_times = maker.search(frame, -1)
    rendered = label_times.pipe(to_csv, index=False)

    expected_rows = [
        'customer_id,time,total_spent',
        '0,2019-01-01 08:00:00,1',
        '0,2019-01-01 08:30:00,1',
        '1,2019-01-01 09:00:00,2',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,3',
        '2,2019-01-01 12:00:00,1',
        '3,2019-01-01 12:30:00,1',
    ]

    assert rendered == expected_rows

示例#7

0

显示文件

文件： test_label_maker.py 项目： anhuaxiang/compose

def test_label_type(transactions, total_spent_fn):
    """Check that a ``label_type`` passed to ``search`` is reported back."""
    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )

    requested_type = 'discrete'
    label_times = maker.search(
        transactions,
        num_examples_per_instance=1,
        label_type=requested_type,
        verbose=False,
    )

    assert label_times.label_type == 'discrete'

示例#8

0

显示文件

def test_data_slice_overlap(transactions, total_spent_fn):
    """Check that no row of a data slice is indexed at the slice's stop value."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': total_spent_fn,
        'window_size': '1h',
    }
    maker = LabelMaker(**maker_settings)

    for data_slice in maker.slice(transactions, num_examples_per_instance=2):
        stop = data_slice.context.slice_stop
        # Element-wise comparison of the slice index with the stop value.
        hits_stop = data_slice.index == stop
        assert not hits_stop.any()

示例#9

0

显示文件

def test_slice_overlap(transactions, total_spent_fn):
    """Check that no row of a slice is indexed at the end of its context window."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': total_spent_fn,
        'window_size': '1h',
    }
    maker = LabelMaker(**maker_settings)

    for piece in maker.slice(transactions, num_examples_per_instance=2):
        # The context window unpacks into a (start, end) pair; only the end
        # is needed for the check.
        _, window_end = piece.context.window
        touches_end = piece.index == window_end
        assert not touches_end.any()

示例#10

0

显示文件

def test_search_invalid_n_examples(transactions, total_spent_fn):
    """Expect 'must specify gap' from both ``slice`` and ``search``.

    The maker is built without a window size and both calls request two
    examples per instance without passing a gap.
    """
    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )
    expected_message = 'must specify gap'

    # ``slice`` is consumed with ``next`` so its first item is requested
    # inside the ``raises`` block.
    with pytest.raises(AssertionError, match=expected_message):
        slices = maker.slice(transactions, num_examples_per_instance=2)
        next(slices)

    with pytest.raises(AssertionError, match=expected_message):
        maker.search(transactions, num_examples_per_instance=2)

示例#11

0

显示文件

def test_slice_context(transactions, total_spent_fn):
    """Check the context attributes attached to every generated slice."""
    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
        window_size='1h',
    )

    slices = maker.slice(
        transactions,
        num_examples_per_instance=-1,
        verbose=True,
    )

    for data_slice in slices:
        assert data_slice.context.target_entity == 'customer_id'
        assert isinstance(data_slice.context.target_instance, int)
        assert isinstance(data_slice.context.slice_number, int)
        # No gap is passed above, so the context's gap is expected to equal
        # its window.
        assert data_slice.context.window == data_slice.context.gap

示例#12

0

显示文件

def test_search_default(transactions, total_spent_fn):
    """Search with one example per instance and no other search options."""
    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        labeling_function=total_spent_fn,
    )

    found = maker.search(transactions, num_examples_per_instance=1)
    rendered = to_csv(found, index=False)

    expected_rows = [
        'customer_id,time,total_spent',
        '0,2019-01-01 08:00:00,2',
        '1,2019-01-01 09:00:00,3',
        '2,2019-01-01 10:30:00,4',
        '3,2019-01-01 12:30:00,1',
    ]

    assert rendered == expected_rows

示例#13

0

显示文件

def test_invalid_threshold(transactions, total_spent_fn):
    """Expect a ValueError matching 'invalid threshold' for a blank ``minimum_data``."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda: None,
        'window_size': 2,
    }
    maker = LabelMaker(**maker_settings)

    expected_message = 'invalid threshold'

    # A single-space string is passed as ``minimum_data``.
    with pytest.raises(ValueError, match=expected_message):
        maker.search(transactions, num_examples_per_instance=2, minimum_data=' ')

示例#14

0

显示文件

def test_search_on_empty_labels(transactions):
    """Search with a labeling function that always returns None; expect an empty result."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda ds: None,
        'window_size': 2,
    }
    maker = LabelMaker(**maker_settings)

    search_options = {
        'minimum_data': 1,
        'num_examples_per_instance': 2,
        'gap': 1,
    }
    found = maker.search(transactions, **search_options)

    assert found.empty

示例#15

0

显示文件

def test_search_offset_negative_1(transactions, total_spent_fn):
    """Expect 'offset must be positive' for negative time-string offsets ('-1h')."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda: None,
        'window_size': 2,
    }
    maker = LabelMaker(**maker_settings)

    expected_message = 'offset must be positive'
    negative_offset = '-1h'

    with pytest.raises(AssertionError, match=expected_message):
        maker.search(
            transactions,
            num_examples_per_instance=2,
            minimum_data=negative_offset,
            gap=negative_offset,
        )

示例#16

0

显示文件

文件： test_label_maker.py 项目： anhuaxiang/compose

def test_search_offset_negative_0(transactions, total_spent_fn):
    """Expect 'must be greater than zero' for negative integer offsets (-1)."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda: None,
        'window_size': 2,
    }
    maker = LabelMaker(**maker_settings)

    expected_message = 'must be greater than zero'
    negative_offset = -1

    with pytest.raises(AssertionError, match=expected_message):
        maker.search(
            transactions,
            num_examples_per_instance=2,
            minimum_data=negative_offset,
            gap=negative_offset,
            verbose=False,
        )

示例#17

0

显示文件

def test_search_on_empty_data_slices(transactions, total_spent_fn):
    """Search over transactions whose ``time`` column is all NaT; expect an empty result."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda df: None,
        'window_size': 2,
    }
    maker = LabelMaker(**maker_settings)

    # Overwrite every value of the time column with NaT.
    timeless = transactions.assign(time=pd.NaT)

    search_options = {
        'minimum_data': 1,
        'num_examples_per_instance': 2,
        'gap': 3,
    }
    found = maker.search(timeless, **search_options)

    assert found.empty

示例#18

0

显示文件

文件： test_label_maker.py 项目： anhuaxiang/compose

def test_invalid_offset(transactions, total_spent_fn):
    """Expect 'invalid offset' when ``window_size`` is an empty dict."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda: None,
        'window_size': {},
    }
    expected_message = 'invalid offset'

    # The error is expected at construction time; no search is performed.
    with pytest.raises(AssertionError, match=expected_message):
        LabelMaker(**maker_settings)

示例#19

0

显示文件

文件： test_label_maker.py 项目： shalevy1/compose

def test_invalid_offset_alias(transactions, total_spent_fn):
    """Expect 'offset must be a valid string' for an unrecognised ``window_size`` string."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda: None,
        'window_size': 'not an offset alias',
    }
    expected_message = 'offset must be a valid string'

    # The error is expected at construction time; no search is performed.
    with pytest.raises(AssertionError, match=expected_message):
        LabelMaker(**maker_settings)

示例#20

0

显示文件

def test_search_with_multiple_targets(transactions, total_spent_fn,
                                      unique_amounts_fn):
    """Search with two named labeling functions, then select one target.

    First the full result (both target columns) is compared with a fixed
    CSV listing; then ``select('unique_amounts')`` is compared with the
    matching single-target listing.
    """
    labeling_functions = {
        'total_spent': total_spent_fn,
        'unique_amounts': unique_amounts_fn,
    }
    maker = LabelMaker(
        target_entity='customer_id',
        time_index='time',
        window_size=2,
        labeling_function=labeling_functions,
    )

    label_times = maker.search(transactions, num_examples_per_instance=-1)

    # Stage 1: every target column.
    expected_all = [
        'customer_id,time,total_spent,unique_amounts',
        '0,2019-01-01 08:00:00,2,1',
        '1,2019-01-01 09:00:00,2,1',
        '1,2019-01-01 10:00:00,1,1',
        '2,2019-01-01 10:30:00,2,1',
        '2,2019-01-01 11:30:00,2,1',
        '3,2019-01-01 12:30:00,1,1',
    ]
    rendered_all = label_times.pipe(to_csv, index=False)
    assert rendered_all == expected_all, 'unexpected calculated values'

    # Stage 2: only the selected target.
    expected_selected = [
        'customer_id,time,unique_amounts',
        '0,2019-01-01 08:00:00,1',
        '1,2019-01-01 09:00:00,1',
        '1,2019-01-01 10:00:00,1',
        '2,2019-01-01 10:30:00,1',
        '2,2019-01-01 11:30:00,1',
        '3,2019-01-01 12:30:00,1',
    ]
    selected = label_times.select('unique_amounts')
    rendered_selected = selected.pipe(to_csv, index=False)
    failure_note = 'selected values differ from calculated values'
    assert rendered_selected == expected_selected, failure_note

示例#21

0

显示文件

def labels():
    """Build thresholded label times from the featuretools mock-customer data.

    Loads the single-table mock customer dataset (``random_seed=0``), keeps
    the transaction time, customer id and amount columns sorted by
    transaction time, searches for labels with a one-hour window, and
    applies a threshold of 1250 to the result.

    Returns:
        The object returned by ``threshold(1250)`` on the search result.
    """
    df = ft.demo.load_mock_customer(return_single_table=True, random_seed=0)

    # Select and sort in one non-mutating chain. Sorting in place on a frame
    # derived by column selection can raise SettingWithCopyWarning on some
    # pandas versions; the resulting frame is the same either way.
    columns = ['transaction_time', 'customer_id', 'amount']
    df = df[columns].sort_values('transaction_time')

    lm = LabelMaker(
        target_entity='customer_id',
        time_index='transaction_time',
        labeling_function=total_spent,
        window_size='1h',
    )

    lt = lm.search(
        df,
        minimum_data='10min',
        num_examples_per_instance=2,
        gap='30min',
        drop_empty=True,
        verbose=False,
    )

    return lt.threshold(1250)

示例#22

0

显示文件

def test_search_with_invalid_index(transactions, total_spent_fn):
    """Expect assertion errors for a shuffled frame and for an all-NaT time column."""
    maker_settings = {
        'target_entity': 'customer_id',
        'time_index': 'time',
        'labeling_function': lambda df: None,
        'window_size': 2,
    }
    maker = LabelMaker(**maker_settings)

    # Case 1: rows drawn by a seeded random sample instead of the
    # fixture's own order.
    shuffled = transactions.sample(n=10, random_state=0)
    with pytest.raises(AssertionError,
                       match="data frame must be sorted chronologically"):
        maker.search(shuffled, num_examples_per_instance=2)

    # Case 2: every value of the time column replaced with NaT.
    timeless = transactions.assign(time=pd.NaT)
    with pytest.raises(AssertionError, match="index contains null values"):
        maker.search(timeless, num_examples_per_instance=2)