Exemplo n.º 1
0
def test_sum_join_multiple_datasets_undermatching():
    """sum(join(...)) evaluates to 0 for a row that has no join partner.

    MOREWORKERS receives a ``joined`` variable that joins against WORKERS on
    ``name`` and sums ``WORKERS.max_production`` over the matching rows.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'

    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)
    joined_var = v(
        'joined',
        'sum(join(name == WORKERS.name, WORKERS.max_production))',
    )
    more_ds._add_var(joined_var)

    sim = run_sim([], datasets=[more_ds, workers_ds])

    # The second MOREWORKERS row has no counterpart in WORKERS, so its sum
    # collapses to 0.
    expected = [200, 0, 100]
    sim.assert_values_match('MOREWORKERS.joined', expected, t=1)
Exemplo n.º 2
0
def test_time_indexing_dataset_vars():
    """Dataset variables can be time-indexed, both from within the dataset
    (``ds_var2``) and from an ordinary variable (``lagged_var``).
    """
    assets = deepcopy(ASSETS_DATASET_DEF)

    base_var = v('ds_var1', 'EXAMPLE_LABEL.factor_1')
    assets._add_var(base_var)

    # ds_var2 reads the previous step of ds_var1; the trailing '0' is
    # presumably its initial value -- confirm against the `v` helper.
    lagged_ds_var = v('ds_var2', 'ds_var1[t-1]', '0')
    assets._add_var(lagged_ds_var)

    lagged_sum = v(
        'lagged_var',
        'sum(EXAMPLE_LABEL.ds_var1[t-1] + EXAMPLE_LABEL.ds_var2)',
        '0',
    )
    policies = pols(attribute1=['1'])
    sim = run_sim([lagged_sum], policies=policies, dataset=assets)

    expected = [0, 48, 48, 48]
    sim.assert_values_match('lagged_var', expected)
Exemplo n.º 3
0
def test_multiclass_classifier():
    """A classifier with more than two classes is rejected with an error.

    The simulation runs with ``allow_errs=True`` so the failure is collected
    rather than raised; the test then checks that an error matching
    ``['CLF', '', 'binary']`` was recorded and that ``prob`` holds only null
    values.
    """
    # A dummy classifier with 3 classes. Not supported.
    sim = run_sim(
        [
            v('prob', 'CLF.predict_proba(t)'),
        ],
        models=clf_factory('dummy_multiclass.pickle'),
        allow_errs=True,
    )
    sim.assert_errors_match([['CLF', '', 'binary']])
    sim.assert_null_values('prob')
Exemplo n.º 4
0
def test_find_threshold_iterative():
    """Exercise find_threshold_iterative() across multiple capacities."""
    data = setUp()
    precision = .001

    for capacity in range(-50, 10**3, 50):
        raw_threshold = find_threshold_iterative(data, capacity, precision)
        threshold = round_decimals_up(raw_threshold, 3)
        print(capacity, threshold)

        if capacity >= 0:
            # For non-negative capacities the simulation run must be truthy.
            assert run_sim(data, threshold, capacity)
        else:
            # Negative capacities are expected to produce a threshold of -1.
            assert threshold == -1
Exemplo n.º 5
0
def test_sklearn_pipeline():
    """Run a model that is an sklearn pipeline ending in a classifier.

    insurance_churn_model.pkl takes as input
    [number, number, number, number, string]. The test only checks that every
    predicted ``churn`` value is 0 or 1.
    """
    insurance = deepcopy(INSURANCE_DATASET_DEF)
    churn_equation = (
        'CLF.predict(t, t, initial_premium, initial_total_claims, location)'
    )
    insurance._add_var(v('churn', churn_equation))

    classifier = clf_factory('insurance_churn_model.pkl')
    sim = run_sim([], models=classifier, dataset=insurance)

    churn = sim.dataset_df().churn
    assert churn.isin([0, 1]).all()
Exemplo n.º 6
0
def test_join_fn_reduced_dim_boolean_expr():
    """A join whose mask (the first arg) doesn't naturally have the shape of a
    dataset variable is reported as a join error on the equation.
    """
    # This is another overmatching join, with the unusual feature that the
    # mask has no dataset dimensionality. The goal is a user-visible join
    # exception rather than a hard crash.
    sim_vars = [
        v('a_var', 't+4'),
        v('b_var', "join(1==1, EXAMPLE_LABEL.factor_2)"),
    ]
    sim = run_sim(
        sim_vars,
        dataset=ASSETS_DATASET_DEF,
        num_sims=2,
        num_steps=3,
        allow_errs=True,
    )

    expected_errors = [['b_var', 'equation', 'Join']]
    sim.assert_errors_match(expected_errors)
Exemplo n.º 7
0
def test_sum_join_multiple_datasets_overmatching():
    """sum(join(...)) adds up all matches when one row joins to several rows.

    WORKERS receives a ``joined`` variable that sums ``MOREWORKERS.widgets``
    over the rows selected by the join condition.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)

    joined_var = v(
        'joined',
        'sum(join(workers == MOREWORKERS.borkers, MOREWORKERS.widgets))',
    )
    workers_ds._add_var(joined_var)

    sim = run_sim([], datasets=[more_ds, workers_ds], num_sims=2)

    # The second WORKERS row matches two MOREWORKERS rows, with values 20
    # and 2.
    expected = [10, 20 + 2]
    sim.assert_values_match('WORKERS.joined', expected, t=1)
Exemplo n.º 8
0
def test_join_multiple_datasets_bijection():
    """Join two datasets that happen to correspond perfectly 1-to-1.

    WORKERS receives a ``joined`` variable that pulls ``PARAWORKERS.widgets``
    from the row with the same ``name``.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'

    # NOTE(review): the PARAWORKERS definition is passed as-is (no deepcopy);
    # this test does not modify it itself.
    para_ds = PARAWORKERS_DATASET_DEF

    joined_var = v(
        'joined',
        'join(name == PARAWORKERS.name, PARAWORKERS.widgets)',
    )
    workers_ds._add_var(joined_var)

    sim = run_sim([], datasets=[para_ds, workers_ds])

    expected = [10, 20]
    sim.assert_values_match('WORKERS.joined', expected, t=1)
Exemplo n.º 9
0
def test_external_model_basic():
    """Predict with an externally stored regression model taking three inputs.

    Checks that ``demand`` has no null entries and is not constant.
    """
    demand_model = {
        "label": "DEMAND_MODEL",
        "localPath": "tests/sample_models/three_input_model.pkl",
    }
    sim_vars = [
        v('competitor_price', 'uniform(100, 200)'),
        v('temperature', 'uniform(20, 30)'),
        v('demand', 'DEMAND_MODEL.predict(competitor_price, temperature, t)'),
    ]
    sim = run_sim(sim_vars, model=demand_model)

    demand = sim.results.demand
    assert demand.isnull().sum() == 0
    # A non-zero spread shows the predictions vary from row to row.
    assert demand.std() > 0
Exemplo n.º 10
0
def test_sum_join_multiple_datasets_bijection():
    """sum() wrapped around a 1-to-1 join leaves the joined values unchanged."""
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    para_ds = deepcopy(PARAWORKERS_DATASET_DEF)
    para_ds.name = 'PARAWORKERS'

    joined_var = v(
        'joined',
        'sum(join(name == PARAWORKERS.name, PARAWORKERS.widgets))',
    )
    workers_ds._add_var(joined_var)

    sim = run_sim([], datasets=[para_ds, workers_ds])

    # Because the join is 1-1, the sum() should be a no-op.
    expected = [10, 20]
    sim.assert_values_match('WORKERS.joined', expected, t=1)
Exemplo n.º 11
0
def test_join_multiple_datasets_injection():
    """Join against a dataset that has an extra row which doesn't participate
    in the join results.

    WORKERS receives a ``joined`` variable that pulls ``MOREWORKERS.widgets``
    from the row with the same ``name``.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)

    joined_var = v(
        'joined',
        'join(name == MOREWORKERS.name, MOREWORKERS.widgets)',
    )
    workers_ds._add_var(joined_var)

    sim = run_sim([], datasets=[more_ds, workers_ds])

    expected = [10, 20]
    sim.assert_values_match('WORKERS.joined', expected, t=1)
Exemplo n.º 12
0
def test_with_dataset_formulas_accessing_raw_data_and_attributes_and_vars():
    """A dataset formula may mix dataset columns, a policy attribute
    (``price_per_unit``) and an ordinary variable (``a_var``).
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    rev_equation = (
        "EXAMPLE_LABEL.max_production / EXAMPLE_LABEL.workers"
        " * price_per_unit * a_var"
    )
    workers_ds._add_var(v('rev_per_worker', rev_equation))

    sim_vars = [v('a_var', 't/10')]
    policies = pols(price_per_unit=['10', '5'])
    sim = run_sim(sim_vars, policies=policies, dataset=workers_ds, num_sims=4)

    expected = [50, 10 / 3]
    sim.assert_values_match(
        'EXAMPLE_LABEL.rev_per_worker',
        expected,
        t=1,
        policy=1,
    )
Exemplo n.º 13
0
def test_with_dataset_formulas_accessing_raw_data():
    """A dataset variable defined simply as ``t`` matches the expected
    per-step values under policy 0.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds._add_var(v('t_again', 't'))

    policies = pols(price_per_unit=['10', '1'])
    sim = run_sim(policies=policies, dataset=workers_ds)

    # Each of the values 1, 2, 3 is expected twice in a row.
    expected = [1, 1, 2, 2, 3, 3]
    sim.assert_values_match('EXAMPLE_LABEL.t_again', expected, policy=0)
Exemplo n.º 14
0
def test_probabilistic_sampling():
    """Ensure that .predict for a classifier samples rather than returning the argmax.

    Runs 1000 steps and checks that every predicted probability is above .5
    while both classes (0 and 1) still occur among the predictions.
    """
    sim_vars = [
        v('x', 'uniform(0, 1)'),
        v('prob', 'CLF.predict_proba(x[t])'),
        v('pred', 'CLF.predict(x[t])'),
    ]
    sim = run_sim(
        sim_vars,
        models=clf_factory('usually_positive_classifier.pickle'),
        num_steps=1000,
    )
    # We trained a logistic classifier on noise in (0, 1), and with 90% positive
    # labels. So all our predicted probabilities should be around .9, but we
    # should still almost always get at least one negative prediction.
    # (NB: There's a ~1e-46 chance of this test spuriously failing.)
    df = sim.results
    assert (df.prob > .5).all()
    # Compare against the values explicitly: on a pandas Series, `x in series`
    # tests the index labels, so `0 in df.pred` would pass regardless of what
    # was predicted.
    assert (df.pred == 0).any()
    assert (df.pred == 1).any()
Exemplo n.º 15
0
def test_sum_join_multiple_datasets_undermatching_multiple_policies():
    """Undermatching sum(join(...)) with several policies and simulations.

    NB: numpy broadcasting treats dimensions of size 1 in a uniquely lax way,
    so it matters to have some tests with >1 policy/sim, even when the
    quantity under test does not depend on policy attributes or on
    inter-simulation randomness.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'

    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)
    joined_var = v(
        'joined',
        'sum(join(name == WORKERS.name, WORKERS.max_production))',
    )
    more_ds._add_var(joined_var)

    sim = run_sim(
        [],
        policies=DUMMY_POLS,
        datasets=[more_ds, workers_ds],
        num_sims=5,
    )

    # The second MOREWORKERS row has no counterpart in WORKERS, so its sum
    # collapses to 0.
    expected = [200, 0, 100]
    sim.assert_values_match('MOREWORKERS.joined', expected, t=1, policy=0)
Exemplo n.º 16
0
def test_with_dataset_formulas_accessing_attributes():
    """A dataset formula can reference a policy attribute (``price_per_unit``).

    The expected values are checked across time, separately for each of the
    two policies.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    rev_equation = (
        "EXAMPLE_LABEL.max_production / EXAMPLE_LABEL.workers * price_per_unit"
    )
    workers_ds._add_var(v('rev_per_worker', rev_equation))

    policies = pols(price_per_unit=['10', '5'])
    sim = run_sim([], policies=policies, dataset=workers_ds, num_sims=4)

    # (policy index, expected values) -- policy 1 expects exactly half of
    # policy 0.
    expectations = (
        (0, [1000, 200 / 3]),
        (1, [500, 100 / 3]),
    )
    for policy_idx, expected in expectations:
        sim.assert_values_match_across_time(
            'EXAMPLE_LABEL.rev_per_worker',
            expected,
            policy=policy_idx,
        )
Exemplo n.º 17
0
def test_arithmetic_index_expr():
    """An index written as an arithmetic expression (``x[5-5]``) is accepted;
    ``y`` is expected to be 0 at every step.
    """
    sim_vars = [
        v('x', 't', '0'),
        v('y', 'x[5-5]'),
    ]
    sim = run_sim(sim_vars)

    expected = [0, 0, 0, 0]
    sim.assert_values_match('y', expected)
Exemplo n.º 18
0
def test_constant_index_expr():
    """A constant index (``x[0]``) is accepted; ``y`` is expected to be 0 at
    every step.
    """
    sim_vars = [
        v('x', 't', '0'),
        v('y', 'x[0]'),
    ]
    sim = run_sim(sim_vars)

    expected = [0, 0, 0, 0]
    sim.assert_values_match('y', expected)
Exemplo n.º 19
0
def test_multiline_formula():
    """A formula containing a newline is evaluated like a single-line one."""
    sim_vars = [v('x', '1 +\n1')]
    sim = run_sim(sim_vars)

    # Broadcasting comparison: the scalar 2 should match everywhere.
    sim.assert_values_match('x', 2)
Exemplo n.º 20
0
def test_fibonacci():
    """A variable that sums its two previous values produces a Fibonacci-style
    sequence.
    """
    fib_var = v('a', 'a[t-1] + a[t-2]', initial=1)
    sim = run_sim([fib_var], num_steps=5)

    expected = [1, 2, 3, 5, 8, 13]
    sim.assert_values_match('a', expected)