def test_incorporate_sampleid_existing():
    """Check that incorporating rows at index 1, 3 and 9 fills in cells.

    FRAME is incorporated first with lag=2.  A second frame indexed by 1, 3
    and 9 then supplies values for some (variable, index) cells and nan for
    the rest.  The expected tabulation below still has ten rows, and the
    supplied values (99, 44, 21 and 88) show up in every lagged column that
    refers to those index values.
    """
    # Seed the generator so the run is reproducible, as the other tests do.
    rng = np.random.RandomState(1)
    trcrpm = TRCRP_Mixture(chains=1, lag=2, variables=FRAME.columns, rng=rng)
    trcrpm.incorporate(FRAME)

    # nan entries are cells for which this second frame supplies no value.
    new_frame = pd.DataFrame(
        [
            [nan, nan, 99.],
            [44., nan, nan],
            [21., nan, 88.],
        ],
        index=[1, 3, 9],
        columns=['a', 'b', 'c'],
    )
    trcrpm.incorporate(new_frame)
    tabulated_data = trcrpm.engine.states[0].data_array()
    # Nine columns: reading across, each group of three appears to hold one
    # variable at lags 2, 1 and 0 (compare consecutive rows).
    expected_data = [
        [nan, nan, 86., nan, nan, 57., nan, nan, 65.],
        [nan, 86., 19., nan, 57., 62., nan, 65., 99.],
        [86., 19., 17., 57., 62., 41., 65., 99., 17.],
        [19., 17., 44., 62., 41., 7., 99., 17., 30.],
        [17., 44., 75., 41., 7., 1., 17., 30., 12.],
        [44., 75., 45., 7., 1., nan, 30., 12., 72.],
        [75., 45., 48., 1., nan, 77., 12., 72., 8.],
        [45., 48., 29., nan, 77., nan, 72., 8., 86.],
        [48., 29., 83., 77., nan, 46., 8., 86., 38.],
        [29., 83., 21., nan, 46., 54., 86., 38., 88.],
    ]
    # equal_nan=True makes nan cells compare equal to nan cells.
    assert np.allclose(tabulated_data, expected_data, equal_nan=True)
def test_timepoint_to_rowid():
    """Check that, with lag=0, each i in 0..len(FRAME)-1 maps to row id i."""
    rng = np.random.RandomState(2)
    trcrpm = TRCRP_Mixture(chains=1, lag=0, variables=FRAME.columns, rng=rng)
    trcrpm.incorporate(FRAME)
    # range (rather than xrange) keeps the loop valid on Python 2 and 3.
    for i in range(len(FRAME)):
        assert trcrpm._timepoint_to_rowid(i) == i
def test_incorporate_sampleid_non_contiguous():
    """Check the tabulation after incorporating a row that leaves a gap.

    After FRAME is incorporated with lag=2, a single row is added at
    max(FRAME.index) + 2, so the index value in between is never observed.
    The expected table gains exactly one row for the new observation.
    """
    prng = np.random.RandomState(1)
    model = TRCRP_Mixture(chains=1, lag=2, variables=FRAME.columns, rng=prng)
    model.incorporate(FRAME)

    # Incorporate a non-contiguous row, skipping the index value that
    # immediately follows the last one in FRAME.
    final_timepoint = max(FRAME.index)
    gap_row = pd.DataFrame(
        [[11, 12, float('nan')]],
        columns=['a', 'b', 'c'],
        index=[final_timepoint + 2],
    )
    model.incorporate(gap_row)

    expected_rows = [
        [nan, nan, 86., nan, nan, 57., nan, nan, 65.],
        [nan, 86., 19., nan, 57., 62., nan, 65., nan],
        [86., 19., 17., 57., 62., 41., 65., nan, 17.],
        [19., 17., nan, 62., 41., 7., nan, 17., 30.],
        [17., nan, 75., 41., 7., 1., 17., 30., 12.],
        [nan, 75., 45., 7., 1., nan, 30., 12., 72.],
        [75., 45., 48., 1., nan, 77., 12., 72., 8.],
        [45., 48., 29., nan, 77., nan, 72., 8., 86.],
        [48., 29., 83., 77., nan, 46., 8., 86., 38.],
        [29., 83., nan, nan, 46., 54., 86., 38., nan],
        # Row for the newly incorporated observation.
        [nan, nan, 11, 54., nan, 12, nan, nan, nan],
    ]
    observed_rows = model.engine.states[0].data_array()
    assert np.allclose(observed_rows, expected_rows, equal_nan=True)
def test_tabulate_lagged_data():
    """Check the state's data array for lag=0 and lag=2.

    With lag=0 the array is expected to match DATA_RAW.  With lag=2 the
    expected table has nine columns; reading across, each group of three
    appears to hold one variable at lags 2, 1 and 0, with nan where the
    lagged value is unavailable.
    """
    rng = np.random.RandomState(2)

    trcrpm = TRCRP_Mixture(chains=1, lag=0, variables=FRAME.columns, rng=rng)
    trcrpm.incorporate(FRAME)
    tabulated_data = trcrpm.engine.states[0].data_array()
    expected_data = DATA_RAW
    # Compare against the local so the expectation is stated in one place.
    assert np.allclose(tabulated_data, expected_data, equal_nan=True)

    # The same generator is deliberately reused for the second model.
    trcrpm = TRCRP_Mixture(chains=1, lag=2, variables=FRAME.columns, rng=rng)
    trcrpm.incorporate(FRAME)
    tabulated_data = trcrpm.engine.states[0].data_array()
    expected_data = [
        [nan, nan, 86., nan, nan, 57., nan, nan, 65.],
        [nan, 86., 19., nan, 57., 62., nan, 65., nan],
        [86., 19., 17., 57., 62., 41., 65., nan, 17.],
        [19., 17., nan, 62., 41., 7., nan, 17., 30.],
        [17., nan, 75., 41., 7., 1., 17., 30., 12.],
        [nan, 75., 45., 7., 1., nan, 30., 12., 72.],
        [75., 45., 48., 1., nan, 77., 12., 72., 8.],
        [45., 48., 29., nan, 77., nan, 72., 8., 86.],
        [48., 29., 83., 77., nan, 46., 8., 86., 38.],
        [29., 83., nan, nan, 46., 54., 86., 38., nan],
    ]
    assert np.allclose(tabulated_data, expected_data, equal_nan=True)
Пример #5
0
def test_get_dependence_probabilities_crash():
    """Check that pairwise dependence probabilities are all ones.

    Builds a four-chain, lag-3 model over FRAME and expects
    dependence_probability_pairwise() to be an nvars-by-nvars matrix of ones.
    """
    prng = np.random.RandomState(1)
    model = TRCRP_Mixture(chains=4, lag=3, variables=FRAME.columns, rng=prng)
    model.incorporate(FRAME)

    num_variables = len(model.variables)
    all_ones = np.ones((num_variables, num_variables))
    pairwise = model.dependence_probability_pairwise()
    assert np.allclose(pairwise, all_ones)
def test_get_temporal_regimes_crash():
    """Check the shape of get_temporal_regimes output for every variable.

    Without a timepoints argument the result should have shape
    (chains, len(dataset)); with three explicit timepoints it should have
    shape (chains, 3).
    """
    prng = np.random.RandomState(1)
    model = TRCRP_Mixture(chains=4, lag=3, variables=FRAME.columns, rng=prng)
    model.incorporate(FRAME)

    selected_timepoints = [0, 1, 2]
    for name in model.variables:
        full = model.get_temporal_regimes(name)
        assert np.shape(full) == (model.chains, len(model.dataset))

        partial = model.get_temporal_regimes(
            name, timepoints=selected_timepoints)
        assert np.shape(partial) == (model.chains, 3)
Пример #7
0
def test_serialize_trcrp_mixture():
    """Round-trip a model through to_metadata() / from_metadata().

    The metadata's 'factory' entry names a module and an attribute; that
    attribute is looked up dynamically and its from_metadata must return a
    TRCRP_Mixture instance.
    """
    prng = np.random.RandomState(1)
    original = TRCRP_Mixture(
        chains=4, lag=3, variables=FRAME.columns, rng=prng)
    original.incorporate(FRAME)

    metadata = original.to_metadata()
    # 'factory' is a (module name, attribute name) pair.
    module_name, attribute_name = metadata['factory']
    factory = getattr(importlib.import_module(module_name), attribute_name)
    restored = factory.from_metadata(metadata, seed=1)
    assert isinstance(restored, TRCRP_Mixture)
Пример #8
0
def test_trcrp_mixture_all_dependent():
    """Check that all variables stay in one view and share their regimes.

    Also verifies that resample_all raises ValueError before any data has
    been incorporated.
    """
    prng = np.random.RandomState(2)
    model = TRCRP_Mixture(chains=3, lag=3, variables=FRAME.columns, rng=prng)

    # Resampling before any data is incorporated must be rejected.
    with pytest.raises(ValueError):
        model.resample_all(steps=10)

    model.incorporate(FRAME)
    model.resample_all(steps=10)
    # Every chain's state should hold exactly one view.
    assert all(len(state.views) == 1 for state in model.engine.states)

    model.resample_hyperparameters(steps=5)
    # Query in the order a, b, c, then compare b and c against a.
    reference, regimes_b, regimes_c = [
        model.get_temporal_regimes(name) for name in ('a', 'b', 'c')
    ]
    assert np.all(reference == regimes_b)
    assert np.all(reference == regimes_c)
def test_simulate_dimensions():
    """Check the nesting and sizes of simulate / simulate_ancestral output.

    For each simulator and each (timepoints, variables, nsamples) case the
    result must contain nsamples entries per chain, each entry must have one
    element per timepoint, and each of those one element per variable.
    """
    num_chains = 4
    prng = np.random.RandomState(1)
    model = TRCRP_Mixture(
        chains=num_chains, lag=3, variables=FRAME.columns, rng=prng)
    model.incorporate(FRAME)

    # (timepoints, variables, nsamples) combinations, run in this order.
    cases = [
        ([1], ['a'], 4),
        ([1], ['a', 'b'], 7),
        ([1, 2], ['a'], 1),
        ([1, 2, 3], ['c', 'b'], 10),
        ([9, 10, 11, 12], ['c', 'b'], 10),
    ]

    for simulator in (model.simulate, model.simulate_ancestral):
        for timepoints, variables, nsamples in cases:
            draws = simulator(timepoints, variables, nsamples, multiprocess=0)
            assert len(draws) == nsamples * num_chains
            for draw in draws:
                assert len(draw) == len(timepoints)
                for per_timepoint in draw:
                    assert len(per_timepoint) == len(variables)
def test_incorporate_sampleid_wedged():
    """Check the tabulation after back-filling a previously skipped index.

    FRAME is incorporated with lag=2, then a row at max(FRAME.index) + 2,
    and finally the row at max(FRAME.index) + 1 that was skipped.  The
    expected table has the skipping row updated with the back-filled values
    and the back-filled observation appended after it.
    """
    prng = np.random.RandomState(1)
    model = TRCRP_Mixture(chains=1, lag=2, variables=FRAME.columns, rng=prng)
    model.incorporate(FRAME)

    final_timepoint = max(FRAME.index)

    # First incorporate a non-contiguous row, leaving a one-step gap.
    skipping_row = pd.DataFrame(
        [[11, 12, float('nan')]],
        columns=['a', 'b', 'c'],
        index=[final_timepoint + 2],
    )
    model.incorporate(skipping_row)

    # Now bring back the missing observation that sits inside the gap.
    # XXX The xfailure happens in this step.
    # NOTE(review): the remark above suggests this test was expected to
    # fail here; no xfail marker is visible on this function -- confirm.
    wedged_row = pd.DataFrame(
        [[3, 3, 3]],
        columns=['a', 'b', 'c'],
        index=[final_timepoint + 1],
    )
    model.incorporate(wedged_row)

    expected_rows = [
        [nan, nan, 86., nan, nan, 57., nan, nan, 65.],
        [nan, 86., 19., nan, 57., 62., nan, 65., nan],
        [86., 19., 17., 57., 62., 41., 65., nan, 17.],
        [19., 17., nan, 62., 41., 7., nan, 17., 30.],
        [17., nan, 75., 41., 7., 1., 17., 30., 12.],
        [nan, 75., 45., 7., 1., nan, 30., 12., 72.],
        [75., 45., 48., 1., nan, 77., 12., 72., 8.],
        [45., 48., 29., nan, 77., nan, 72., 8., 86.],
        [48., 29., 83., 77., nan, 46., 8., 86., 38.],
        [29., 83., nan, nan, 46., 54., 86., 38., nan],
        [nan, 3., 11., 54., 3., 12., nan, 3., nan],  # row updated.
        [83., nan, 3., 46., 54., 3., 38., nan, 3.],  # new row.
    ]
    observed_rows = model.engine.states[0].data_array()
    assert np.allclose(observed_rows, expected_rows, equal_nan=True)
def test_dependence_constraints():
    """Check _get_variable_dependence_constraints for several configurations.

    Covers the plain TRCRP_Mixture at lags 0, 2 and 5, the hierarchical
    variant with different dependency groupings at lags 0 and 1, and the
    error raised when one variable is listed in two dependency groups.
    """
    prng = np.random.RandomState(2)

    # Both factories share one generator; models are built in a fixed order.
    def flat(lag):
        return TRCRP_Mixture(
            chains=1, lag=lag, variables=FRAME.columns, rng=prng)

    def hierarchical(lag, dependencies):
        return Hierarchical_TRCRP_Mixture(
            chains=1,
            lag=lag,
            variables=FRAME.columns,
            rng=prng,
            dependencies=dependencies,
        )

    # In a TRCRP_Mixture all variables are dependent, at every lag.
    model = flat(0)
    assert model._get_variable_dependence_constraints() == [[0, 1, 2]]

    model = flat(2)
    expected = [[2, 1, 0, 5, 4, 3, 8, 7, 6]]
    assert model._get_variable_dependence_constraints() == expected

    model = flat(5)
    expected = [
        [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 17, 16, 15, 14, 13, 12],
    ]
    assert model._get_variable_dependence_constraints() == expected

    # Lag 0: the singleton group ['c'] contributes no constraint.
    model = hierarchical(0, [['a', 'b'], ['c']])
    assert model._get_variable_dependence_constraints() == [[0, 1]]

    # Lag 0: a single group containing every variable.
    model = hierarchical(0, [['a', 'b', 'c']])
    assert model._get_variable_dependence_constraints() == [[0, 1, 2]]

    # Lag 1: one declared group ['a', 'b'] yields two constraints.
    model = hierarchical(1, [['a', 'b']])
    expected = [[5, 4], [1, 0, 3, 2]]
    assert model._get_variable_dependence_constraints() == expected

    # Lag 1: only ['a'] declared.
    model = hierarchical(1, [['a']])
    expected = [[3, 2], [5, 4], [1, 0]]
    assert model._get_variable_dependence_constraints() == expected

    # Variable 'a' is listed in two dependency groups. A RuntimeError is
    # expected rather than a ValueError because incorporate uses
    # multiprocessing, and parallel_map captures the ValueError and throws
    # a RuntimeError instead.
    with pytest.raises(RuntimeError):
        model = hierarchical(0, [['a', 'c'], ['a', 'b']])
        model.incorporate(FRAME)
def test_get_cgpm_constraints():
    """Check _get_cgpm_constraints before and after back-filling data.

    Already-incorporated timepoints must yield None.  For fresh timepoints
    (10 and 12) the constraints must map tabulated column indexes to the
    lagged observations available so far, and must pick up values that are
    incorporated afterwards at timepoints 9 and 11.
    """
    prng = np.random.RandomState(2)
    model = TRCRP_Mixture(chains=1, lag=3, variables=FRAME.columns, rng=prng)
    model.incorporate(FRAME)

    def incorporate_row(timepoint, values):
        # Incorporate one observation of (a, b, c) at the given timepoint.
        row = pd.DataFrame(
            [values], columns=['a', 'b', 'c'], index=[timepoint])
        model.incorporate(row)

    # For reference, lag=3 implies the following tabulated column indexes:
    #   column a: 0 1 2 3     column b: 4 5 6 7     column c: 8 9 10 11

    # Incorporated timepoints should have no constraints.
    assert all(
        model._get_cgpm_constraints(timepoint) is None
        for timepoint in model.dataset.index
    )

    # Fresh timepoint 10 should have appropriate constraints.
    expected_10 = {
        0: 29, 1: 83,   # column a
        5: 46, 6: 54,   # column b
        8: 86, 9: 38,   # column c
    }
    assert model._get_cgpm_constraints(10) == expected_10

    # Incorporating data into timepoint 9 should update constraints for 10.
    incorporate_row(9, [-12, nan, -12])
    expected_10.update({2: -12, 10: -12})
    assert model._get_cgpm_constraints(10) == expected_10

    # Fresh timepoint 12 should have appropriate constraints.
    expected_12 = {
        0: -12,         # column a
        4: 54,          # column b
        8: -12,         # column c
    }
    assert model._get_cgpm_constraints(12) == expected_12

    # Incorporating data into timepoint 11 should update constraints for 12.
    incorporate_row(11, [-44, -48, -47])
    expected_12.update({2: -44, 6: -48, 10: -47})
    assert model._get_cgpm_constraints(12) == expected_12
Пример #13
0
from trcrpm import TRCRP_Mixture
from collections import Counter
# Parenthesised form prints the same text on Python 2 and Python 3.
print("f**k yeah")
# Load the series and keep rows 156600..239999, renumbered from zero.
data = pd.read_csv("./data/anomaly0245.csv", index_col=0)
data = data.iloc[156600:240000].reset_index(drop=True)

# Set up the bookkeeping variables: which chain to read regimes from, the
# start offset of the current window, and the window length in rows.
chain = 0
i = 0
step = 30
print(i)

# Fit the model on the first window of `step` rows.
rng = np.random.RandomState(1)
model = TRCRP_Mixture(chains=1, lag=10, variables=data.columns, rng=rng)
model.incorporate(data[i:i + step])
model.resample_all(seconds=10)
model.resample_hyperparameters(seconds=10)
# Regime assignments of the 'anomaly' variable for the selected chain.
s = model.get_temporal_regimes('anomaly')[chain]
# Number of distinct regimes seen so far, repeated once per row in the window.
num_states = step * [len(set(s))]
states = list(s[i:i + step])
# Values of the first data column over the window.
eng_val = data.iloc[i:i + step, 0].tolist()
x = list(range(i, i + step))
for i in range(step, len(data) - step, step):
    model.incorporate(data[i:i + step])
    model.resample_all(seconds=10)
    model.resample_hyperparameters(seconds=10)
    s = model.get_temporal_regimes("anomaly")[chain]
    num_states = step * [len(sorted(set(s)))]
    states = list(s[i:i + step])