Example #1
0
def full_rank_time_series(request):
    """ Yields a time series of which the propagator has full rank (7 in this case as data is mean-free). """
    rnd = np.random.RandomState(42)
    dim = 8
    # orthogonal basis times a spectrum of distinct eigenvalues 1/dim, ..., 1
    basis = np.linalg.qr(rnd.normal(size=(dim, dim)))[0]
    spectrum = np.diag(np.arange(1, dim + 1)).astype(np.float64) / dim
    model = TransferOperatorModel(basis @ spectrum @ basis.T)
    # propagate a single large-valued frame forward 1000 times
    frames = [np.ones((1, dim)) * 100000]
    for _ in range(1000):
        frames.append(model.forward(frames[-1]))
    traj = np.concatenate(frames)
    param = request.param
    if param == 'trajectory':
        return traj, traj
    if param == 'time-lagged-ds':
        return traj, TimeLaggedDataset(traj[:-1], traj[1:])
    if param == 'concat-time-lagged-ds':
        return traj, TimeLaggedConcatDataset(
            [TimeLaggedDataset(traj[:-1], traj[1:])])
    if param == 'traj-ds':
        return traj, TrajectoryDataset(1, traj)
    if param == 'trajs-ds':
        return traj, TrajectoriesDataset([TrajectoryDataset(1, traj)])
    raise ValueError(f"Unexpected request param {request.param}")
Example #2
0
def test_estimator(fixed_seed):
    """Train a VAMPNet lobe, then check the learnt featurization reproduces the reference MSM."""
    data = deeptime.data.ellipsoids()
    obs = data.observations(6000, n_dim=10).astype(np.float32)

    # set up a one-dimensional lobe and train it on time-shifted chunks
    lobe = nn.Sequential(nn.Linear(10, 1), nn.Tanh())
    optimizer = torch.optim.Adam(lobe.parameters(), lr=1e-2)
    for _ in range(50):
        for chunk, chunk_lagged in deeptime.util.data.timeshifted_split(obs, lagtime=1, chunksize=512):
            optimizer.zero_grad()
            loss = vampnet_loss(lobe(torch.from_numpy(chunk)),
                                lobe(torch.from_numpy(chunk_lagged)))
            loss.backward()
            optimizer.step()

    # now let's compare
    lobe.eval()
    dataset = TrajectoryDataset(1, obs)
    vampnet = VAMPNet(lobe=lobe)
    vampnet_model = vampnet.fit(DataLoader(dataset, batch_size=512),
                                validation_loader=DataLoader(dataset, batch_size=512)).fetch_model()
    assert_(len(vampnet.train_scores) > 0)
    assert_(len(vampnet.validation_scores) > 0)

    # reference model w/o learnt featurization
    projection = VAMP(lagtime=1, observable_transform=vampnet_model).fit(obs).transform(obs, propagate=True)
    dtraj = KMeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()
    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)
Example #3
0
def test_estimator_fit(fixed_seed, dtype, shared_lobe):
    """Fit a VAMPNet via data loaders and verify the projection recovers the reference MSM."""
    data = deeptime.data.ellipsoids()
    obs = data.observations(60000, n_dim=2).astype(dtype)
    train, val = torch.utils.data.random_split(TrajectoryDataset(1, obs), [50000, 9999])

    # lobe with fixed weights so the projection is deterministic
    linear_layer = nn.Linear(2, 1)
    with torch.no_grad():
        linear_layer.weight[0, 0] = -0.3030
        linear_layer.weight[0, 1] = 0.3060
        linear_layer.bias[0] = -0.7392
    lobe = nn.Sequential(linear_layer, nn.Tanh())

    # either share one lobe for both time steps or use an independent copy
    lobe_t = None if shared_lobe else deepcopy(lobe)

    net = VAMPNet(lobe=lobe, dtype=dtype, learning_rate=1e-8, lobe_timelagged=lobe_t)
    net.fit(DataLoader(train, batch_size=512, shuffle=True), n_epochs=1,
            validation_loader=DataLoader(val, batch_size=512),
            validation_score_callback=lambda *args: args)

    # reference model w/o learnt featurization
    projection = VAMP(lagtime=1, observable_transform=net).fit(obs).fetch_model().transform(obs)
    dtraj = KMeans(2).fit(projection).transform(projection)
    msm_vampnet = MaximumLikelihoodMSM().fit(dtraj, lagtime=1).fetch_model()
    np.testing.assert_array_almost_equal(msm_vampnet.transition_matrix, data.msm.transition_matrix, decimal=2)
Example #4
0
def test_no_side_effects():
    """Two consecutive fits must hand out distinct lobe instances."""
    mlp = nn.Linear(10, 2)
    obs = deeptime.data.ellipsoids().observations(100, n_dim=10).astype(np.float32)
    net = VAMPNet(lobe=mlp, dtype=np.float32, learning_rate=1e-8)
    loader = DataLoader(TrajectoryDataset(1, obs), batch_size=512, shuffle=True)
    first = net.fit(loader, n_epochs=1).fetch_model()
    second = net.fit(loader, n_epochs=1).fetch_model()
    with torch.no_grad():
        assert_(first.lobe is not second.lobe)  # check it is not the same instance
Example #5
0
def two_state_hmm():
    """Sample a 1d two-state trajectory from an MSM, embed it in 2d via a random rotation.

    Returns the 1d trajectory, its rotated 2d embedding, and a lag-1 TrajectoryDataset.
    """
    length = 1000
    P = np.asarray([[0.9, 0.1], [0.1, 0.9]])
    dtraj = dt.markov.msm.MarkovStateModel(P).simulate(length, seed=42)
    # gaussian noise, shifted by +20 wherever the hidden state is 1
    traj = np.random.randn(len(dtraj))
    traj[np.where(dtraj == 1)[0]] += 20.0
    stacked = np.vstack((traj, np.zeros(len(traj))))
    # random planar rotation angle
    phi = np.random.rand() * 2.0 * np.pi
    rotation = np.asarray([[np.cos(phi), -np.sin(phi)],
                           [np.sin(phi), np.cos(phi)]])
    traj_rot = np.dot(rotation, stacked).T

    return traj, traj_rot, TrajectoryDataset(1, traj_rot.astype(np.float32))
Example #6
0
def test_timelagged_dataset_multitraj(lagtime, ntraj, stride, start, stop):
    """Sliced access on a multi-trajectory dataset must match manual time-lagged collection."""
    trajs = [np.random.normal(size=(n, 3)) for n in (7, 555, 55)][:ntraj]
    assert_(len(trajs) == ntraj)
    with assert_raises(AssertionError):
        TrajectoryDataset.from_trajectories(1, [])  # empty data
    with assert_raises(AssertionError):
        # lagtime too long
        TrajectoryDataset.from_trajectories(lagtime=7, data=trajs)
    with assert_raises(AssertionError):
        # shape mismatch
        TrajectoryDataset.from_trajectories(lagtime=1, data=trajs + [np.empty((55, 7))])
    ds = TrajectoryDataset.from_trajectories(lagtime=lagtime, data=trajs)
    assert len(ds) == sum(len(traj) - lagtime for traj in trajs)

    # Iterate over data and see if it is the same as iterating over dataset
    out_full = ds[::]
    out_strided = ds[start:stop:stride]

    # we manually iterate over trajectories and collect them in time-lagged fashion
    frames = np.concatenate([traj[:-lagtime] for traj in trajs])[start:stop:stride]
    lagged = np.concatenate([traj[lagtime:] for traj in trajs])[start:stop:stride]

    # check that manually collected and dataset yielded data coincide
    assert_equal(len(frames), len(out_strided[0]))
    assert_equal(len(lagged), len(out_strided[1]))
    assert_array_almost_equal(frames, out_strided[0])
    assert_array_almost_equal(lagged, out_strided[1])

    # resolve the slice to concrete indices and check element-wise access
    for ix in range(*slice(start, stop, stride).indices(len(ds))):
        x, y = ds[ix]
        assert_equal(x, out_full[0][ix])
        assert_equal(y, out_full[1][ix])
Example #7
0
def test_timelagged_dataset(lagtime):
    """Random sub-splits of a TrajectoryDataset together cover only values from the source data."""
    pytest.importorskip("torch.utils.data")
    import torch.utils.data as data_utils
    data = np.arange(5000)
    ds = TrajectoryDataset(lagtime, data)
    np.testing.assert_equal(len(ds), 5000 - lagtime)

    collected = []
    for subset in data_utils.random_split(ds, [1000, 2500, 1500 - lagtime]):
        for batch in data_utils.DataLoader(subset, batch_size=123):
            if lagtime > 0:
                # time-lagged mode yields (frame, lagged frame) pairs
                np.testing.assert_(isinstance(batch, (list, tuple)))
                collected.append(batch[0].numpy())
                collected.append(batch[1].numpy())
            else:
                collected.append(batch.numpy())
    collected = np.unique(np.concatenate(collected))
    # everything collected must stem from the original data
    np.testing.assert_equal(len(np.setdiff1d(collected, data)), 0)
Example #8
0
@contextmanager
def does_not_raise():
    """No-op context manager: the positive counterpart to ``assert_raises`` in parametrized tests."""
    yield None


# Each case pairs input data with a lagtime and an expectation context manager;
# the ids list below describes the scenarios in the same order.
@pytest.mark.parametrize(
    "data,lagtime,expectation",
    [(np.zeros((100, 5)), 5, does_not_raise()),
     (np.zeros((100, 5)), None, assert_raises(ValueError)),
     (np.zeros((100, 5)), 0, assert_raises(AssertionError)),
     (np.zeros((100, 5)), 96, assert_raises(AssertionError)),
     ((np.zeros((100, 5)), np.zeros((100, 5))), None, does_not_raise()),
     ((np.zeros((100, 5)), np.zeros(
         (105, 5))), None, assert_raises(AssertionError)),
     (TrajectoryDataset.from_trajectories(
         5, [np.zeros((55, 5)), np.zeros((55, 5))]), None, does_not_raise())],
    ids=[
        "Trajectory with lagtime",
        "Trajectory without lagtime",
        "Trajectory with zero lagtime",
        "Trajectory with too large lagtime",
        "X-Y tuple of data",
        "X-Y tuple of data, length mismatch",
        "Custom concat dataset of list of trajectories",
    ])
def test_to_dataset(data, lagtime, expectation):
    """to_dataset should accept raw trajectories, X-Y tuples and ready datasets, validating the lagtime."""
    with expectation:
        ds = to_dataset(data, lagtime=lagtime)
        # 100 pairs for the X-Y tuple case, 95 (= 100 - 5) when a lagtime of 5 applies
        assert_(len(ds) in (100, 95))
        data = ds[:]
        assert_equal(len(data), 2)  # yields an (X, Y) pair