def test_detach_trick():
    """Gradients of ``func.variable`` must not depend on whether ``t`` requires grad.

    For both the adjoint and the non-adjoint backward pass, the same CDE is
    solved twice from the same ``z0`` -- once with output times that require
    grad, once without -- and the gradients accumulated on ``func.variable``
    are required to be exactly equal.
    """
    control = torchcde.CubicSpline(torchcde.natural_cubic_coeffs(torch.rand(1, 10, 3)))
    func = _Func(input_size=3, hidden_size=3)

    for adjoint in (True, False):
        z0 = torch.rand(1, 3)
        grads = []
        for t_grad in (True, False):
            times = torch.tensor([0., 9.], requires_grad=t_grad)
            # dopri5 is deliberately not used here: a `t` that requires grad
            # forces smaller step sizes, so the results would differ slightly.
            solution = torchcde.cdeint(
                X=control,
                z0=z0,
                func=func,
                t=times,
                adjoint=adjoint,
                method='rk4',
                options=dict(step_size=0.5),
            )
            solution[:, -1].sum().backward()
            grads.append(func.variable.grad.clone())
            # Reset so the next solve starts from a clean accumulator.
            func.variable.grad.zero_()

        reference = grads[0]
        for later in grads[1:]:
            assert (later == reference).all()
def test_grad_paths():
    """Every differentiable input of ``cdeint`` receives a gradient after backward.

    Checked for each combination of solver ('rk4', 'dopri5') and adjoint
    on/off: before ``backward`` no leaf has a ``.grad``; afterwards the spline
    times, the path, ``z0``, ``func.variable`` and the output times all do.
    """
    for method in ('rk4', 'dopri5'):
        for adjoint in (True, False):
            t = torch.linspace(0, 9, 10, requires_grad=True)
            path = torch.rand(1, 10, 3, requires_grad=True)
            cubic_spline = torchcde.NaturalCubicSpline(torchcde.natural_cubic_coeffs(path, t), t)
            z0 = torch.rand(1, 3, requires_grad=True)
            func = _Func(input_size=3, hidden_size=3)
            t_ = torch.tensor([0., 9.], requires_grad=True)

            z = torchcde.cdeint(
                X=cubic_spline,
                func=func,
                z0=z0,
                t=t_,
                adjoint=adjoint,
                method=method,
                rtol=1e-4,
                atol=1e-6,
            )
            assert z.shape == (1, 2, 3)

            leaves = (t, path, z0, func.variable, t_)
            # Nothing has been back-propagated yet.
            for leaf in leaves:
                assert leaf.grad is None

            z[:, 1].sum().backward()

            for leaf in leaves:
                assert isinstance(leaf.grad, torch.Tensor)
def test_tuple_input():
    """``cdeint`` accepts a tuple-valued control and state.

    Two splines are combined with ``TupleControl`` and the state is a matching
    pair of tensors. Only the first output component is back-propagated, so the
    gradient on the second initial state must be all zeros.
    """
    xa = torch.rand(2, 10, 2)
    xb = torch.rand(10, 1)
    X = torchcde.TupleControl(
        torchcde.NaturalCubicSpline(torchcde.natural_cubic_coeffs(xa)),
        torchcde.NaturalCubicSpline(torchcde.natural_cubic_coeffs(xb)),
    )

    def func(t, z):
        first, second = z
        first_field = first.sigmoid().unsqueeze(-1).repeat_interleave(2, dim=-1)
        second_field = second.tanh().unsqueeze(-1)
        return first_field, second_field

    z0 = torch.rand(2, 3), torch.rand(5, requires_grad=True)
    out = torchcde.cdeint(X=X, func=func, z0=z0, t=X.interval, adjoint_params=())
    out[0].sum().backward()

    second_grad = z0[1].grad
    assert (second_grad == 0).all()
def main(num_epochs=30):
    """Train a ``NeuralCDE`` binary classifier, then print its accuracy on fresh data.

    Args:
        num_epochs: number of passes over the training set.
    """
    train_X, train_y = get_data()

    ######################
    # input_channels=3: horizontal position, vertical position and time for each point of the spiral.
    # hidden_channels=8: size of the evolving hidden state z_t; a free choice.
    # output_channels=1: a single logit, since this is binary classification.
    ######################
    model = NeuralCDE(input_channels=3, hidden_channels=8, output_channels=1)
    optimizer = torch.optim.Adam(model.parameters())

    ######################
    # Turn the dataset into a continuous path via natural cubic spline interpolation.
    # `train_coeffs` is a tensor describing that path; for most problems it is
    # easiest to save this tensor and treat it as the dataset.
    ######################
    train_coeffs = torchcde.natural_cubic_coeffs(train_X)
    loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(train_coeffs, train_y),
        batch_size=32,
    )

    for epoch in range(num_epochs):
        for batch_coeffs, batch_y in loader:
            logits = model(batch_coeffs).squeeze(-1)
            loss = torch.nn.functional.binary_cross_entropy_with_logits(logits, batch_y)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Reports the loss of the last batch of the epoch.
        print('Epoch: {} Training loss: {}'.format(epoch, loss.item()))

    # Evaluate on a second draw from get_data().
    test_X, test_y = get_data()
    test_logits = model(torchcde.natural_cubic_coeffs(test_X)).squeeze(-1)
    predicted_label = (torch.sigmoid(test_logits) > 0.5).to(test_y.dtype)
    hits = (predicted_label == test_y).to(test_y.dtype)
    proportion_correct = hits.sum() / test_y.size(0)
    print('Test Accuracy: {}'.format(proportion_correct))
def test_prod():
    """``cdeint`` works with a vector field that only provides ``prod``.

    The field object has no ``forward``/``__call__``; it implements
    ``prod(t, z, dXdt)`` directly and asserts the shapes it is handed.
    """
    control_values = torch.rand(2, 5, 1)
    X = torchcde.NaturalCubicSpline(torchcde.natural_cubic_coeffs(control_values))

    class F:
        def prod(self, t, z, dXdt):
            # Scalar time, (batch, hidden) state, (batch, channels) control derivative.
            assert t.shape == ()
            assert z.shape == (2, 3)
            assert dXdt.shape == (2, 1)
            return -z * dXdt

    z0 = torch.rand(2, 3, requires_grad=True)
    solution = torchcde.cdeint(X=X, func=F(), z0=z0, t=X.interval, adjoint_params=())
    solution.sum().backward()
def test_shape():
    """Output of ``cdeint`` has shape ``(*batch_dims, num_out_times, num_hidden_channels)``.

    Ten randomly sized problems are solved for each of 'rk4' and 'dopri5'; the
    number of batch dimensions itself is random (0, 1 or 2).
    """

    def _draw(low, high):
        # One random Python int in [low, high).
        return torch.randint(low=low, high=high, size=(1, )).item()

    for method in ('rk4', 'dopri5'):
        for _ in range(10):
            num_points = _draw(5, 100)
            num_channels = _draw(1, 3)
            num_hidden_channels = _draw(1, 5)
            num_batch_dims = _draw(0, 3)
            batch_dims = [_draw(1, 3) for _ in range(num_batch_dims)]

            values = torch.rand(*batch_dims, num_points, num_channels)
            spline = torchcde.NaturalCubicSpline(torchcde.natural_cubic_coeffs(values))

            class _Func(torch.nn.Module):
                def __init__(self):
                    super().__init__()
                    # Size-1 batch dims so the parameter broadcasts over the batch.
                    ones = [1] * num_batch_dims
                    self.variable = torch.nn.Parameter(torch.rand(*ones, 1, num_channels))

                def forward(self, t, z):
                    return z.sigmoid().unsqueeze(-1) + self.variable

            f = _Func()
            z0 = torch.rand(*batch_dims, num_hidden_channels)

            num_out_times = _draw(2, 10)
            start, end = spline.interval
            # Sorted random output times rescaled into the spline's interval.
            unit_times = torch.rand(num_out_times, dtype=torch.float64).sort().values
            out_times = unit_times * (end - start) + start

            options = {'step_size': 1. / num_points} if method == 'rk4' else {}
            out = torchcde.cdeint(spline, f, z0, out_times, method=method, options=options, rtol=1e-4, atol=1e-6)
            assert out.shape == (*batch_dims, num_out_times, num_hidden_channels)
def forward(self, x):
    """Run the neural CDE on a batch of raw sequences and read out the final state.

    Args:
        x: the raw time-series tensor to interpolate. The interpolation
            coefficients are computed here, so ``x`` must NOT already be
            coefficients. (assumes shape ``(..., length, channels)`` as
            expected by torchcde -- TODO confirm against the caller)

    Returns:
        ``self.output`` applied to the hidden state at the last evaluation time.

    Raises:
        ValueError: if ``self.interpolation`` is neither "cubic" nor "linear".
    """
    # Each interpolation class needs coefficients produced by its own routine;
    # natural-cubic coefficients are not a valid input for LinearInterpolation.
    # The scheme is checked first so a bad setting fails before any work is done.
    if self.interpolation == "cubic":
        control = torchcde.NaturalCubicSpline(torchcde.natural_cubic_coeffs(x))
    elif self.interpolation == "linear":
        control = torchcde.LinearInterpolation(torchcde.linear_interpolation_coeffs(x))
    else:
        raise ValueError("invalid interpolation given")

    # Initial hidden state is a function of the control's value at the start time.
    x0 = control.evaluate(control.interval[0])
    z0 = self.initial(x0)
    zt = torchcde.cdeint(X=control, func=self.model, z0=z0, t=control.interval)
    # Second-to-last axis of zt indexes time; keep only the terminal value.
    return self.output(zt[..., -1, :])
def test_backend():
    """The torchdiffeq and torchsde backends agree on the same problem.

    Both solve the same CDE with the midpoint method and a unit step; the two
    results are compared with ``torch.testing.assert_allclose``.
    """
    x = torch.randn(1, 10, 2)
    X = torchcde.CubicSpline(torchcde.natural_cubic_coeffs(x))

    def func(t, z):
        return -z.unsqueeze(-1).expand(1, 3, 2)

    z0 = torch.randn(1, 3)

    def _solve(**backend_kwargs):
        # Arguments common to both backends; only the step-size spelling differs.
        return torchcde.cdeint(X=X, func=func, z0=z0, t=X.interval, method="midpoint", **backend_kwargs)

    via_torchdiffeq = _solve(backend="torchdiffeq", options=dict(step_size=1.0))
    via_torchsde = _solve(backend="torchsde", dt=1.0)
    torch.testing.assert_allclose(via_torchdiffeq, via_torchsde)
def backward(ctx, *grad_output):
    """Backward pass that integrates an adjoint system backwards over ``t_span``.

    The forward solution saved on ``ctx`` is interpolated with a natural cubic
    spline, and the adjoint state ``A = [λ, μ]`` (flattened and concatenated) is
    integrated interval by interval from the last time point to the first.

    Relies on names that are not defined in this block and must come from the
    enclosing scope: ``vf``, ``integral_loss``, ``odeint``, ``solver``, ``atol``,
    ``rtol``, ``grad``, ``natural_cubic_coeffs`` and ``NaturalCubicSpline``.
    (presumably this is the ``backward`` of a ``torch.autograd.Function``
    defined inside a factory -- confirm against the enclosing code)

    Args:
        ctx: context object; ``ctx.saved_tensors`` must unpack to
            ``(sol, t_span, t_sol)``.
        *grad_output: incoming gradients; only the last one is used, indexed
            along its first axis by time-point position.

    Returns:
        The 4-tuple ``(μ, λ, None, None)``, with ``μ`` shaped like the
        flattened parameters of ``vf`` and ``λ`` shaped like
        ``grad_output[-1][-1]``.
    """
    sol, t_span, t_sol = ctx.saved_tensors
    # All parameters of the vector field as one flat vector.
    vf_params = torch.cat([p.contiguous().flatten() for p in vf.parameters()])
    # initialize adjoint state
    xT, λT, μT = sol[-1], grad_output[-1][-1], torch.zeros_like(vf_params)
    λT_nel, μT_nel = λT.numel(), μT.numel()
    # NOTE(review): xT_shape, λT_shape and μT_shape are never read in this block.
    xT_shape, λT_shape, μT_shape = xT.shape, λT.shape, μT.shape
    # Augmented adjoint state: first λT_nel entries are λ, last μT_nel are μ.
    A = torch.cat([λT.flatten(), μT.flatten()])
    # Interpolate the detached forward solution so x(t) can be evaluated at any t.
    # sol is 3-D; presumably (time, batch, dim) permuted to (batch, time, dim) -- TODO confirm.
    spline_coeffs = natural_cubic_coeffs(x=sol.permute(1, 0, 2).detach(), t=t_sol)
    x_spline = NaturalCubicSpline(coeffs=spline_coeffs, t=t_sol)

    # define adjoint dynamics
    def adjoint_dynamics(t, A):
        # Reduce a non-scalar t to its first element.
        if len(t.shape) > 0: t = t[0]
        x = x_spline.evaluate(t).requires_grad_(True)
        t = t.requires_grad_(True)
        λ, μ = A[:λT_nel], A[-μT_nel:]
        λ, μ = λ.reshape(λT.shape), μ.reshape(μT.shape)
        with torch.set_grad_enabled(True):
            dx = vf(t, x)
            # Vector-Jacobian products of vf with -λ w.r.t. x, t and each parameter.
            # The t component (dt) is computed but not used afterwards.
            dλ, dt, *dμ = tuple(grad(dx, (x, t) + tuple(vf.parameters()), -λ,
                                     allow_unused=True, retain_graph=False))

            if integral_loss:
                # Subtract the gradient of the integral loss w.r.t. x from dλ.
                dg = torch.autograd.grad(integral_loss(t, x).sum(), x, allow_unused=True, retain_graph=True)[0]
                dλ = dλ - dg

            # NOTE(review): a parameter with no gradient contributes a single zero
            # (torch.zeros(1)), not a zero block of that parameter's size, so dμ
            # would then not have μT_nel entries -- confirm this cannot happen.
            dμ = torch.cat([el.flatten() if el is not None else torch.zeros(1) for el in dμ], dim=-1)
        return torch.cat([dλ.flatten(), dμ.flatten()])

    # solve the adjoint equation
    # NOTE(review): n_elements is never read in this block.
    n_elements = (λT_nel, μT_nel)
    for i in range(len(t_span) - 1, 0, -1):
        # Integrate backwards over [t_span[i - 1], t_span[i]] (endpoints flipped).
        # t_adj_sol is not used afterwards.
        t_adj_sol, A = odeint(adjoint_dynamics, A, t_span[i - 1:i + 1].flip(0), solver, atol=atol, rtol=rtol)
        # prepare adjoint state for next interval
        A = torch.cat([A[-1, :λT_nel], A[-1, -μT_nel:]])
        # Add the incoming gradient at the time point just reached.
        A[:λT_nel] += grad_output[-1][i - 1].flatten()

    λ, μ = A[:λT_nel], A[-μT_nel:]
    λ, μ = λ.reshape(λT.shape), μ.reshape(μT.shape)
    return (μ, λ, None, None)
def _solve_cde(x):
    """Build a small neural CDE for ``x`` and return its output.

    Args:
        x: tensor of shape (..., length, channels); may have missing data
            represented by NaNs.

    Returns:
        The model output: the readout layer applied to the terminal hidden state.
    """
    # Create dataset
    coeffs = torchcde.natural_cubic_coeffs(x)

    # Create model
    input_channels = x.size(-1)
    hidden_channels = 4  # hyperparameter, we can pick whatever we want for this
    output_channels = 10  # e.g. to perform 10-way multiclass classification

    class F(torch.nn.Module):
        def __init__(self):
            super().__init__()
            # For illustrative purposes only. You should usually use an MLP or
            # something. A single linear layer won't be that great.
            self.linear = torch.nn.Linear(hidden_channels, hidden_channels * input_channels)

        def forward(self, t, z):
            # Works for any number of leading batch dimensions.
            leading = z.shape[:-1]
            activated = self.linear(z).tanh()
            return activated.view(*leading, hidden_channels, input_channels)

    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.initial = torch.nn.Linear(input_channels, hidden_channels)
            self.func = F()
            self.readout = torch.nn.Linear(hidden_channels, output_channels)

        def forward(self, coeffs):
            X = torchcde.NaturalCubicSpline(coeffs)
            z0 = self.initial(X.evaluate(X.interval[0]))
            zt = torchcde.cdeint(X=X, func=self.func, z0=z0, t=X.interval)
            # get the terminal value of the CDE
            return self.readout(zt[..., -1, :])

    # Run model
    return Model()(coeffs)
def _solve_cde(x):
    """Build a small neural CDE for ``x`` and run it once; the result is discarded.

    Args:
        x: tensor of shape (batch, length, channels).
    """
    # Create dataset
    coeffs = torchcde.natural_cubic_coeffs(x)

    # Create model
    batch_size, input_channels = x.size(0), x.size(2)
    hidden_channels = 4  # hyperparameter, we can pick whatever we want for this
    output_channels = 4  # e.g. to perform 4-way multiclass classification

    class F(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(hidden_channels, hidden_channels * input_channels)

        def forward(self, t, z):
            flat = self.linear(z)
            return flat.view(batch_size, hidden_channels, input_channels)

    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.initial = torch.nn.Linear(input_channels, hidden_channels)
            self.func = F()
            self.readout = torch.nn.Linear(hidden_channels, output_channels)

        def forward(self, coeffs):
            X = torchcde.NaturalCubicSpline(coeffs)
            z0 = self.initial(X.evaluate(X.interval[0]))
            zt = torchcde.cdeint(X=X, func=self.func, z0=z0, t=X.interval)
            # get the terminal value of the CDE
            return self.readout(zt[:, -1])

    # Run model
    Model()(coeffs)
def test_shape(backend, method, kwargs):
    """Output of ``cdeint`` has shape ``(*batch_dims, num_out_times, num_hidden_channels)``.

    Five randomly sized problems are solved. With the "torchdiffeq" backend the
    number of batch dimensions is random (0, 1 or 2); with "torchsde" exactly
    one batch dimension is used.

    Args:
        backend: "torchdiffeq" or "torchsde"; anything else raises ValueError.
        method: solver name forwarded to ``cdeint``.
        kwargs: extra keyword arguments forwarded to ``cdeint``.
    """

    def _draw(low, high):
        # One random Python int in [low, high).
        return torch.randint(low=low, high=high, size=(1,)).item()

    for _ in range(5):
        num_points = _draw(5, 100)
        num_channels = _draw(1, 3)
        num_hidden_channels = _draw(1, 5)

        if backend == "torchdiffeq":
            num_batch_dims = _draw(0, 3)
            batch_dims = [_draw(1, 3) for _ in range(num_batch_dims)]
        elif backend == "torchsde":
            num_batch_dims = 1
            batch_dims = [_draw(1, 3)]
        else:
            raise ValueError

        values = torch.rand(*batch_dims, num_points, num_channels)
        spline = torchcde.CubicSpline(torchcde.natural_cubic_coeffs(values))

        class _Func(torch.nn.Module):
            def __init__(self):
                super().__init__()
                # Size-1 batch dims so the parameter broadcasts over the batch.
                ones = [1] * num_batch_dims
                self.variable = torch.nn.Parameter(torch.rand(*ones, 1, num_channels))

            def forward(self, t, z):
                return z.sigmoid().unsqueeze(-1) + self.variable

        f = _Func()
        z0 = torch.rand(*batch_dims, num_hidden_channels)

        num_out_times = _draw(2, 10)
        start, end = spline.interval
        # Sorted random output times rescaled into the spline's interval.
        unit_times = torch.rand(num_out_times, dtype=torch.float64).sort().values
        out_times = unit_times * (end - start) + start

        out = torchcde.cdeint(spline, f, z0, out_times, backend=backend, method=method, rtol=1e-1, atol=1e-1,
                              **kwargs)
        assert out.shape == (*batch_dims, num_out_times, num_hidden_channels)
def _subsample_irregular(ts, x, y, keep_fraction=0.8):
    """Keep a random, time-ordered subset of the steps of every sequence.

    Each sequence draws its own subset (without replacement) via
    ``np.random.choice``; indices are sorted so time order is preserved.

    Args:
        ts: timestamps, indexed as ``ts[i, steps, :]`` with a last axis of size 1.
        x: features, indexed as ``x[i, steps, :]``.
        y: labels; subsampled along axis 1 only when it has more than one
            dimension, otherwise returned unchanged.
        keep_fraction: fraction of time steps to keep.

    Returns:
        ``(new_ts, new_x, new_y)`` with ``int(seq_len * keep_fraction)`` steps.
    """
    seq_len = ts.size(1)
    seq_len_new = int(seq_len * keep_fraction)
    per_step_labels = y.dim() > 1

    new_ts = torch.zeros((ts.size(0), seq_len_new, 1))
    new_x = torch.zeros((x.size(0), seq_len_new, x.size(2)))
    new_y = torch.zeros((y.size(0), seq_len_new)) if per_step_labels else None

    for i in range(len(ts)):
        kept = np.random.choice(np.arange(0, seq_len), size=seq_len_new, replace=False)
        kept.sort()
        new_ts[i, :, :] = ts[i, kept, :]
        new_x[i, :, :] = x[i, kept, :]
        if per_step_labels:
            new_y[i, :] = y[i, kept]

    if per_step_labels:
        # The buffer is float; restore the integer label dtype.
        y = new_y.to(torch.long)
    return new_ts, new_x, y


def load_dataset(
    ds="activity",
    timestamps=True,
    coeffs=False,
    irregular=True,
    transpose=False,
    batch_size=128,
    data_dir="../data/person",
):
    """Obtains dataloaders for training different networks on different datasets

    Args:
        ds: dataset to load. Options: activity/p300.
        timestamps: whether to have timestamps in dataloader. Some architectures
            need them, some don't.
        coeffs: whether to have features as raw data or as natural cubic spline
            coefficients. Needed for Neural CDE. The coefficients are computed
            from the timestamps concatenated with the (possibly subsampled)
            features, so time is the first channel of the interpolated path.
        irregular: whether to make the dataset irregular by dropping 20% of its
            values.
        transpose: if False batch shape is (batch, seq_len, channels),
            if True -- (batch, channels, seq_len). Cannot be combined with
            ``coeffs`` (the timestamp concatenation needs seq_len on axis 1).
        batch_size: simply batch size.
        data_dir: directory, where data files are stored.

    Returns:
        Tuple ``(trainloader, testloader, in_features, num_classes,
        return_sequences, class_balance)``. ``in_features`` is the size of the
        last axis of the feature tensor; with ``coeffs=True`` it is the channel
        count of the interpolated path (features plus the time channel).
        ``return_sequences`` is always True. ``class_balance`` is the per-class
        count in the test labels divided by the smallest count.

    Raises:
        ValueError: if ``ds`` is not "activity" or "p300".
    """
    if ds == "activity":
        dataset = PersonData(data_dir=data_dir)
        train_ts = torch.Tensor(dataset.train_t)
        test_ts = torch.Tensor(dataset.test_t)
    elif ds == "p300":
        dataset = P300Dataset(data_dir=data_dir)
        dataset.get_data_for_experiments(True)
        # Add a trailing axis of size 1 to the p300 timestamps.
        train_ts = torch.Tensor(dataset.train_t)[:, :, None]
        test_ts = torch.Tensor(dataset.test_t)[:, :, None]
    else:
        raise ValueError(f'No such dataset: {ds}, try "activity" or "p300"')

    train_x = torch.Tensor(dataset.train_x)
    test_x = torch.Tensor(dataset.test_x)
    train_y = torch.LongTensor(dataset.train_y)
    test_y = torch.LongTensor(dataset.test_y)

    if irregular:
        # Train first, then test, so the order of random draws is unchanged.
        train_ts, train_x, train_y = _subsample_irregular(train_ts, train_x, train_y)
        test_ts, test_x, test_y = _subsample_irregular(test_ts, test_x, test_y)

    if transpose:
        # Swap the sequence and channel axes. A plain reshape would produce the
        # right shape but scramble which value belongs to which (step, channel).
        train_x = train_x.transpose(1, 2).contiguous()
        test_x = test_x.transpose(1, 2).contiguous()

    # NOTE(review): with transpose=True the last axis is seq_len, not channels --
    # confirm that is what consumers of in_features expect.
    in_features = train_x.size(-1)

    if coeffs:
        # Interpolate the data actually being returned (subsampled, with time as
        # a channel), not the raw dataset arrays, so x stays aligned with ts/y.
        train_x = torch.cat([train_ts, train_x], dim=2)
        test_x = torch.cat([test_ts, test_x], dim=2)
        # Report the channel count of the interpolated path (includes time).
        in_features = train_x.size(-1)
        train_x = torchcde.natural_cubic_coeffs(train_x)
        test_x = torchcde.natural_cubic_coeffs(test_x)

    if timestamps:
        train = data.TensorDataset(train_x, train_ts, train_y)
        test = data.TensorDataset(test_x, test_ts, test_y)
    else:
        train = data.TensorDataset(train_x, train_y)
        test = data.TensorDataset(test_x, test_y)

    return_sequences = True

    counts = test_y.unique(return_counts=True)[1].to(torch.float)
    class_balance = counts / counts.min()

    trainloader = data.DataLoader(
        train, batch_size=batch_size, shuffle=True, num_workers=4
    )
    testloader = data.DataLoader(
        test, batch_size=batch_size, shuffle=False, num_workers=4
    )

    # Labels are assumed to be 0..K-1, so K is the largest label plus one.
    num_classes = int(torch.max(train_y).item() + 1)

    return (
        trainloader,
        testloader,
        in_features,
        num_classes,
        return_sequences,
        class_balance,
    )
def interp_():
    """Yield two interpolations of ``path``, built lazily one at a time.

    First a natural cubic spline, then a linear interpolation constructed with
    ``reparameterise='bump'``. ``path`` comes from the enclosing scope.
    """
    yield torchcde.NaturalCubicSpline(torchcde.natural_cubic_coeffs(path))
    yield torchcde.LinearInterpolation(
        torchcde.linear_interpolation_coeffs(path),
        reparameterise='bump',
    )