def test_manual_seed(self):
    """Check how `manual_seed` affects the global torch RNG state.

    The state must be unchanged inside an unseeded block, different inside a
    seeded block, and equal to the initial state once the seeded block exits.
    """
    reference_state = torch.random.get_rng_state()

    def state_matches_reference():
        # Element-wise comparison of the current RNG state with the snapshot.
        return torch.all(torch.random.get_rng_state() == reference_state)

    with manual_seed():
        self.assertTrue(state_matches_reference())
    with manual_seed(1234):
        self.assertFalse(state_matches_reference())
    self.assertTrue(state_matches_reference())
def test_MultivariateNormalQMCEngineDegenerate(self, cuda=False):
    """Sample from a degenerate 3-d normal and check the sample statistics.

    The random vector is (X, Y, Z) with X, Y iid standard normal and
    Z = X + Y, so the covariance matrix is singular.

    Args:
        cuda: If True, build the inputs on the "cuda" device, else on "cpu".
    """
    device = torch.device("cuda" if cuda else "cpu")
    cov_entries = [[1, 0, 1], [0, 1, 1], [1, 1, 2]]
    # Expected standard deviation of each of the three components.
    expected_stds = (1, 1, math.sqrt(2))
    for dtype in (torch.float, torch.double):
        engine = MultivariateNormalQMCEngine(
            mean=torch.zeros(3, device=device, dtype=dtype),
            cov=torch.tensor(cov_entries, device=device, dtype=dtype),
            seed=12345,
        )
        samples = engine.draw(n=2000)
        self.assertEqual(samples.dtype, dtype)
        self.assertEqual(samples.device.type, device.type)

        # First and second moments.
        self.assertTrue(torch.all(samples.mean(dim=0).abs() < 1e-2))
        for column, expected_std in enumerate(expected_stds):
            deviation = torch.abs(torch.std(samples[:, column]) - expected_std)
            self.assertTrue(deviation < 1e-2)

        # Shapiro-Wilk normality test on every marginal.
        samples_np = samples.cpu().numpy()
        for column in range(3):
            _, p_value = shapiro(samples_np[:, column])
            self.assertGreater(p_value, 0.9)

        sample_cov = np.cov(samples_np.transpose())
        self.assertLess(np.abs(sample_cov[0, 1]), 1e-2)
        self.assertLess(np.abs(sample_cov[0, 2] - 1), 1e-2)

        # X + Y should reproduce Z almost exactly.
        residual = samples[:, 0] + samples[:, 1] - samples[:, 2]
        self.assertTrue(torch.all(torch.abs(residual) < 1e-5))
def test_qEI(self, cuda=False):
    """Optimise qEI on both fixture models and check the candidates' range.

    For each precision the fixtures are rebuilt through `self._setUp`, then
    `joint_optimize` is run on `self.model_st` and on `self.model_fn`; every
    candidate entry must lie in [-EPS, 1 + EPS].

    Args:
        cuda: Forwarded to `self._setUp`.
    """
    for double in (True, False):
        self._setUp(double=double, cuda=cuda)
        # Look the model up by name so each one is read right before it is used.
        for model_attr in ("model_st", "model_fn"):
            acqf = qExpectedImprovement(getattr(self, model_attr), best_f=0.0)
            candidates = joint_optimize(
                acq_function=acqf,
                bounds=self.bounds,
                q=3,
                num_restarts=10,
                raw_samples=20,
                options={"maxiter": 5},
            )
            self.assertTrue(torch.all(-EPS <= candidates))
            self.assertTrue(torch.all(candidates <= 1 + EPS))
def test_MockPosterior(self):
    """Check that MockPosterior echoes its mean, variance and samples."""
    expected_mean = torch.rand(2)
    expected_variance = torch.eye(2)
    base_samples = torch.rand(1, 2)
    posterior = MockPosterior(
        mean=expected_mean, variance=expected_variance, samples=base_samples
    )

    self.assertTrue(torch.equal(posterior.mean, expected_mean))
    self.assertTrue(torch.equal(posterior.variance, expected_variance))

    # Default sample shape: the stored samples with one leading dimension.
    default_draw = posterior.sample()
    self.assertTrue(torch.all(default_draw == base_samples.unsqueeze(0)))

    # Explicit sample shape of 2: the stored samples repeated twice.
    double_draw = posterior.sample(torch.Size([2]))
    self.assertTrue(torch.all(double_draw == base_samples.repeat(2, 1, 1)))
def test_NormalQMCEngineShapiroInvTransform(self):
    """Check moments, normality and independence of inverse-transform draws."""
    samples = NormalQMCEngine(d=2, seed=12345, inv_transform=True).draw(n=250)
    self.assertEqual(samples.dtype, torch.float)

    # Sample mean close to 0 and sample std close to 1 in every dimension.
    mean_error = torch.abs(samples.mean(dim=0))
    std_error = torch.abs(samples.std(dim=0) - 1)
    self.assertTrue(torch.all(mean_error < 1e-2))
    self.assertTrue(torch.all(std_error < 1e-2))

    # Shapiro-Wilk test for normality on each marginal.
    for column in range(2):
        _, p_value = shapiro(samples[:, column])
        self.assertGreater(p_value, 0.9)

    # The two dimensions should be uncorrelated.
    sample_cov = np.cov(samples.numpy().transpose())
    self.assertLess(np.abs(sample_cov[0, 1]), 1e-2)
def test_serialization_built_vocab(self):
    """Round-trip a Field with a built vocab through torch.save / torch.load.

    The reloaded field must compare equal to the original and must
    numericalize the same padded batch to identical tensors.
    """
    self.write_test_ppid_dataset(data_format="tsv")
    question_field = data.Field(sequential=True)
    dataset = data.TabularDataset(
        path=self.test_ppid_dataset_path,
        format="tsv",
        fields=[
            ("id", None),
            ("q1", question_field),
            ("q2", question_field),
            ("label", None),
        ],
    )
    question_field.build_vocab(dataset)

    pickle_path = os.path.join(self.test_dir, "question.pl")
    torch.save(question_field, pickle_path)
    reloaded_field = torch.load(pickle_path)
    assert reloaded_field == question_field

    # Already-padded batch; every row has eight tokens.
    padded_batch = [
        ["When", "do", "you", "use", "シ", "instead", "of", "し?"],
        ["What", "is", "2+2"] + ["<pad>"] * 5,
        ["Here", "is", "a", "sentence", "with", "some", "oovs", "<pad>"],
    ]

    # Both fields must produce the same numericalization.
    from_original = question_field.numericalize(padded_batch)
    from_reloaded = reloaded_field.numericalize(padded_batch)
    assert torch.all(torch.eq(from_original, from_reloaded))
def test_serialization(self):
    """Round-trip a NestedField through torch.save / torch.load.

    The reloaded field must compare equal to the original and must
    numericalize the same padded character batch to identical tensors.
    """
    field = data.NestedField(data.Field(batch_first=True))
    named_fields = [("words", field)]
    examples = [
        data.Example.fromlist([sentence], named_fields)
        for sentence in ("john loves mary", "mary cries")
    ]
    field.build_vocab(data.Dataset(examples, named_fields))

    def word_row(symbols, pad=0):
        # One word: <w> symbols... </w>, followed by `pad` padding tokens.
        return ["<w>"] + list(symbols) + ["</w>"] + ["<cpad>"] * pad

    sentence_start = word_row(["<s>"], pad=4)
    sentence_end = word_row(["</s>"], pad=4)
    examples_data = [
        [
            sentence_start,
            word_row("john", pad=1),
            word_row("loves"),
            word_row("mary", pad=1),
            sentence_end,
        ],
        [
            list(sentence_start),
            word_row("mary", pad=1),
            word_row("cries"),
            list(sentence_end),
            ["<cpad>"] * 7,
        ],
    ]

    pickle_path = os.path.join(self.test_dir, "char_field.pl")
    torch.save(field, pickle_path)
    reloaded_field = torch.load(pickle_path)
    assert reloaded_field == field

    from_original = field.numericalize(examples_data)
    from_reloaded = reloaded_field.numericalize(examples_data)
    assert torch.all(torch.eq(from_original, from_reloaded))
def test_batch_eval_neg_styblinski_tang(self, cuda=False):
    """Evaluate neg_styblinski_tang on a 2 x 3 batch of zeros.

    The result must keep the input dtype and device type, have shape (2,)
    and be identically zero.

    Args:
        cuda: If True, build the input on the "cuda" device, else on "cpu".
    """
    device = torch.device("cuda" if cuda else "cpu")
    for dtype in (torch.float, torch.double):
        values = neg_styblinski_tang(torch.zeros(2, 3, device=device, dtype=dtype))
        self.assertEqual(values.dtype, dtype)
        self.assertEqual(values.device.type, device.type)
        self.assertEqual(values.shape, torch.Size([2]))
        self.assertTrue(torch.all(values == 0))
def test_MultivariateNormalQMCEngineShapiroInvTransform(self, cuda=False):
    """Check inverse-transform draws for a standard and a correlated 2-d normal.

    Args:
        cuda: If True, build the inputs on the "cuda" device, else on "cpu".
    """
    device = torch.device("cuda" if cuda else "cpu")

    def check_draws(mean, cov, expected_std, expected_cross_cov):
        # Draw 250 points and verify dtype/device, moments, normality of the
        # marginals and the off-diagonal sample covariance.
        engine = MultivariateNormalQMCEngine(
            mean=mean, cov=cov, seed=12345, inv_transform=True
        )
        samples = engine.draw(n=250)
        self.assertEqual(samples.dtype, mean.dtype)
        self.assertEqual(samples.device.type, device.type)
        self.assertTrue(torch.all(torch.abs(samples.mean(dim=0) - mean) < 1e-2))
        self.assertTrue(
            torch.all(torch.abs(samples.std(dim=0) - expected_std) < 1e-2)
        )
        # Shapiro-Wilk test for normality on each marginal.
        samples_np = samples.cpu().numpy()
        for column in range(2):
            _, p_value = shapiro(samples_np[:, column])
            self.assertGreater(p_value, 0.9)
        sample_cov = np.cov(samples_np.transpose())
        self.assertLess(np.abs(sample_cov[0, 1] - expected_cross_cov), 1e-2)

    for dtype in (torch.float, torch.double):
        # Standard case: zero mean, identity covariance, uncorrelated draws.
        check_draws(
            mean=torch.zeros(2, device=device, dtype=dtype),
            cov=torch.eye(2, device=device, dtype=dtype),
            expected_std=1,
            expected_cross_cov=0.0,
        )
        # Correlated case with a non-zero mean.
        check_draws(
            mean=torch.tensor([1.0, 2.0], device=device, dtype=dtype),
            cov=torch.tensor([[1.5, 0.5], [0.5, 1.5]], device=device, dtype=dtype),
            expected_std=math.sqrt(1.5),
            expected_cross_cov=0.5,
        )
def test_to_data():
    """Check `to_data` on a fastai Category and on a plain torch tensor."""
    this_tests(to_data)

    # Labels taken from an image data bunch built from the MNIST sample.
    bunch = ImageDataBunch.from_folder(untar_data(URLs.MNIST_SAMPLE))
    categories = list(bunch.y)

    # A hand-built tensor, split into a list along its first row.
    first_pair = ([1, 2, 3], [3, 6, 6])
    second_pair = ([4, 5, 6], [4, 7, 7])
    raw = torch.tensor([first_pair, second_pair])
    rows = list(raw[0])

    # Category branch: `to_data` returns the wrapped numpy integer.
    assert isinstance(bunch, fastai.vision.data.ImageDataBunch)
    assert isinstance(bunch.y, ItemList)
    assert isinstance(categories, list)
    first_category = categories[0]
    assert isinstance(first_category, Category)
    assert isinstance(first_category.data, np.int64)
    assert isinstance(to_data(first_category), np.int64)
    assert first_category.data == to_data(first_category)

    # Tensor branch: `to_data` returns a tensor equal to `.data`.
    assert isinstance(raw, torch.Tensor)
    assert isinstance(raw[0], torch.Tensor)
    assert isinstance(rows, list)
    first_row = rows[0]
    assert isinstance(first_row, torch.Tensor)
    assert isinstance(first_row.data, torch.Tensor)
    assert isinstance(to_data(first_row), torch.Tensor)
    assert torch.all(torch.eq(first_row.data, to_data(first_row)))
def __init__(
    self,
    mean: Tensor,
    cov: Tensor,
    seed: Optional[int] = None,
    inv_transform: bool = False,
) -> None:
    r"""Set up qMC sampling from a multivariate Normal `N(\mu, \Sigma)`.

    Args:
        mean: The mean vector.
        cov: The covariance matrix.
        seed: The seed passed on to the underlying NormalQMCEngine.
        inv_transform: If True, use inverse transform instead of Box-Muller.

    Raises:
        ValueError: If `cov` is not square, its size does not match `mean`,
            it is not symmetric, or it has an eigenvalue below -1e-8.
    """
    dim = mean.shape[0]
    n_rows, n_cols = cov.shape[0], cov.shape[1]
    if n_rows != n_cols:
        raise ValueError("Covariance matrix is not square.")
    if dim != n_rows:
        raise ValueError("Dimension mismatch between mean and covariance.")
    if not torch.allclose(cov, cov.transpose(-1, -2)):
        raise ValueError("Covariance matrix is not symmetric.")

    self._mean = mean
    self._normal_engine = NormalQMCEngine(
        d=dim, seed=seed, inv_transform=inv_transform
    )

    # Prefer the Cholesky factor; when the factorization fails, build the
    # factor from the eigendecomposition instead.
    try:
        factor = torch.cholesky(cov)
    except RuntimeError:
        eigenvalues, eigenvectors = torch.symeig(cov, eigenvectors=True)
        if not torch.all(eigenvalues >= -1e-8):
            raise ValueError("Covariance matrix not PSD.")
        # Slightly negative eigenvalues are clamped to zero before the root.
        factor = eigenvectors * torch.sqrt(torch.clamp_min(eigenvalues, 0.0))
    self._corr_matrix = factor.transpose(-1, -2)
q_estimates = q_values[:1] + torch.sum(discounts[:-1] * torch.cumprod(importance_weights, 0) * deltas, 0, keepdim=True) return q_estimates if __name__ == '__main__': import torch # length x batch size x dim importance_weights = torch.rand((4, 9, 1)) q_values = torch.rand((4, 9, 1)) rewards = torch.rand((3, 9, 1)) print('Test degenerate case lambda is -1 and discount is 0') print('The target should be equal to the first q value') one_target = retrace(q_values, rewards, importance_weights, l=-1, discount=0.) assert torch.all(one_target == q_values[1]) print('Test degenerate case lambda is 1 and discount is 0') print('The target should be equal to the first reward') one_target = retrace(q_values, rewards, importance_weights, l=1., discount=0.) assert torch.all(one_target == rewards[:1]) print('Test Monte Carlo rollouts') print('The target should be the undiscounted sum of reward plus the value function') one_target = retrace(q_values, rewards, None, discount=1., l=1.) mc_rollouts = torch.sum(rewards, 0, keepdim=True) + q_values[-1:] assert torch.all(torch.isclose(one_target, mc_rollouts)) print('Test Discounted Monte Carlo rollouts') print('The target should be the iscounted sum of reward plus the value function') one_target = retrace(q_values, rewards, None, discount=.99, l=1.)
def test_torch_tensor(self):
    """Check that `tensor` and `torch.tensor` agree element-wise on [1, 2, 3]."""
    from_helper = tensor([1, 2, 3])
    from_torch = torch.tensor([1, 2, 3])
    elementwise_equal = from_helper == from_torch
    self.assertTrue(torch.all(elementwise_equal))
def TimedSolve(MAXITER=30000, SEED=None):
    """Generate a random maze problem, optimise a jerk trajectory and time it.

    The jerk tensor returned by `PlanTrajectories` is optimised with Adam until
    every violation measure is zero or `MAXITER` iterations have run. Progress
    and timing are printed to stdout.

    Args:
        MAXITER: Maximum number of optimisation iterations.
        SEED: When not None, seeds numpy and torch (CPU and all CUDA devices).
            When None, numpy and the CUDA generators are re-seeded without an
            explicit seed; the torch CPU generator is left untouched.

    Returns:
        The positions from the last integration step as a numpy array.

    Notes:
        * The function blocks on `input()` until ENTER is pressed.
        * NOTE(review): with `MAXITER <= 0` the loop body never runs and `pos`
          is unbound at the `return`.
    """
    if SEED is not None:
        np.random.seed(SEED)
        torch.cuda.manual_seed_all(SEED)
        torch.manual_seed(SEED)
    else:
        np.random.seed()
        torch.cuda.seed_all()
    # Initialize a random problem
    starts, goals = maze.randomProblem()
    # Build the start and goal position tensors used for the rest of the computation.
    # NOTE(review): the original comment said these are loaded "onto the GPU", but
    # no device is passed here, so they are created on the default device - confirm.
    # Starts get a small random perturbation in the first two coordinates and a
    # small random third coordinate; goals get a zero third coordinate.
    startPosition = torch.cat([
        torch.as_tensor(starts, dtype=torch.float) + 1e-3 *
        (torch.rand(starts.shape[0], 2) - 0.5),
        1e-3 * torch.rand(starts.shape[0], 1)
    ], dim=-1)
    goalPosition = torch.cat([
        torch.as_tensor(goals, dtype=torch.float),
        torch.zeros(goals.shape[0], 1)
    ], dim=-1)
    input('Random Problem Initialized. Press ENTER to solve!')
    print('Solving... ', end='', flush=True)
    # Timing starts after the user confirms, so the wait is not measured.
    startTime = time.time()
    # Plan the initial trajectories
    jerk = PlanTrajectories(starts, goals, maze)
    # The jerk tensor is the only variable being optimised.
    jerk.requires_grad = True
    optimizer = torch.optim.Adam([jerk], lr=LEARNING_RATE)
    i = 0
    solved = False
    while i < MAXITER:
        # zero back-propagated gradients
        optimizer.zero_grad()
        # integrate the jerk trajectory
        pos, vel, acc = IntegrateJerkTrajectory(jerk, startPosition)
        mazeCollisions = maze.collisions(pos)
        # compute the various losses
        floss, wloss, thrustInfeasible, rateInfeasible = QuadrocopterDynamicsConstraints(
            pos, vel, acc, jerk)
        # NOTE(review): `closs` is never used below; the total loss uses
        # `quadCollisions` instead - confirm this is intended.
        quadCollisions, closs = QuadCollisionConstraint(pos)
        ploss, vloss, finalPosInfeasible, finalVelInfeasible = GoalConstraints(
            pos, vel, acc, jerk, goalPosition)
        # compute the total loss
        loss = W_THRUST * floss.sum() + \
            W_BODYRATE * wloss.sum() + \
            W_QUAD_COLLISION * quadCollisions + \
            W_MAZE_COLLISION * mazeCollisions + \
            W_GOAL_POS * ploss.sum() + \
            W_GOAL_VEL * vloss.sum()
        if VERBOSE and i % 100 == 0:
            print(
                "Iteration %d, Violations: Thrust=%d, Rate=%d, QColl=%d, MColl=%d, Final Pos=%d, Final Vel=%d"
                % (i, int(thrustInfeasible.sum()), int(rateInfeasible.sum()),
                   int(quadCollisions), int(mazeCollisions),
                   int(finalPosInfeasible.sum()), int(finalVelInfeasible.sum())),
                flush=True)
        # The counter is bumped before the solved check, so the reported
        # iteration count includes the iteration that found the solution.
        i += 1
        if torch.all(finalPosInfeasible == 0) and torch.all(
                finalVelInfeasible == 0) and torch.all(
                    thrustInfeasible == 0) and torch.all(
                        rateInfeasible == 0) and quadCollisions == 0 and mazeCollisions == 0:
            solved = True
            # Stop without taking another optimiser step.
            break
        loss.backward()  # back-propagate gradients
        optimizer.step()
    if solved:
        print("Solved in %.3f seconds (%d iterations)" %
              (time.time() - startTime, i))
    else:
        print(
            "Unsolved after maximum iterations reached (%.3f seconds, %d iterations)"
            % (time.time() - startTime, i))
    return pos.detach().cpu().numpy(
    )  # return, for example, the position trajectory as a numpy array for plotting
def test_final(self):
    """Expand ragged attributes of an FSA with a final arc and check the result.

    Runs on CPU and, when available, on CUDA device 0, both with and without
    requesting the arc map. Checks the expanded float, score, int and ragged
    attributes, that non-tensor attributes are carried over, and that
    gradients flow back to the source FSA.
    """
    devices = [torch.device('cpu')]
    if torch.cuda.is_available():
        devices += [torch.device('cuda', 0)]

    for device in devices:

        def matches(actual, values, dtype):
            # Exact element-wise equality against a literal on this device.
            expected = torch.tensor(values, dtype=dtype, device=device)
            return torch.all(torch.eq(actual, expected))

        for need_map in (True, False):
            s = '''
                0 1 2 10
                0 1 1 20
                1 2 -1 30
                2
            '''
            src = k2.Fsa.from_str(s).to(device).requires_grad_(True)
            src.float_attr = torch.tensor([0.1, 0.2, 0.3],
                                          dtype=torch.float32,
                                          requires_grad=True,
                                          device=device)
            src.int_attr = torch.tensor([1, 2, 3],
                                        dtype=torch.int32,
                                        device=device)
            src.ragged_attr = k2.RaggedInt('[[1 2 3] [5 6] [1]]').to(device)
            src.attr1 = 'src'
            src.attr2 = 'fsa'

            if need_map:
                dest, _arc_map = k2.expand_ragged_attributes(
                    src, ret_arc_map=True)
            else:
                dest = k2.expand_ragged_attributes(src)

            assert torch.allclose(
                dest.float_attr,
                torch.tensor([0.1, 0.2, 0.0, 0.0, 0.0, 0.3, 0.0],
                             dtype=torch.float32,
                             device=device))
            assert matches(dest.scores, [10, 20, 0, 0, 0, 30, 0],
                           torch.float32)
            assert matches(dest.int_attr, [1, 2, 0, 0, 0, 3, 0], torch.int32)

            # After fixing the final labels the last entry becomes -1.
            _k2.fix_final_labels(dest.arcs, dest.int_attr)
            assert matches(dest.int_attr, [1, 2, 0, 0, 0, 3, -1], torch.int32)
            assert matches(dest.ragged_attr, [1, 5, 2, 3, 6, 1, -1],
                           torch.float32)

            # non-tensor attributes...
            assert dest.attr1 == src.attr1
            assert dest.attr2 == src.attr2

            # now for autograd
            scale = torch.tensor([10, 20, 10, 10, 10, 30, 10], device=device)
            (dest.float_attr * scale).sum().backward()
            (dest.scores * scale).sum().backward()
            expected_grad = torch.tensor([10, 20, 30],
                                         dtype=torch.float32,
                                         device=device)
            assert torch.all(torch.eq(src.float_attr.grad, expected_grad))
            assert torch.all(torch.eq(src.scores.grad, expected_grad))
def test_constant(mu, expected):
    """Check the rates and the first probability of a ConstantSiteModel.

    Args:
        mu: Second argument passed to the ConstantSiteModel constructor.
        expected: Value every entry of `rates()` must equal.
    """
    model = ConstantSiteModel('constant', mu)
    rates = model.rates()
    assert torch.all(rates == expected)
    first_probability = model.probabilities()[0]
    assert first_probability == 1.0
samples_per_gpu=batch, workers_per_gpu=cfg.data.workers_per_gpu, dist=True, shuffle=False) d = next(iter(data_loader)) optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001) for i in range(10): optimizer.zero_grad() t2 = time() loss = model(**d) loss, log_vars = _parse_losses(loss) loss.backward() optimizer.step() if i!=0: print(torch.all(torch.eq(next(model.parameters()),p))) n, p = next(model.named_parameters()) print(time()-t2) # for i, data in enumerate(data_loader): # y_pred = model(data) # d = next(iter(data_loader)) # model(d) times = [] for i, data in enumerate(data_loader): with torch.no_grad(): # d, t = inference_detector(model, "data/waymococo_f0/val2020/val_00000_00000_camera1.jpg", cfg)
def test_MaxValueEntropySearch(self):
    """Exercise fit / gen / best_point and the constructor flags of the model."""

    def fitted_model(**model_kwargs):
        # Construct a model with the given flags and fit it on the fixture data.
        fitted = MaxValueEntropySearch(**model_kwargs)
        fitted.fit(
            Xs=self.Xs,
            Ys=self.Ys,
            Yvars=self.Yvars,
            bounds=self.bounds,
            feature_names=self.feature_names,
            metric_names=self.metric_names,
            task_features=[],
            fidelity_features=[],
        )
        return fitted

    def gen_options():
        # A fresh options dict for every call that takes model_gen_options.
        return {
            "acquisition_function_kwargs": self.acq_options,
            "optimizer_kwargs": self.optimizer_options,
        }

    model = fitted_model()

    # model.gen(): the patched optimizer returns a fixed candidate.
    new_X_dummy = torch.rand(1, 1, 3, dtype=self.dtype, device=self.device)
    with mock.patch(
        self.optimize_acqf, side_effect=[(new_X_dummy, None)]
    ) as mock_optimize_acqf:
        Xgen, wgen, _, __ = model.gen(
            n=1,
            bounds=self.bounds,
            objective_weights=self.objective_weights,
            outcome_constraints=None,
            linear_constraints=None,
            model_gen_options=gen_options(),
        )
        self.assertTrue(torch.equal(Xgen, new_X_dummy.cpu()))
        self.assertTrue(torch.equal(wgen, torch.ones(1, dtype=self.dtype)))
        mock_optimize_acqf.assert_called_once()

    # The best point must lie within the bounds, up to a small tolerance.
    xbest = model.best_point(
        bounds=self.bounds,
        objective_weights=self.objective_weights,
        outcome_constraints=None,
        linear_constraints=None,
        model_gen_options=gen_options(),
    )
    lower = torch.tensor([bound[0] for bound in self.bounds]) - 1e-5
    upper = torch.tensor([bound[1] for bound in self.bounds]) + 1e-5
    self.assertTrue(torch.all(xbest <= upper))
    self.assertTrue(torch.all(xbest >= lower))

    # gen() rejects linear constraints.
    linear_constraints = (
        torch.tensor([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
        torch.tensor([[0.5], [1.0]]),
    )
    with self.assertRaises(UnsupportedError):
        model.gen(
            n=1,
            bounds=self.bounds,
            objective_weights=self.objective_weights,
            linear_constraints=linear_constraints,
        )

    # gen() rejects more than one objective weight.
    two_objective_weights = torch.tensor(
        [1.0, 1.0], dtype=self.dtype, device=self.device
    )
    with self.assertRaises(UnsupportedError):
        model.gen(
            n=1, bounds=self.bounds, objective_weights=two_objective_weights
        )

    # best_point() error cases.
    with self.assertRaises(UnsupportedError):
        model.best_point(
            bounds=self.bounds,
            objective_weights=self.objective_weights,
            linear_constraints=linear_constraints,
        )
    with self.assertRaises(RuntimeError):
        model.best_point(
            bounds=self.bounds,
            objective_weights=self.objective_weights,
            target_fidelities={2: 1.0},
        )

    # Input warping is off by default and adds a Warp transform when enabled.
    self.assertFalse(model.use_input_warping)
    model = fitted_model(use_input_warping=True)
    self.assertTrue(model.use_input_warping)
    self.assertTrue(hasattr(model.model, "input_transform"))
    self.assertIsInstance(model.model.input_transform, Warp)

    # LOOCV pseudo likelihood is off by default.
    self.assertFalse(model.use_loocv_pseudo_likelihood)
    model = fitted_model(use_loocv_pseudo_likelihood=True)
    self.assertTrue(model.use_loocv_pseudo_likelihood)
def determine_splits(self) -> torch.Tensor:
    """
    Decide which (task, task, region) triples should be split at this step.

    No split happens unless at least one entry of the gradient statistics has
    reached `split_step_threshold` steps and the current step count is a
    multiple of `split_freq`. Otherwise the `splits_per_step` largest positive
    normalized distances are selected; entries tying with the smallest
    selected score are split as well.

    Returns
    -------
    should_split : torch.Tensor
        Tensor of size (self.num_tasks, self.num_tasks, self.num_regions), where
        `should_split[i, j, k]` holds 1/True if tasks `i, j` should be split at
        region `k`, and 0/False otherwise. When nothing is split this is a
        tensor of zeros; otherwise it is the result of a `>=` comparison.
    """
    no_split = torch.zeros(
        self.num_tasks, self.num_tasks, self.num_regions, device=self.device
    )
    stats = self.grad_diff_stats

    # Bail out when no entry has enough steps yet, or when this step is not a
    # multiple of the splitting frequency.
    if (
        torch.all(stats.num_steps < self.split_step_threshold)
        or self.num_steps % self.split_freq != 0
    ):
        return no_split

    # Normalize each squared gradient distance by its region size.
    scores = stats.mean / self.region_sizes

    # Zero out entries that are not shared, that have too few steps, or that
    # lie on or below the task diagonal (so each task pair is counted once).
    scores *= self.splitting_map.shared_regions()
    scores *= stats.num_steps >= self.split_step_threshold
    pair_mask = torch.triu(
        torch.ones(self.num_tasks, self.num_tasks, device=self.device), diagonal=1
    )
    scores *= pair_mask.unsqueeze(-1).expand(-1, -1, self.num_regions)

    # Only strictly positive scores are candidates for splitting.
    flat = scores.view(-1)
    candidates = flat[flat > 0]
    num_candidates = candidates.shape[0]
    if num_candidates == 0:
        return no_split

    top_scores, _ = torch.topk(candidates, min(self.splits_per_step, num_candidates))
    # Everything at least as large as the smallest selected score is split.
    return scores >= top_scores[-1]
def sample(self, src_sents: List[List[str]], sample_size=5, max_decoding_time_step=100) -> List[Hypothesis]:
    """
    Randomly sample target sentences for a batch of source sentences by
    drawing one word per step from the model's softmax output.

    Args:
        src_sents: a list of batched source sentences
        sample_size: number of samples drawn for each source sentence
        max_decoding_time_step: maximum number of decoding steps

    Returns:
        A nested list indexed as [source sentence][sample]; each entry is a
        Hypothesis with
            value: the decoded words, with the last collected token dropped
            score: sum of the log-probabilities of the sampled words, counting
                only the steps before the sample produced '</s>'
    """
    batch_size = len(src_sents)
    total_sample_size = sample_size * batch_size
    src_lens = [len(sent) for sent in src_sents]

    src_sents_var = self.vocab.src.to_input_tensor(src_sents, self.device)
    src_encodings, dec_init_vec = self.encode(src_sents_var, src_lens)
    src_encodings_att_linear = self.att_src_linear(src_encodings)

    # Tile everything `sample_size` times along the batch dimension, so flat
    # row `i` belongs to source sentence `i % batch_size`, sample `i // batch_size`.
    src_encodings = src_encodings.repeat(sample_size, 1, 1)
    src_encodings_att_linear = src_encodings_att_linear.repeat(sample_size, 1, 1)
    src_sent_masks = self.get_attention_mask(src_encodings, src_lens * sample_size)
    init_hidden, init_cell = dec_init_vec[0], dec_init_vec[1]
    h_tm1 = (init_hidden.repeat(sample_size, 1), init_cell.repeat(sample_size, 1))
    att_tm1 = torch.zeros(total_sample_size, self.hidden_size, device=self.device)

    eos_id = self.vocab.tgt['</s>']
    bos_id = self.vocab.tgt['<s>']
    sample_ends = torch.zeros(total_sample_size, dtype=torch.uint8, device=self.device)
    sample_scores = torch.zeros(total_sample_size, device=self.device)
    samples = [torch.tensor([bos_id] * total_sample_size, dtype=torch.long, device=self.device)]

    t = 0
    while t < max_decoding_time_step:
        t += 1
        y_tm1_embed = self.tgt_embed(samples[-1])
        x = torch.cat([y_tm1_embed, att_tm1], 1) if self.input_feed else y_tm1_embed

        (h_t, cell_t), att_t, _ = self.step(x, h_tm1, src_encodings,
                                            src_encodings_att_linear,
                                            src_sent_masks=src_sent_masks)

        # probabilities over target words
        p_t = F.softmax(self.readout(att_t), dim=-1)
        log_p_t = torch.log(p_t)

        # draw one word per row and look up its log-probability
        drawn = torch.multinomial(p_t, num_samples=1)
        log_p_y_t = torch.gather(log_p_t, 1, drawn).squeeze(1)
        y_t = drawn.squeeze(1)
        samples.append(y_t)

        # The end flags are updated first, so the step that emits '</s>' does
        # not add to the score.
        sample_ends |= torch.eq(y_t, eos_id)
        sample_scores = sample_scores + log_p_y_t * (1. - sample_ends.float())
        if torch.all(sample_ends):
            break

        att_tm1 = att_t
        h_tm1 = (h_t, cell_t)

    # Collect the word ids per (sentence, sample); a sequence stops growing
    # once its last collected id is '</s>'.
    word_ids = [[[] for _ in range(sample_size)] for _ in range(batch_size)]
    for step, step_ids in enumerate(samples):
        for row, word_id in enumerate(step_ids.tolist()):
            sequence = word_ids[row % batch_size][row // batch_size]
            if step == 0 or sequence[-1] != eos_id:
                sequence.append(word_id)

    return [
        [
            Hypothesis(value=self.vocab.tgt.indices2words(word_ids[src_sent_id][sample_id])[:-1],
                       score=sample_scores[sample_id * batch_size + src_sent_id].item())
            for sample_id in range(sample_size)
        ]
        for src_sent_id in range(batch_size)
    ]
def test_MaxValueEntropySearch_MultiFidelity(self):
    """Exercise best_point / gen and the constructor flags with a fidelity feature."""

    def fitted_model(**model_kwargs):
        # Construct a model with the given flags and fit it on the fixture
        # data, marking the last feature as a fidelity feature.
        fitted = MaxValueEntropySearch(**model_kwargs)
        fitted.fit(
            Xs=self.Xs,
            Ys=self.Ys,
            Yvars=self.Yvars,
            bounds=self.bounds,
            task_features=[],
            feature_names=self.feature_names,
            metric_names=self.metric_names,
            fidelity_features=[-1],
        )
        return fitted

    model = fitted_model()

    # The best point must lie within the bounds, up to a small tolerance.
    xbest = model.best_point(
        bounds=self.bounds,
        objective_weights=self.objective_weights,
        target_fidelities={2: 5.0},
    )
    lower = torch.tensor([bound[0] for bound in self.bounds]) - 1e-5
    upper = torch.tensor([bound[1] for bound in self.bounds]) + 1e-5
    self.assertTrue(torch.all(xbest <= upper))
    self.assertTrue(torch.all(xbest >= lower))

    # Omitting the target fidelities is an error.
    with self.assertRaises(RuntimeError):
        model.best_point(
            bounds=self.bounds, objective_weights=self.objective_weights
        )

    # A key shared by target fidelities and fixed features is an error.
    with self.assertRaises(RuntimeError):
        model.best_point(
            bounds=self.bounds,
            objective_weights=self.objective_weights,
            target_fidelities={2: 1.0},
            fixed_features={2: 1.0},
        )

    # Generation: the patched optimizer returns a fixed candidate.
    n = 1
    new_X_dummy = torch.rand(1, n, 3, dtype=self.dtype, device=self.device)
    canned_results = [(new_X_dummy, None)]
    with mock.patch(
        self.optimize_acqf, side_effect=canned_results
    ) as mock_optimize_acqf:
        Xgen, wgen, _, __ = model.gen(
            n=n,
            bounds=self.bounds,
            objective_weights=self.objective_weights,
            outcome_constraints=None,
            linear_constraints=None,
            model_gen_options={
                "acquisition_function_kwargs": self.acq_options,
                "optimizer_kwargs": self.optimizer_options,
            },
            target_fidelities={2: 1.0},
        )
        self.assertTrue(torch.equal(Xgen, new_X_dummy.cpu()))
        self.assertTrue(torch.equal(wgen, torch.ones(n, dtype=self.dtype)))
        mock_optimize_acqf.assert_called()

    # Input warping is off by default and adds a Warp transform when enabled.
    self.assertFalse(model.use_input_warping)
    model = fitted_model(use_input_warping=True)
    self.assertTrue(model.use_input_warping)
    self.assertTrue(hasattr(model.model, "input_transform"))
    self.assertIsInstance(model.model.input_transform, Warp)

    # LOOCV pseudo likelihood is off by default.
    self.assertFalse(model.use_loocv_pseudo_likelihood)
    model = fitted_model(use_loocv_pseudo_likelihood=True)
    self.assertTrue(model.use_loocv_pseudo_likelihood)
def _checkpoint_name(args, tag):
    """Build the checkpoint base name for the `tag` ("best" or "final") model."""
    prefix = (
        args.stock_env
        + "_p1" + str(args.period_1)
        + "_p2" + str(args.period_2)
        + "_minEL" + str(args.min_episode_length)
        + "_maxEL" + str(args.max_episode_length)
        + "_nstep" + str(args.num_steps)
        + "_ntrainstep" + str(args.num_train_steps)
        + "_lr" + str(args.lr)
        + "_gamma" + str(args.gamma)
        + "_tau" + str(args.tau)
    )
    # Evaluated on its own: a conditional expression binds more loosely than
    # `+`, so writing it inline in the concatenation swallows the whole name.
    full_suffix = "_full" if args.full_env else ""
    if args.use_filter_data:
        return (prefix + "_" + tag + "_filtered_fyear" +
                str(args.filter_by_year) + full_suffix + ".pt")
    return prefix + full_suffix + "_" + tag + ".pt"


def _save_agent_state(agent, gpu_id, path):
    """Save the agent model's state dict to `path`, under the GPU context if any."""
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.save(agent.model.state_dict(), path)
    else:
        torch.save(agent.model.state_dict(), path)


def test(args, sdae_model, shared_model, env_config, train_process_finish_flags):
    """Repeatedly evaluate the shared model on a full-data episode.

    Each iteration copies the weights of `shared_model` into a local agent,
    runs one episode, prints the outcome and appends the reward and final
    equity to result arrays that are saved with `np.save`. The model is
    checkpointed whenever the episode reward beats the best seen so far. Once
    every entry of `train_process_finish_flags` equals 1, one more episode is
    run, the "final" checkpoint is written and the function returns.

    Two defects of the earlier version are fixed here:
      * checkpoint names were built with an unparenthesized
        `"_full" if args.full_env else ""` at the end of a `+` chain, which
        dropped either the name or its `.pt` suffix;
      * `max_reward` was never updated, so the "best" checkpoint was
        overwritten after every episode.

    Args:
        args: Run configuration; reads `gpu_ids`, `seed`, `full_env`,
            `rl_input_dim`, `num_actions`, `use_filter_data`,
            `save_model_dir` and the fields used for the checkpoint name.
        sdae_model: Passed through to `Agent`.
        shared_model: Model whose state dict is loaded before every episode.
        env_config: Mapping holding the environment settings read below.
        train_process_finish_flags: Tensor compared against ones to detect
            that all training processes have finished.
    """
    # Environment variables
    stock_raw_data = env_config['stock_raw_data']
    stock_norm_data = env_config['stock_norm_data']
    starting_capital = env_config['starting_capital']
    min_episode_length = env_config['min_episode_length']
    max_episode_length = env_config['max_episode_length']
    max_position = env_config['max_position']
    trans_cost_rate = env_config['trans_cost_rate']
    slippage_rate = env_config['slippage_rate']

    gpu_id = args.gpu_ids[-1]

    # Set seed
    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize environment; the cost rates are only passed when both are set.
    env_cls = Single_Stock_Full_Env if args.full_env else Single_Stock_BS_Env
    env_args = [stock_raw_data, stock_norm_data, starting_capital,
                min_episode_length, max_episode_length, max_position]
    if trans_cost_rate is not None and slippage_rate is not None:
        env_args += [trans_cost_rate, slippage_rate]
    env = env_cls(*env_args, full_data_episode=True)

    state = env.get_current_input_to_model()
    agent_model = A3C_LSTM(args.rl_input_dim, args.num_actions)
    agent = Agent(sdae_model, agent_model, args)
    agent.gpu_id = gpu_id

    cx = Variable(torch.zeros(1, LSTM_SIZE))
    hx = Variable(torch.zeros(1, LSTM_SIZE))

    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            agent.model = agent.model.cuda()
            agent.model.train()
            cx = cx.cuda()
            hx = hx.cuda()
            state = state.cuda()

    test_num = 0
    reward_list = []
    final_equity_list = []
    max_reward = -1e10

    # Set once all training processes have ended; one more run is then done
    # to capture the final result.
    terminate_next_iter = False
    while True:
        # Pull the latest shared weights.
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                agent.model.load_state_dict(shared_model.state_dict())
        else:
            agent.model.load_state_dict(shared_model.state_dict())

        # Run one episode.
        episodic_reward = 0.0
        actions = []
        while env.done is False:
            action, (next_hx, next_cx) = agent.select_action(state, (hx, cx),
                                                             training=False)
            actions.append(action - 3)
            reward, next_state, _ = env.step(action)
            episodic_reward += reward
            state = next_state
            (hx, cx) = (next_hx, next_cx)

        # Results logging
        reward_list.append(episodic_reward)
        port_value = env.calc_total_portfolio_value()
        final_equity_list.append(port_value)

        test_num += 1
        print(
            "Test num: {0} | Test reward: {1} | Holdings: {2} | End Capital: {3} | Final equity : {4}"
            .format(test_num, episodic_reward, env.curr_holdings[0],
                    env.curr_capital, port_value))
        print(Counter(actions))
        print("\n")
        sys.stdout.flush()

        # Reset the environment and the recurrent state for the next episode.
        env.reset()
        state = env.get_current_input_to_model()
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                hx = Variable(torch.zeros(1, LSTM_SIZE).cuda())
                cx = Variable(torch.zeros(1, LSTM_SIZE).cuda())
                state = state.cuda()
        else:
            hx = Variable(torch.zeros(1, LSTM_SIZE))
            cx = Variable(torch.zeros(1, LSTM_SIZE))

        # Save model
        if terminate_next_iter:
            final_name = _checkpoint_name(args, "final")
            _save_agent_state(
                agent, gpu_id,
                '{0}{1}.dat'.format(args.save_model_dir, final_name))
            print("saved final")
            break

        model_name = _checkpoint_name(args, "best")
        if episodic_reward > max_reward:
            # Remember the new best so later, worse episodes do not overwrite it.
            max_reward = episodic_reward
            _save_agent_state(
                agent, gpu_id,
                '{0}{1}.dat'.format(args.save_model_dir, model_name))

        # Save results
        if args.use_filter_data:
            np.save(RESULT_DATA_PATH + "epi_reward_filtered_" + model_name,
                    np.array(reward_list))
            np.save(RESULT_DATA_PATH + "portfolio_filtered_" + model_name,
                    np.array(final_equity_list))
        else:
            np.save(RESULT_DATA_PATH + "epi_reward_" + model_name,
                    np.array(reward_list))
            np.save(RESULT_DATA_PATH + "portfolio_" + model_name,
                    np.array(final_equity_list))

        if torch.all(train_process_finish_flags == torch.ones(
                train_process_finish_flags.size(0))):
            terminate_next_iter = True
            print("From test process: all training process terminated")
            sys.stdout.flush()
def train_with_pruning_callback(
    tmpdir,
    parameters_to_prune=False,
    use_global_unstructured=False,
    pruning_fn="l1_unstructured",
    use_lottery_ticket_hypothesis=False,
    accelerator=None,
    gpus=None,
    num_processes=1,
):
    """Fit and test a ``TestModel`` with a ``ModelPruning`` callback attached.

    The keyword arguments are forwarded into the ``ModelPruning`` and
    ``Trainer`` constructors.  Two argument combinations are expected to be
    rejected by ``ModelPruning`` with a ``MisconfigurationException``; for
    those the helper only checks the error and returns without training.
    When no ``accelerator`` is given, the helper finally asserts that at least
    one entry of ``model.layer.mlp_2.weight`` is zero after training.
    """
    model = TestModel()

    # Before any pruning has happened no weight entry may be exactly zero,
    # otherwise the final "something was pruned" check would be meaningless.
    assert torch.all(model.layer.mlp_2.weight != 0)

    structured = isinstance(pruning_fn, str) and pruning_fn.endswith("_structured")

    pruning_kwargs = dict(
        pruning_fn=pruning_fn,
        amount=0.3,
        use_global_unstructured=use_global_unstructured,
        use_lottery_ticket_hypothesis=use_lottery_ticket_hypothesis,
        verbose=1,
    )
    if parameters_to_prune:
        pruning_kwargs["parameters_to_prune"] = [
            (model.layer.mlp_1, "weight"),
            (model.layer.mlp_2, "weight"),
        ]
    else:
        pruning_kwargs["parameter_names"] = ["weight"]
    if structured:
        pruning_kwargs["pruning_dim"] = 0
    if pruning_fn == "ln_structured":
        pruning_kwargs["pruning_norm"] = 1

    # Misconfiguration checks: work out which error (if any) the constructor
    # is expected to raise for this argument combination.
    expected_error = None
    if structured and use_global_unstructured:
        expected_error = "is supported with `use_global_unstructured=True`"
    elif ModelPruning._is_pruning_method(pruning_fn) and not use_global_unstructured:
        expected_error = "currently only supported with"

    if expected_error is not None:
        with pytest.raises(MisconfigurationException, match=expected_error):
            ModelPruning(**pruning_kwargs)
        return

    pruning = ModelPruning(**pruning_kwargs)

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        weights_summary=None,
        checkpoint_callback=False,
        logger=False,
        limit_train_batches=10,
        limit_val_batches=2,
        max_epochs=10,
        accelerator=accelerator,
        gpus=gpus,
        num_processes=num_processes,
        callbacks=pruning,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(model)
    trainer.test(model)

    if accelerator:
        return
    # Check some have been pruned.  The attribute is looked up on the model
    # again here on purpose instead of reusing an earlier reference.
    assert torch.any(model.layer.mlp_2.weight == 0)
def test_uniform_probabilities(self):
    """The probability map of a ``UniformSampler`` must be 1 everywhere."""
    probability_map = UniformSampler(5).get_probability_map(self.sample)
    all_ones = torch.ones_like(probability_map)
    assert torch.all(probability_map.eq(all_ones))
def sample(self, src_sents: List[List[str]], sample_size=5, max_decoding_time_step=100) -> List[Hypothesis]:
    """Draw random target sentences from the model for a batch of sources.

    Every source sentence is encoded once; the encodings are then tiled
    ``sample_size`` times so that all ``sample_size * len(src_sents)``
    samples are decoded in one batch.  At each step the next word is drawn
    with ``torch.multinomial`` from the softmax over the read-out layer.

    Args:
        src_sents: a batch of source sentences, each a list of words
        sample_size: number of samples drawn per source sentence
        max_decoding_time_step: upper bound on the number of decoding steps

    Returns:
        A nested list indexed as ``[src_sent_id][sample_id]`` of
        ``Hypothesis`` objects with two fields:
            value: the word list returned by ``indices2words`` for the
                sampled ids, with its last element dropped
            score: sum of the log-probabilities of the sampled words; steps
                at or after the one that produced ``</s>`` contribute 0
    """
    batch_size = len(src_sents)
    total_sample_size = sample_size * batch_size

    src_sents_var = self.vocab.src.to_input_tensor(src_sents, self.device)
    src_encodings, dec_init_vec = self.encode(src_sents_var, [len(sent) for sent in src_sents])
    src_encodings_att_linear = self.att_src_linear(src_encodings)

    # Tile everything along the batch axis: row ``k * batch_size + b`` is the
    # k-th sample of source sentence b.
    # (total_sample_size, max_src_len, src_encoding_size)
    src_encodings = src_encodings.repeat(sample_size, 1, 1)
    src_encodings_att_linear = src_encodings_att_linear.repeat(sample_size, 1, 1)
    tiled_src_lens = [len(sent) for sent in src_sents] * sample_size
    src_sent_masks = self.get_attention_mask(src_encodings, tiled_src_lens)

    h_tm1 = (dec_init_vec[0].repeat(sample_size, 1), dec_init_vec[1].repeat(sample_size, 1))
    att_tm1 = torch.zeros(total_sample_size, self.hidden_size, device=self.device)

    eos_id = self.vocab.tgt['</s>']
    sample_ends = torch.zeros(total_sample_size, dtype=torch.uint8, device=self.device)
    sample_scores = torch.zeros(total_sample_size, device=self.device)
    samples = [torch.tensor([self.vocab.tgt['<s>']] * total_sample_size, dtype=torch.long, device=self.device)]

    step_count = 0
    while step_count < max_decoding_time_step:
        step_count += 1

        prev_embed = self.tgt_embed(samples[-1])
        x = torch.cat([prev_embed, att_tm1], 1) if self.input_feed else prev_embed

        (h_t, cell_t), att_t, _alpha_t = self.step(x, h_tm1,
                                                   src_encodings, src_encodings_att_linear,
                                                   src_sent_masks=src_sent_masks)

        # probabilities over target words
        p_t = F.softmax(self.readout(att_t), dim=-1)
        log_p_t = torch.log(p_t)

        # draw one word per row, then flatten to (total_sample_size)
        drawn = torch.multinomial(p_t, num_samples=1)
        log_p_y_t = torch.gather(log_p_t, 1, drawn).squeeze(1)
        y_t = drawn.squeeze(1)
        samples.append(y_t)

        # The end flag is updated before the score, so the step that emits
        # </s> (and every later step) adds nothing to that row's score.
        sample_ends |= torch.eq(y_t, eos_id).byte()
        sample_scores = sample_scores + log_p_y_t * (1. - sample_ends.float())

        if torch.all(sample_ends):
            break

        att_tm1, h_tm1 = att_t, (h_t, cell_t)

    # Regroup the per-step word ids into one id sequence per (sentence, sample),
    # no longer extending a sequence once its last id is </s>.
    sequences = [[[] for _ in range(sample_size)] for _ in range(batch_size)]
    for step_idx, step_words in enumerate(samples):
        for flat_idx, word_id in enumerate(step_words.tolist()):
            sample_id, src_sent_id = divmod(flat_idx, batch_size)
            sequence = sequences[src_sent_id][sample_id]
            if step_idx == 0 or sequence[-1] != eos_id:
                sequence.append(word_id)

    return [
        [
            Hypothesis(
                value=self.vocab.tgt.indices2words(sequences[src_sent_id][sample_id])[:-1],
                score=sample_scores[sample_id * batch_size + src_sent_id].item(),
            )
            for sample_id in range(sample_size)
        ]
        for src_sent_id in range(batch_size)
    ]
def synthesis():
    """Streamlit page: generate samples with ``G`` and show them in columns.

    Sidebar widgets select the number of samples, how the latent codes are
    produced ("random", "slerp" or "lerp") and the view options.  Once the
    "run" button is pressed, the generator output is post-processed, turned
    into a list of captioned image tensors and rendered one column per sample.
    """
    cfg, G, lidar, device = setup_synthesis()

    with st.sidebar.expander("run options"):
        num_samples, latent_type = set_synthesis_options()

    with st.sidebar.expander("view options"):
        R, t, cmap = set_view_options(device)

    # Nothing is generated until the button is pressed.
    if not st.button("run"):
        return

    if latent_type == "random":
        latent = torch.randn(num_samples, cfg.model.gen.in_ch, device=device)
    elif latent_type in ("slerp", "lerp"):
        # Interpolate between two random codes with the selected function.
        interpolate = slerp if latent_type == "slerp" else lerp
        endpoints = torch.randn(2, cfg.model.gen.in_ch, device=device)
        latent = torch.cat(
            [
                interpolate(w, endpoints[[0]], endpoints[[1]])
                for w in torch.linspace(0, 1, num_samples)
            ],
            dim=0,
        )

    out = utils.postprocess(G(latent), lidar)

    # (caption, tensor) pairs, in display order
    export = []

    if "depth_orig" in out:
        export.append(
            ("inverse_depth", utils.colorize(out["depth_orig"] * color_scale, cmap)))

    if "confidence" in out:
        if out["confidence"].shape[1] == 2:
            # two confidence / mask channels: show each one and their product
            export.append(
                ("measurability pix", utils.colorize(out["confidence"][:, [0]], cmap)))
            export.append(
                ("measurability img", utils.colorize(out["confidence"][:, [1]], cmap)))
            export.append(("mask pix", out["mask"][:, [0]]))
            export.append(("mask img", out["mask"][:, [1]]))
            export.append(
                ("mask", torch.prod(out["mask"], dim=1, keepdim=True)))
        else:
            export.append(
                ("measurability", utils.colorize(out["confidence"], cmap)))
            export.append(("mask", out["mask"]))

    export.append(
        ("inverse depth w/ point drops", utils.colorize(out["depth"] * color_scale, cmap)))
    export.append(("point normal", out["normals"]))

    bev = render_point_clouds(
        utils.flatten(out["points"]),
        utils.flatten(out["normals"]),
        L=512,
        R=R,
        t=t,
    )
    # extra channel that is 1 where every channel of the rendering is non-zero
    bev_alpha = torch.all(bev != 0.0, dim=1, keepdim=True).float()
    export.append(("point clouds", torch.cat([bev, bev_alpha], dim=1)))

    columns = st.columns(num_samples)
    for sample_idx in range(num_samples):
        with columns[sample_idx]:
            for caption, tensor in export:
                st.image(
                    to_np(tensor[sample_idx]),
                    caption=caption,
                    use_column_width=True,
                    output_format="png",
                )
def test_param(self, degrees, translate, scale, shear, resample, align_corners, return_transform, same_on_batch,
               device, dtype):
    """Check how one SGD step affects tensor-valued ``RandomAffine3D`` settings.

    Every setting that is not a plain number / list / tuple is wrapped in an
    ``nn.Parameter`` before the augmentation is built.  After one backward
    pass and optimiser step, each such setting must be unchanged when its
    gradient is zero (or ``resample == 'nearest'``) and changed otherwise.
    """
    plain_types = (int, float, list, tuple)

    def as_trainable(value):
        # plain Python values are passed through, tensors become parameters
        if isinstance(value, plain_types):
            return value
        return nn.Parameter(value.clone().to(device=device, dtype=dtype))

    _degrees = as_trainable(degrees)
    _translate = as_trainable(translate)
    _scale = as_trainable(scale)
    _shear = as_trainable(shear)

    torch.manual_seed(0)
    volume = torch.randint(255, (2, 3, 10, 10, 10), device=device, dtype=dtype) / 255.
    aug = RandomAffine3D(
        _degrees, _translate, _scale, _shear, resample, align_corners=align_corners,
        return_transform=return_transform, same_on_batch=same_on_batch, p=1.)

    result = aug(volume)
    if return_transform:
        output, _ = result
    else:
        output = result

    if len(list(aug.parameters())) != 0:
        criterion = nn.MSELoss()
        optimizer = torch.optim.SGD(aug.parameters(), lr=10)
        # to ensure that a big loss value could be obtained
        loss = criterion(output, torch.ones_like(output) * 2)
        loss.backward()
        optimizer.step()

    # (value passed to the test, attribute name on the augmentation)
    checked_settings = (
        (degrees, "degrees"),
        (translate, "translate"),
        (scale, "scale"),
        (shear, "shears"),
    )
    for given, attr_name in checked_settings:
        if isinstance(given, plain_types):
            continue

        learned = getattr(aug, attr_name)
        assert isinstance(learned, torch.Tensor)

        # Assert if param not updated
        if resample == 'nearest' and learned.is_cuda:
            # grid_sample in nearest mode and cuda device returns nan than 0
            continue

        if resample == 'nearest' or torch.all(learned._grad == 0.):
            # grid_sample will return grad = 0 for resample nearest
            # https://discuss.pytorch.org/t/autograd-issue-with-f-grid-sample/76894
            assert (given.to(device=device, dtype=dtype) - learned.data).sum() == 0
        else:
            assert (given.to(device=device, dtype=dtype) - learned.data).sum() != 0
def inversion():
    """Streamlit page: fit latent code(s) of ``G`` to one dataset item.

    The page shows the (optionally corrupted) target on the left and, once
    the "run" button is pressed, optimises a latent code for ``num_step``
    iterations against the selected distances, refreshing the generated
    images on the right after every step.  When more than one latent code is
    selected, a forward hook on the chosen layer fuses the per-code features
    with trainable weights ``alpha``.
    """
    cfg, G, lidar, device, dataset = setup_inversion()

    # Without a dataset there is nothing to invert: show a hint and stop.
    if dataset is None:
        st.write("please set the dataset path!")
        st.markdown(
            "e.g. `ln -s /path/to/your/kitti_odometry ./data/kitti_odometry`")
        return

    # options
    with st.sidebar.expander("run options"):
        n, corruption, distance, num_step = set_inversion_options(
            n_max=len(dataset))
        num_code = st.select_slider(
            "#latents",
            [2**i for i in range(7)],
            help="if >1, mGANprior is applied",
        )
        if num_code != 1:
            # layer_name / feature_ch are only defined in the multi-code case
            feature_shapes = get_feature_shapes(G, (1, cfg.model.gen.in_ch),
                                                device)
            layer_name = st.selectbox(
                "composition layer",
                list(feature_shapes.keys()),
                help="a layer name to fuse the multiple latents",
            )
            _, feature_shape = feature_shapes[layer_name]
            _, feature_ch, _, _ = feature_shape  # B,C,H,W

    with st.sidebar.expander("view options"):
        R, t, cmap = set_view_options(device)

    # stylegan2 settings
    perturb_latent = True
    noise_ratio = 0.75
    noise_sigma = 1.0
    lr_rampup_ratio = 0.05
    lr_rampdown_ratio = 0.25

    def lr_schedule(iteration):
        # Learning-rate multiplier for LambdaLR: a cosine-shaped factor driven
        # by the remaining fraction of steps, multiplied by a linear factor
        # t / lr_rampup_ratio that is capped at 1.
        t = iteration / num_step
        gamma = np.clip((1.0 - t) / lr_rampdown_ratio, None, 1.0)
        gamma = 0.5 - 0.5 * np.cos(gamma * np.pi)
        gamma = gamma * np.clip(t / lr_rampup_ratio, None, 1.0)
        return gamma

    # get target data
    item = dataset.__getitem__(n)
    dep_ref = item["depth"][None].to(device).float()
    mask_ref = item["mask"][None].to(device).float()
    inv_ref_full = lidar.invert_depth(dep_ref)
    # blend with 0 wherever the mask is 0
    inv_ref_full = mask_ref * inv_ref_full + (1 - mask_ref) * 0.0
    points_ref_full = lidar.inv_to_xyz(inv_ref_full)
    normals_ref_full = utils.xyz_to_normal(points_ref_full, mode="closest")

    # corruption process
    dep_ref, mask_ref = apply_corruption(dep_ref, mask_ref, corruption)
    inv_ref = lidar.invert_depth(dep_ref)
    inv_ref = mask_ref * inv_ref + (1 - mask_ref) * 0.0
    points_ref = lidar.inv_to_xyz(inv_ref)
    # corrupted points are rendered with the normals of the uncorrupted scan
    bev_ref = render_point_clouds(
        utils.flatten(points_ref),
        utils.flatten(normals_ref_full),
        L=512,
        R=R,
        t=t,
    )

    run_inversion = st.button("run")
    progress_title = st.text("progress")
    progress_bar = st.progress(0)

    cols = st.columns(2)

    # left column: the target
    with cols[0]:
        st.text(f"target #{n}")
        st.image(
            np.dstack([
                to_np(bev_ref[0]),
                to_np(
                    torch.all(bev_ref != 0.0, dim=1, keepdim=True).float()[0]),
            ]),
            caption="point clouds",
            output_format="png",
            use_column_width=True,
        )
        st.image(
            to_np(utils.colorize(inv_ref * color_scale, cmap)[0]),
            caption="inverse depth",
            output_format="png",
            use_column_width=True,
        )
        st.image(
            to_np(mask_ref[0]),
            caption="mask",
            output_format="png",
            use_column_width=True,
        )
        if corruption != None:
            st.image(
                to_np(utils.colorize(inv_ref_full * color_scale, cmap)[0]),
                caption="inverse depth (full)",
                output_format="png",
                use_column_width=True,
            )

    # right column: placeholders that are overwritten at every step
    with cols[1]:
        st.text("inversion")
        if len(distance) == 0:
            st.error("loss should be selected")
            return
        show_gen_bev = st.empty()
        show_gen_depth = st.empty()
        show_gen_depth_orig = st.empty()
        show_gen_mask = st.empty()

    if run_inversion:
        # trainable latent code
        torch.manual_seed(0)
        latent = torch.randn(num_code, cfg.model.gen.in_ch, device=device)
        # divide each code by the root of its mean square (plus 1e-8)
        latent.div_(latent.pow(2).mean(dim=1, keepdim=True).add(1e-8).sqrt())
        latent = torch.nn.Parameter(latent).requires_grad_()
        optim_z = SphericalOptimizer([latent], lr=0.1)
        scheduler_z = torch.optim.lr_scheduler.LambdaLR(optim_z,
                                                        lr_lambda=lr_schedule)

        if num_code != 1:
            # one weight per (code, feature channel), initialised uniformly
            alpha = torch.full(
                (num_code, feature_ch, 1, 1),
                fill_value=1 / num_code,
                device=device,
            )
            alpha = torch.nn.Parameter(alpha).requires_grad_()

            # multi-code inversion
            def feature_composition(m, i, o):
                # forward hook: weighted sum of the output over dim 0
                o = (o * alpha).sum(dim=0, keepdim=True)
                return o

            hooks = []
            for name, module in G.named_modules():
                module._forward_hooks = OrderedDict()  # reset hooks
                if name == layer_name:
                    hooks.append(
                        module.register_forward_hook(feature_composition))

            optim_a = torch.optim.Adam([alpha], lr=0.001)
            scheduler_a = torch.optim.lr_scheduler.LambdaLR(
                optim_a, lr_lambda=lr_schedule)

        # optimize the latent
        for cur_step in range(num_step):
            progress = cur_step / num_step

            # noise
            # strength decays quadratically and is 0 once progress >= noise_ratio
            w = max(0.0, 1.0 - progress / noise_ratio)
            noise_strength = 0.05 * noise_sigma * w**2
            noise = noise_strength * torch.randn_like(latent)

            # forward G
            out = G(latent + noise if perturb_latent else latent)
            out = utils.postprocess(out, lidar)

            # pick which output is compared against the target
            if "dusty" in cfg.model.gen.arch:
                inv_gen = out["depth_orig"]
            else:
                inv_gen = out["depth"]

            # loss
            loss = 0
            if "chamfer" in distance:
                dl, dr = chamfer_distance(
                    utils.flatten(points_ref),
                    utils.flatten(out["points"]),
                )
                loss += dl.mean(dim=1) + dr.mean(dim=1)
            if "l1" in distance:
                loss += masked_loss(inv_ref, inv_gen, mask_ref, "l1").mean()
            if "l2" in distance:
                loss += masked_loss(inv_ref, inv_gen, mask_ref, "l2").mean()

            # per-sample gradients
            optim_z.zero_grad()
            if num_code != 1:
                optim_a.zero_grad()
            # an explicit all-ones gradient lets backward accept a non-scalar loss
            loss.backward(gradient=torch.ones_like(loss))
            optim_z.step()
            scheduler_z.step()
            if num_code != 1:
                optim_a.step()
                scheduler_a.step()

            # make figures
            if "depth_orig" in out:
                inv_orig_gen = utils.colorize(out["depth_orig"] * color_scale,
                                              cmap)
            if "mask" in out:
                if out["mask"].shape[1] == 2:
                    mask_gen = torch.prod(out["mask"], dim=1, keepdim=True)
                else:
                    mask_gen = out["mask"]
            # from here on inv_gen holds the colourised image, not the raw output
            inv_gen = utils.colorize(out["depth"] * color_scale, cmap)
            bev_gen = render_point_clouds(
                utils.flatten(out["points"]),
                utils.flatten(out["normals"]),
                L=512,
                R=R,
                t=t,
            )

            show_gen_bev.image(
                np.dstack([
                    to_np(bev_gen[0]),
                    to_np((bev_gen != 0.0).float()[0, [0]])
                ]),
                caption="point clouds",
                output_format="png",
                use_column_width=True,
            )
            show_gen_depth.image(
                to_np(inv_gen[0]),
                caption="inverse depth w/ point drops",
                output_format="png",
                use_column_width=True,
            )
            if "depth_orig" in out:
                show_gen_depth_orig.image(
                    to_np(inv_orig_gen[0]),
                    caption="inverse depth",
                    output_format="png",
                    use_column_width=True,
                )
            # NOTE(review): mask_gen is only assigned when "mask" is in out,
            # but is displayed when "confidence" is in out — confirm both keys
            # always appear together.
            if "confidence" in out:
                show_gen_mask.image(
                    to_np(mask_gen[0]),
                    caption="sampled mask",
                    output_format="png",
                    use_column_width=True,
                )

            progress_bar.progress(progress)
            progress_title.text(f"progress {int(progress*100):d}%")

        progress_bar.progress(1.0)
        progress_title.text(f"progress completed!")
        st.balloons()
def forward(self, image, target_label=None, no_grad=True, **kwargs):
    """Iteratively compute an adversarial perturbation ``r`` for ``image``.

    At each iteration the current prediction is compared with the candidate
    target classes, a step towards the decision boundary is obtained from
    ``self.project_boundary_polyhedron`` and added (with overshoot
    ``args.df_overshot``) to the running perturbation.  Images that are
    already fooled receive no further perturbation, and the loop stops early
    once every image is fooled.

    Args:
        image: input batch; its first dimension is the number of images.
        target_label: ``None`` for an untargeted attack (candidates are the
            ``self.num_label`` highest-logit classes), otherwise one target
            class per image for a targeted attack.
        no_grad: forwarded to ``net_forward`` / ``inversenet_backward``.
            Setting it to True will not construct the gradient graph of r,
            which drastically reduces GPU memory usage.
        **kwargs: forwarded to the network calls; used to maintain the
            dropout mask.

    Returns:
        The accumulated perturbation with the shape of ``image`` (the int
        ``0`` when the iteration limit is 0).

    Side effects:
        ``self.not_done`` is left as a boolean tensor that is True for the
        images that were not fooled.
    """
    # init some variables
    num_image = image.size()[0]
    image_shape = image.size()
    adv_image = image
    # A boolean mask is required here: on a uint8 tensor ``~`` is a bitwise
    # NOT (1 -> 254, 0 -> 255), which made the "all fooled" test below always
    # true and ended the attack after its first iteration.
    self.not_done = torch.ones(num_image, dtype=torch.bool, device=image.device)
    max_iter = args.df_train_max_iter if self.training else args.df_test_max_iter
    r = 0

    # get label and target_label
    logit = self.net_forward(adv_image, no_grad=no_grad, **kwargs)
    orig_pred = logit.argmax(dim=1)
    pred = orig_pred
    if target_label is None:
        # untargeted attack
        attack_type = 'untargeted'
        target_label = torch.sort(-logit, dim=1)[1]
        target_label = target_label.data[:, :self.num_label]
    else:
        # targeted attack
        attack_type = 'targeted'
        target_label = torch.cat([
            orig_pred.view(num_image, 1),
            target_label.view(num_image, 1)
        ], dim=1)

    for _ in range(max_iter):
        # get logit diff (a.k.a, f - f0)
        logit_diff = logit - logit.gather(1, pred.view(num_image, 1))

        # get grad diff (a.k.a, w - w0)
        if attack_type == 'targeted':
            # column 0 always holds the current prediction
            target_label = torch.cat([
                pred.view(num_image, 1),
                target_label[:, 1].view(num_image, 1)
            ], dim=1)
        grad = self.inversenet_backward(adv_image,
                                        target_label.contiguous(),
                                        no_grad=no_grad,
                                        **kwargs)
        grad_diff = grad - grad[:, :, 0].contiguous().view(
            grad.size()[0], grad.size()[1], 1).expand_as(grad)

        r_this_step = self.project_boundary_polyhedron(
            grad_diff[:, :, 1:], logit_diff.gather(1, target_label[:, 1:]))

        # if an image is already successfully fooled, no more perturbation
        # should be applied to it
        r_this_step = r_this_step * self.not_done.float().view(num_image, 1)

        # add some overshot
        r_this_step = (1 + args.df_overshot) * r_this_step.view(image_shape)

        # accumulate r
        r = r + r_this_step

        # add r_this_step to adv_image
        adv_image = adv_image + r_this_step

        # stop gradient for efficient training (if necessary)
        if not self.training or args.no_bp_prev_r:
            adv_image = adv_image.detach()
            adv_image.requires_grad = True

        # test whether we have successfully fooled these images
        logit = self.net_forward(adv_image, no_grad=no_grad, **kwargs)
        pred = logit.argmax(dim=1)
        if attack_type == 'untargeted':
            # still not fooled while the prediction equals the original one
            self.not_done = self.not_done & pred.eq(orig_pred)
        else:
            # still not fooled while the prediction differs from the target
            self.not_done = self.not_done & pred.ne(target_label[:, 1])

        if not self.not_done.any().item():
            # break if already fooled all images
            break

    return r
def icdf(self, x):
    """Map values in [0, 1] back through the per-bin linear transform.

    The bin of every element is looked up with ``_get_inverse_bin_idx`` and
    the element is then shifted by ``bin_shift_`` and divided by
    ``bin_scale_`` of that bin.  Raises ``AssertionError`` if any element of
    ``x`` lies outside [0, 1].
    """
    inside_unit_interval = torch.logical_and(x >= 0, x <= 1)
    assert torch.all(inside_unit_interval), 'All inputs should be between 0 and 1'

    bin_idx = self._get_inverse_bin_idx(x)

    # Linear interpolate within the selected bin
    shift = self.bin_shift_[bin_idx]
    scale = self.bin_scale_[bin_idx]
    return (x - shift) / scale
def test_invariant_batch():
    """The probability-weighted mean rate must be 1 for each batch entry.

    Two different invariant proportions are supplied as a batch; for both of
    them the rates weighted by their probabilities have to sum to one.
    """
    prop_invariant = torch.tensor([[0.2], [0.3]])
    site_model = InvariantSiteModel('pinv', Parameter('inv', prop_invariant))
    rates = site_model.rates()
    props = site_model.probabilities()
    mean_rate = rates.mul(props).sum(-1)
    # Compare with a tolerance: the mean rate is the result of floating-point
    # arithmetic, so demanding bit-exact equality with 1.0 is fragile.
    expected = torch.ones(2, dtype=mean_rate.dtype)
    assert torch.allclose(mean_rate, expected)
def fgm(x,
        net: nn.Module,
        eps: float = 0.3,
        ordr=np.inf,
        y=None,
        clip_min=None,
        clip_max=None,
        targeted=False,
        sanity_checks=True):
    """
    Implementation of the Fast Gradient Method.

    Arguments
    ---------
    x : torch.tensor
        The input to the net.  When ``y`` is None and ``x`` has three
        dimensions, a leading batch dimension is added.
    net : nn.Module
        The model on which to perform the attack.
    eps : float, optional
        The epsilon (input variation parameter).
    ordr : [np.inf, 1, 2], optional
        Order of the norm.
    y : torch.tensor, optional
        Labels passed to ``nn.CrossEntropyLoss``.  If targeted is true, then
        provide the target label.  Otherwise, only model predictions are used
        as labels to avoid the "label leaking" effect (explained in this
        paper: https://arxiv.org/abs/1611.01236).  Default is None, in which
        case the argmax of ``net(x)`` is used.
    clip_min : float
        Minimum float value for adversarial example components.
    clip_max : float
        Maximum float value for adversarial example components.
        ``clip_min`` and ``clip_max`` must be given together.
    targeted : bool, optional
        Is the attack targeted or untargeted?  Untargeted will try to make
        the label incorrect.  Targeted will instead try to move in the
        direction of being more like y.
    sanity_checks : bool, optional
        When True (default), assert that ``x`` already lies inside
        ``[clip_min, clip_max]``.

    Returns
    -------
    adv_x : torch.Tensor
        The adversarial example.

    Raises
    ------
    ValueError
        If ``ordr`` is not one of np.inf, 1 or 2.
    AssertionError
        If a sanity check fails or only one clipping bound is given.
    """
    if ordr not in [np.inf, 1, 2]:
        raise ValueError('Norm order must be either np.inf, 1, or 2.')

    if sanity_checks:
        # Compare against None (a bound of 0 is a real bound) and use
        # inclusive comparisons: the result is clamped to the closed interval,
        # so inputs sitting exactly on a bound are valid.
        if clip_min is not None:
            assert torch.all(
                x >= torch.tensor(clip_min, device=x.device, dtype=x.dtype)), \
                'x must not contain values below clip_min'
        if clip_max is not None:
            assert torch.all(
                x <= torch.tensor(clip_max, device=x.device, dtype=x.dtype)), \
                'x must not contain values above clip_max'

    x = x.clone().detach()
    if y is None and len(x.shape) == 3:
        # Add the batch dimension *before* enabling gradients.  Doing it
        # afterwards would turn x into a non-leaf tensor whose .grad stays
        # None after backward().
        x = x.unsqueeze(0)
    # x needs to have requires_grad set to True
    # for its grad to be computed and stored properly in a backward call
    x.requires_grad_(True)

    if y is None:
        # use the model's own predictions as labels
        _, y = torch.max(net(x), dim=1)

    # Compute loss
    crit = nn.CrossEntropyLoss()
    loss = crit(net(x), y)
    # If attack is targeted, minimize loss of target label rather than
    # maximize loss of correct label.
    if targeted:
        loss = -loss

    # Define gradient of loss wrt input
    loss.backward()
    opt_pert = optimize_linear(x.grad, eps, ordr)

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + opt_pert

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = torch.clamp(adv_x, clip_min, clip_max)

    return adv_x
# Small hand-built graph: one batch element, four nodes, one edge type.
in_dim = out_dim = emb_dim
batch_size = 1
n_nodes = 4
n_edge_types = 1
_in_state = torch.rand(batch_size, n_nodes, emb_dim)
# Start from the identity, i.e. every node is connected to itself only.
_adj_matrix = torch.eye(n_nodes).view(
    batch_size, n_nodes, n_nodes, n_edge_types)
# Connect nodes 0 and 1
_adj_matrix[0, 0, 1, 0] = 1
_adj_matrix[0, 1, 0, 0] = 1
# Check if everything works when adjacency matrix is sparse
# Print out the intermediate results (tmp_state)
# To make sure that features will only be used/shared between neighbors
print(_in_state)
# NOTE(review): to_dense is defined elsewhere; its exact behaviour on this
# tensor is not visible here.
t_adj_matrix = to_dense(_adj_matrix).view(
    1, n_nodes, n_nodes * n_edge_types)
# Multiply the node states by the adjacency matrix.
t_state = torch.bmm(t_adj_matrix, _in_state)
print(t_state)
# Assert the nodes attributes are unchanged except for connected ones
# (nodes 2 and 3 only have self-connections).
assert torch.all(torch.eq(_in_state[:batch_size, 2:, :emb_dim],
                          t_state[:batch_size, 2:, :emb_dim]))
def _deeplift_assert(
    self,
    model: Module,
    attr_method: Union[DeepLift, DeepLiftShap],
    inputs: Tuple[Tensor, ...],
    baselines,
    custom_attr_func: Callable[..., Tuple[Tensor, ...]] = None,
) -> None:
    """Run ``attr_method`` on ``inputs`` and check its attributions.

    Args:
        model: the model under attribution; its gradients are zeroed before
            every run.
        attr_method: a ``DeepLift`` or ``DeepLiftShap`` instance.
        inputs: tuple of input tensors.
        baselines: the baselines, or a callable producing them.  A callable
            that declares at least one parameter is called with ``inputs``,
            otherwise it is called without arguments.
        custom_attr_func: forwarded as ``custom_attribution_func``.

    Checks, repeated five times: attributions are identical with and without
    ``return_convergence_delta``; ``delta`` has the expected shape and every
    entry is below 1e-5 in absolute value; attributions have the shape of
    their inputs; and, when the baselines are scalars or match the input
    shape, the attributions are compared with ``IntegratedGradients``.
    """
    input_bsz = len(inputs[0])
    if callable(baselines):
        # Decide how to call the baseline factory from its signature.
        baseline_parameters = signature(baselines).parameters
        if len(baseline_parameters) > 0:
            baselines = baselines(inputs)
        else:
            baselines = baselines()

    # Non-tensor baselines count as a single baseline.
    baseline_bsz = (len(baselines[0]) if isinstance(
        baselines[0], torch.Tensor) else 1)
    # Run attribution multiple times to make sure that it is
    # working as expected
    for _ in range(5):
        model.zero_grad()
        attributions, delta = attr_method.attribute(
            inputs,
            baselines,
            return_convergence_delta=True,
            custom_attribution_func=custom_attr_func,
        )
        attributions_without_delta = attr_method.attribute(
            inputs, baselines, custom_attribution_func=custom_attr_func)

        # Requesting the delta must not change the attributions themselves.
        for attribution, attribution_without_delta in zip(
                attributions, attributions_without_delta):
            self.assertTrue(
                torch.all(torch.eq(attribution, attribution_without_delta)))

        if isinstance(attr_method, DeepLiftShap):
            # one delta per (input, baseline) pair
            self.assertEqual([input_bsz * baseline_bsz], list(delta.shape))
        else:
            self.assertEqual([input_bsz], list(delta.shape))
            # the separately computed delta must match the returned one
            delta_external = attr_method.compute_convergence_delta(
                attributions, baselines, inputs)
            assertArraysAlmostEqual(delta, delta_external, 0.0)

        delta_condition = all(abs(delta.numpy().flatten()) < 0.00001)
        self.assertTrue(
            delta_condition,
            "The sum of attribution values {} is not "
            "nearly equal to the difference between the endpoint for "
            "some samples".format(delta),
        )
        for input, attribution in zip(inputs, attributions):
            self.assertEqual(input.shape, attribution.shape)
        if (isinstance(baselines[0], (int, float))
                or inputs[0].shape == baselines[0].shape):
            # Compare with Integrated Gradients
            ig = IntegratedGradients(model)
            attributions_ig = ig.attribute(inputs, baselines)
            assertAttributionComparision(self, attributions, attributions_ig)
def verify_skip(shape, skip):
    """Assert that ``reverse_seq_skip`` undoes ``seq_skip`` on random data.

    A random tensor of the given ``shape`` is passed through ``seq_skip`` and
    back through ``reverse_seq_skip`` with the same ``skip``; the round trip
    must reproduce every element exactly.
    """
    original = T.rand(shape)
    round_trip = reverse_seq_skip(seq_skip(original, skip), skip)
    assert T.all(round_trip == original)
def test_tensor_with_ndarray():
    """``tensor`` applied to an int64 ndarray keeps its address and values."""
    this_tests(tensor)
    source_array = np.array(a, dtype=np.int64)
    converted = tensor(source_array)
    # The array behind the tensor must report the same address as the input.
    assert np_address(converted.numpy()) == np_address(source_array)
    assert torch.all(converted == exp)
# verify part_0 with graph_partition_book eid = [] gpb = dgl.distributed.graph_partition_book.RangePartitionBook( 0, num_parts, node_map, edge_map, {ntype: i for i, ntype in enumerate(hg.ntypes)}, {etype: i for i, etype in enumerate(hg.etypes)}) subg0 = dgl.load_graphs('{}/part0/graph.dgl'.format(partitions_folder))[0][0] for etype in hg.etypes: type_eid = th.zeros((1, ), dtype=th.int64) eid.append(gpb.map_to_homo_eid(type_eid, etype)) eid = th.cat(eid) part_id = gpb.eid2partid(eid) assert th.all(part_id == 0) local_eid = gpb.eid2localeid(eid, 0) assert th.all(local_eid == eid) assert th.all(subg0.edata[dgl.EID][local_eid] == eid) lsrc, ldst = subg0.find_edges(local_eid) gsrc, gdst = subg0.ndata[dgl.NID][lsrc], subg0.ndata[dgl.NID][ldst] assert th.all(gsrc == lsrc) # gdst which is not assigned into current partition is not required to equal ldst assert th.all(th.logical_or(gdst == ldst, subg0.ndata['inner_node'][ldst] == 0)) etids, _ = gpb.map_to_per_etype(eid) src_tids, _ = gpb.map_to_per_ntype(gsrc) dst_tids, _ = gpb.map_to_per_ntype(gdst) canonical_etypes = [] etype_ids = th.arange(0, len(etypes)) for src_tid, etype_id, dst_tid in zip(src_tids, etype_ids, dst_tids):
def test_pairwise_gp(self):
    """Exercise ``PairwiseGP`` for batched / unbatched, float / double setups.

    For each combination the test covers model fitting, the errors raised
    for invalid training-mode calls, the default modules and priors, custom
    kernels, prior prediction, jitter handling, posterior shapes, input
    transforms and the strict mode of ``set_train_data``.
    """
    for batch_shape, dtype in itertools.product(
        (torch.Size(), torch.Size([2])), (torch.float, torch.double)):
        tkwargs = {"device": self.device, "dtype": dtype}
        X_dim = 2

        model, model_kwargs = self._get_model_and_data(
            batch_shape=batch_shape, X_dim=X_dim, **tkwargs)
        train_X = model_kwargs["datapoints"]
        train_comp = model_kwargs["comparisons"]

        # test training
        # regular training
        mll = PairwiseLaplaceMarginalLogLikelihood(model).to(**tkwargs)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=OptimizationWarning)
            fit_gpytorch_model(mll, options={"maxiter": 2}, max_retries=1)
        # prior training
        prior_m = PairwiseGP(None, None).to(**tkwargs)
        with self.assertRaises(RuntimeError):
            prior_m(train_X)
        # forward in training mode with non-training data
        custom_m = PairwiseGP(**model_kwargs)
        other_X = torch.rand(batch_shape + torch.Size([3, X_dim]), **tkwargs)
        other_comp = train_comp.clone()
        with self.assertRaises(RuntimeError):
            custom_m(other_X)
        custom_mll = PairwiseLaplaceMarginalLogLikelihood(custom_m).to(
            **tkwargs)
        post = custom_m(train_X)
        with self.assertRaises(RuntimeError):
            custom_mll(post, other_comp)

        # setting jitter = 0 with a singular covar will raise error
        # (all training points are identical here)
        sing_train_X = torch.ones(batch_shape + torch.Size([10, X_dim]),
                                  **tkwargs)
        with self.assertRaises(RuntimeError):
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                custom_m = PairwiseGP(sing_train_X, train_comp, jitter=0)
                custom_m.posterior(sing_train_X)

        # test init
        self.assertIsInstance(model.mean_module, ConstantMean)
        self.assertIsInstance(model.covar_module, ScaleKernel)
        self.assertIsInstance(model.covar_module.base_kernel, RBFKernel)
        self.assertIsInstance(
            model.covar_module.base_kernel.lengthscale_prior, GammaPrior)
        self.assertIsInstance(model.covar_module.outputscale_prior,
                              SmoothedBoxPrior)
        self.assertEqual(model.num_outputs, 1)
        self.assertEqual(model.batch_shape, batch_shape)

        # test custom models
        custom_m = PairwiseGP(**model_kwargs, covar_module=LinearKernel())
        self.assertIsInstance(custom_m.covar_module, LinearKernel)

        # prior prediction
        prior_m = PairwiseGP(None, None).to(**tkwargs)
        prior_m.eval()
        post = prior_m.posterior(train_X)
        self.assertIsInstance(post, GPyTorchPosterior)

        # test trying adding jitter
        pd_mat = torch.eye(2, 2)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            jittered_pd_mat = model._add_jitter(pd_mat)
        # the diagonal must have grown by the model's jitter (within 10%)
        diag_diff = (jittered_pd_mat - pd_mat).diagonal(dim1=-2, dim2=-1)
        self.assertTrue(
            torch.allclose(
                diag_diff,
                torch.full_like(diag_diff, model._jitter),
                atol=model._jitter / 10,
            ))

        # test initial utility val
        # the two highest-utility indices must equal the training comparison
        util_comp = torch.topk(model.utility, k=2,
                               dim=-1).indices.unsqueeze(-2)
        self.assertTrue(torch.all(util_comp == train_comp))

        # test posterior
        # test non batch evaluation
        X = torch.rand(batch_shape + torch.Size([3, X_dim]), **tkwargs)
        expected_shape = batch_shape + torch.Size([3, 1])
        posterior = model.posterior(X)
        self.assertIsInstance(posterior, GPyTorchPosterior)
        self.assertEqual(posterior.mean.shape, expected_shape)
        self.assertEqual(posterior.variance.shape, expected_shape)

        # expect to raise error when output_indices is not None
        with self.assertRaises(RuntimeError):
            model.posterior(X, output_indices=[0])

        # test re-evaluating utility when it's None
        model.utility = None
        posterior = model.posterior(X)
        self.assertIsInstance(posterior, GPyTorchPosterior)

        # test batch evaluation
        X = torch.rand(2, *batch_shape, 3, X_dim, **tkwargs)
        expected_shape = torch.Size([2]) + batch_shape + torch.Size([3, 1])
        posterior = model.posterior(X)
        self.assertIsInstance(posterior, GPyTorchPosterior)
        self.assertEqual(posterior.mean.shape, expected_shape)

        # test input_transform
        # the untransformed one should be stored
        normalize_tf = Normalize(d=2,
                                 bounds=torch.tensor([[0, 0], [0.5, 1.5]]))
        model = PairwiseGP(**model_kwargs, input_transform=normalize_tf)
        self.assertTrue(torch.all(model.datapoints == train_X))

        # test set_train_data strict mode
        model = PairwiseGP(**model_kwargs)
        changed_train_X = train_X.unsqueeze(0)
        changed_train_comp = train_comp.unsqueeze(0)
        # expect to raise error when set data to something different
        with self.assertRaises(RuntimeError):
            model.set_train_data(changed_train_X,
                                 changed_train_comp,
                                 strict=True)

        # the same datapoints but changed comparison will also raise error
        with self.assertRaises(RuntimeError):
            model.set_train_data(train_X, changed_train_comp, strict=True)
def test_tensor_with_list():
    """``tensor`` applied to the fixture ``a`` must equal ``exp`` everywhere."""
    this_tests(tensor)
    converted = tensor(a)
    matches = converted == exp
    assert torch.all(matches)
def _test_single_corresponding_points_alignment(
    self,
    batch_size=10,
    n_points=100,
    dim=3,
    use_pointclouds=False,
    estimate_scale=False,
    reflect=False,
    allow_reflection=False,
    random_weights=False,
):
    """
    Executes a single test for `corresponding_points_alignment` for a
    specific setting of the inputs / outputs.

    A random point cloud `X` is generated, a known transformation
    `(R, T, s)` is applied to obtain `X_t`, and the transformation estimated
    by `points_alignment.corresponding_points_alignment(X, X_t, weights)` is
    compared against the known one.

    Args:
        batch_size: Number of point clouds in the batch.
        n_points: Number of points requested per cloud (clouds are created
            with `random_pcl_size=True`).
        dim: Dimensionality of the points.
        use_pointclouds: If True, `X.points_padded()` /
            `X.num_points_per_cloud()` are used and the weights are passed
            as a list of per-cloud tensors.
        estimate_scale: Forwarded both to the ground-truth generator (as
            `scale`) and to the alignment call.
        reflect: If True, the ground-truth rotation is composed with a
            random reflection.
        allow_reflection: Forwarded to the alignment call.
        random_weights: If True, random per-point weights (some of them
            zeroed) are passed to the alignment call.

    Note:
        The device is hard-coded to "cuda:0".
    """
    device = torch.device("cuda:0")

    # initialize the ground truth point cloud
    X = TestCorrespondingPointsAlignment.init_point_cloud(
        batch_size=batch_size,
        n_points=n_points,
        dim=dim,
        device=device,
        use_pointclouds=use_pointclouds,
        random_pcl_size=True,
    )

    # generate the true transformation
    R, T, s = TestCorrespondingPointsAlignment.generate_pcl_transformation(
        batch_size=batch_size,
        scale=estimate_scale,
        reflect=reflect,
        dim=dim,
        device=device,
    )

    if reflect:
        # generate random reflection M and apply to the rotations
        M = TestCorrespondingPointsAlignment.generate_random_reflection(
            batch_size=batch_size, dim=dim, device=device
        )
        R = torch.bmm(M, R)

    weights = None
    if random_weights:
        template = X.points_padded() if use_pointclouds else X
        # random weights, normalized to sum to 1 along dim 1
        weights = torch.rand_like(template[:, :, 0])
        weights = weights / weights.sum(dim=1, keepdim=True)
        # zero out some weights as zero weights are a common use case:
        # every weight w with w * template.size()[1] <= 0.3 is set to 0,
        # the remaining weights are kept unchanged
        weights *= (weights * template.size()[1] > 0.3).to(weights)
        if use_pointclouds:  # convert to List[Tensor]
            # truncate each weight row to the number of points of its cloud
            weights = [
                w[:npts] for w, npts in zip(weights, X.num_points_per_cloud())
            ]

    # apply the generated transformation to the generated
    # point cloud X
    X_t = _apply_pcl_transformation(X, R, T, s=s)

    # run the CorrespondingPointsAlignment algorithm
    R_est, T_est, s_est = points_alignment.corresponding_points_alignment(
        X,
        X_t,
        weights,
        allow_reflection=allow_reflection,
        estimate_scale=estimate_scale,
    )

    # message attached to every assertion below so that a failure reports
    # the exact configuration that was being tested
    assert_error_message = (
        f"Corresponding_points_alignment assertion failure for "
        f"n_points={n_points}, "
        f"dim={dim}, "
        f"use_pointclouds={use_pointclouds}, "
        f"estimate_scale={estimate_scale}, "
        f"reflect={reflect}, "
        f"allow_reflection={allow_reflection},"
        f"random_weights={random_weights}."
    )

    # if we test the weighted case, check that weights help with noise
    if random_weights and not use_pointclouds and n_points >= (dim + 10):
        # add noise to 20% points with smallest weight
        X_noisy = X_t.clone()
        # topk on the negated weights selects the smallest weights
        _, mink_idx = torch.topk(-weights, int(n_points * 0.2), dim=1)
        # expand the point indices over the coordinate dimension so they
        # can be used as a scatter index
        mink_idx = mink_idx[:, :, None].expand(-1, -1, X_t.shape[-1])
        X_noisy.scatter_add_(
            1, mink_idx, 0.3 * torch.randn_like(mink_idx, dtype=X_t.dtype)
        )

        def align_and_get_mse(weights_):
            # aligns X_noisy to X_t using `weights_` and returns the error
            # weighted by the outer `weights` (not by `weights_`), so both
            # calls below are scored with the same metric
            R_n, T_n, s_n = points_alignment.corresponding_points_alignment(
                X_noisy,
                X_t,
                weights_,
                allow_reflection=allow_reflection,
                estimate_scale=estimate_scale,
            )

            X_t_est = _apply_pcl_transformation(X_noisy, R_n, T_n, s=s_n)

            return (((X_t_est - X_t) * weights[..., None]) ** 2).sum(
                dim=(1, 2)
            ) / weights.sum(dim=-1)

        # check that using weights leads to lower weighted_MSE(X_noisy, X_t)
        self.assertTrue(
            torch.all(align_and_get_mse(weights) <= align_and_get_mse(None))
        )

    if reflect and not allow_reflection:
        # check that all rotations have det=1
        self._assert_all_close(
            torch.det(R_est),
            R_est.new_ones(batch_size),
            assert_error_message,
            atol=2e-5,
        )

    else:
        # mask out inputs with too few non-degenerate points for assertions:
        # w is all ones unless weights are given and n_points < dim + 10, in
        # which case only batch elements with all-positive weights are kept
        w = (
            torch.ones_like(R_est[:, 0, 0])
            if weights is None or n_points >= dim + 10
            else (weights > 0.0).all(dim=1).to(R_est)
        )
        # check that the estimated transformation is the same
        # as the ground truth
        if n_points >= (dim + 1):
            # the checks on transforms apply only when
            # the problem setup is unambiguous
            msg = assert_error_message
            self._assert_all_close(R_est, R, msg, w[:, None, None], atol=1e-5)
            self._assert_all_close(T_est, T, msg, w[:, None])
            self._assert_all_close(s_est, s, msg, w)

            # check that the orthonormal part of the
            # transformation has a correct determinant (+1/-1)
            desired_det = R_est.new_ones(batch_size)
            if reflect:
                desired_det *= -1.0
            self._assert_all_close(torch.det(R_est), desired_det, msg, w, atol=2e-5)

        # check that the transformed point cloud
        # X matches X_t
        X_t_est = _apply_pcl_transformation(X, R_est, T_est, s=s_est)
        self._assert_all_close(
            X_t, X_t_est, assert_error_message, w[:, None, None], atol=2e-5
        )
def test_tensor_with_tensor():
    """Check that `tensor` applied to an existing tensor reuses its storage and equals `exp`."""
    this_tests(tensor)
    source = torch.tensor(a)
    result = tensor(source)
    # Same data pointer: no copy was made.
    same_storage = result.data_ptr() == source.data_ptr()
    assert same_storage
    elementwise_match = result == exp
    assert torch.all(elementwise_match)
def deepfool_attack(model, data, target, num_classes=10, overshoot=0.02, iter_num=50):
    """Run a batched DeepFool-style attack and return the perturbed inputs.

    Args:
        model: Callable mapping a batch of inputs to per-class scores of
            shape (batch, classes).
        data: Input batch; dimension 0 is the batch dimension. Values are
            clamped to [0, 1] after every update.
        target: Labels, only used for the early-exit check below.
        num_classes: Number of top-ranked classes (by the initial scores)
            considered as candidate decision boundaries.
        overshoot: Extra fraction of the accumulated perturbation added to
            the returned example.
        iter_num: Maximum number of refinement iterations.

    Returns:
        ``(0, 0)`` when the early-exit check fires, otherwise
        ``(init_pred, adversarial)`` where ``init_pred`` is the (batch, 1)
        tensor of initial predictions and ``adversarial`` has the shape of
        ``data``.

    Changes relative to the previous implementation:
        * the loop now stops as soon as every sample has left its initial
          class, and runs at most ``iter_num`` (not ``iter_num + 1``) steps;
        * all working tensors live on the same device as ``data``;
        * ``data.grad`` is cleared before every backward pass, so gradients
          no longer leak from one outer iteration into the next;
        * the accumulated perturbation is always applied to the clean input
          instead of the already-perturbed one (no double counting);
        * norms are taken per sample, and inputs of any rank are supported.
    """
    batch_size = data.size(0)
    data = data.to(device)
    target = target.to(device)
    data = Variable(data, requires_grad=True)

    output = model(data)
    init_pred = output.max(1, keepdim=True)[1]
    # NOTE(review): this exit condition is kept exactly as before. It compares
    # a (batch, 1) tensor with `target` via broadcasting and returns (0, 0)
    # when everything matches - confirm with the caller that this is intended.
    if torch.all(torch.eq(init_pred, target)):
        return 0, 0

    clean = data.detach().clone()  # unperturbed input, reference for every update
    rows = torch.arange(batch_size, device=clean.device)
    # Shape that broadcasts one scalar per sample over all remaining dims.
    per_sample = (batch_size,) + (1,) * (clean.dim() - 1)

    w = torch.zeros_like(clean)      # direction towards the closest boundary
    r_out = torch.zeros_like(clean)  # accumulated perturbation
    # Class ranking is fixed from the initial scores (column 0 = initial class).
    ranked = torch.argsort(output, dim=1, descending=True)
    n_candidates = min(num_classes, output.size(1))

    current = init_pred
    i = 0
    while i < iter_num and bool(torch.any(torch.eq(current, init_pred))):
        # Smallest boundary distance found so far for each sample.
        pert = torch.full_like(output[:, 0], float("inf"))

        # Gradient of the initial-class score. `.grad` accumulates across
        # backward calls, so it has to be cleared first.
        if data.grad is not None:
            data.grad.zero_()
        output[rows, ranked[:, 0]].sum().backward(retain_graph=True)
        original_grad = data.grad.detach().clone()

        for k in range(1, n_candidates):
            data.grad.zero_()
            output[rows, ranked[:, k]].sum().backward(retain_graph=True)
            current_grad = data.grad.detach().clone()

            # Linearised boundary between class k and the initial class.
            w_k = current_grad - original_grad
            f_k = (output[rows, ranked[:, k]] - output[rows, ranked[:, 0]]).detach()

            # Per-sample distance to that boundary.
            w_k_norm = w_k.flatten(1).norm(dim=1)
            pert_k = (f_k.abs() / (w_k_norm + 0.001)).to(pert.dtype)

            closer = pert_k < pert
            pert[closer] = pert_k[closer]
            w[closer] = w_k[closer]

        # Step towards the closest boundary, per sample.
        w_norm = w.flatten(1).norm(dim=1).view(per_sample)
        r_i = pert.view(per_sample).to(w.dtype) * w / (w_norm + 0.001)
        # Samples that already changed class are left untouched.
        still_original = torch.eq(current, init_pred).view(per_sample)
        r_out = r_out + r_i * still_original.to(r_i.dtype)

        # Re-evaluate the model on the perturbed input.
        data.data = torch.clamp(clean + r_out, 0, 1)
        output = model(data)
        current = output.max(1, keepdim=True)[1]
        i += 1

    data = torch.clamp(clean + (1 + overshoot) * r_out, 0, 1)
    return init_pred, data
def cw_attack(model, data, target, targeted=False, num_classes=10, max_steps=100, lr=0.001, \
              confidence=10, binary_search_steps=5, abort_early = True, clip_min=0, clip_max=1, clamp_fn='tanh', init_rand=False):
    """Run a Carlini-Wagner style L2 attack with a binary search over the loss weight.

    Args:
        model: Callable mapping a (batch, C, H, W) tensor to (batch, classes) scores.
        data: Input batch in (batch, C, H, W) layout.
        target: Integer class labels, shape (batch,).
        targeted: If True, optimise towards `target`; otherwise away from it.
        num_classes: Size of the one-hot encoding of `target`.
        max_steps: Optimiser steps per binary-search step.
        lr: NOTE(review): currently ignored - the optimiser uses a fixed
            learning rate of 0.0005, as before. Wiring it in would change the
            default behaviour, so this is left for a deliberate decision.
        confidence: Margin required between the relevant class scores.
        binary_search_steps: Number of binary-search steps over the constant
            that weights the classification term.
        abort_early: Stop a search step when the loss stops improving.
        clip_min, clip_max: Valid value range of the adversarial image.
        clamp_fn: 'tanh' to optimise in tanh space, anything else to clamp.
        init_rand: Start from a small random modifier instead of zeros.

    Returns:
        ``(0, 0)`` when the early-exit check fires, otherwise
        ``(init_pred, best_attack)`` with ``best_attack`` in (batch, C, H, W)
        layout. Samples for which no adversarial example was found keep
        their original values.

    Fixes relative to the previous implementation:
        * the result is converted back from BHWC with ``permute(0, 3, 1, 2)``;
          the former ``permute(0, 3, 2, 1)`` swapped height and width;
        * the best-attack buffer is an independent copy of the input rather
          than a view of a tensor that requires grad (in-place writes into
          such a view fail, and would also overwrite the input);
        * ``torch.normal`` is called with ``mean=`` (``means=`` is not a
          valid keyword);
        * ``max_steps < 10`` no longer divides by zero in the early-abort check;
        * scores/distances are moved to host memory before NumPy is applied,
          and ``target`` is moved to the device before its first use.
    """

    def _compare(output, target):
        # `output` is either a vector of scores or an already-computed label.
        if not isinstance(output, (float, int, np.integer)) and len(output.shape) > 0:
            output = np.copy(output)
            # Require the decision to hold with a margin of `confidence`.
            if targeted:
                output[target] -= confidence
            else:
                output[target] += confidence
            output = np.argmax(output)
        if targeted:
            return output == target
        return output != target

    def _loss(output, target, dist, scale_const):
        # compute the probability of the label class versus the maximum other
        real = (target * output).sum(1)
        other = ((1. - target) * output - target * 10000.).max(1)[0]
        if targeted:
            # if targeted, optimize for making the other class most likely
            loss1 = torch.clamp(other - real + confidence, min=0.)  # equiv to max(..., 0.)
        else:
            # if non-targeted, optimize for making this class least likely.
            loss1 = torch.clamp(real - other + confidence, min=0.)  # equiv to max(..., 0.)
        loss1 = torch.sum(scale_const * loss1)
        loss2 = dist.sum()
        return loss1 + loss2

    def _optimize(optimizer, model, input_var, modifier_var, target_var, scale_const_var, input_orig=None):
        # apply modifier and clamp resulting image to keep bounded from clip_min to clip_max
        if clamp_fn == 'tanh':
            input_adv = tanh_rescale(modifier_var + input_var, clip_min, clip_max)
        else:
            input_adv = torch.clamp(modifier_var + input_var, clip_min, clip_max)
        output = model(input_adv)

        # distance to the original input data
        if input_orig is None:
            dist = l2_dist(input_adv, input_var, keepdim=False)
        else:
            dist = l2_dist(input_adv, input_orig, keepdim=False)

        loss = _loss(output, target_var, dist, scale_const_var)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_np = loss.item()
        # NumPy cannot read device tensors, so copy to host memory first.
        dist_np = dist.detach().cpu().numpy()
        output_np = output.detach().cpu().numpy()
        # BHWC layout, kept as a tensor because it is written into the
        # best-attack buffer below.
        input_adv_bhwc = input_adv.detach().permute(0, 2, 3, 1)
        return loss_np, dist_np, output_np, input_adv_bhwc

    def torch_arctanh(x, eps=1e-6):
        # shrink slightly so that +-1 does not map to +-inf
        x = x * (1. - eps)
        return (torch.log((1 + x) / (1 - x))) * 0.5

    def tanh_rescale(x, x_min=-1., x_max=1.):
        return (torch.tanh(x) + 1) * 0.5 * (x_max - x_min) + x_min

    def l2_dist(x, y, keepdim=True):
        d = (x - y)**2
        return reduce_sum(d, keepdim=keepdim)

    def reduce_sum(x, keepdim=True):
        # sum over every dimension except the batch dimension
        for a in reversed(range(1, x.dim())):
            x = x.sum(a, keepdim=keepdim)
        return x

    repeat = binary_search_steps >= 10

    data = data.to(device)
    target = target.to(device)
    data = Variable(data, requires_grad=True)

    output = model(data)
    init_pred = output.max(1, keepdim=True)[1]
    # NOTE(review): exit condition kept exactly as before; it compares a
    # (batch, 1) tensor with `target` via broadcasting - confirm the intent.
    if torch.all(torch.eq(init_pred, target)):
        return 0, 0

    batch_size = data.size(0)
    target_np = target.cpu().numpy()

    # set the lower and upper bounds accordingly
    lower_bound = np.zeros(batch_size)
    scale_const = np.ones(batch_size) * 0.1
    upper_bound = np.ones(batch_size) * 1e10

    # placeholders for the overall best l2, label score, and adversarial image
    o_best_l2 = [1e10] * batch_size
    o_best_score = [-1] * batch_size
    # Independent BHWC copy of the input; rows are overwritten in place
    # whenever a better adversarial example is found.
    o_best_attack = data.detach().clone().permute(0, 2, 3, 1)

    # setup input (image) variable, clamp/scale as necessary
    if clamp_fn == 'tanh':
        # convert to tanh-space.
        # NOTE(review): arctanh is applied to `data` as-is, which is only an
        # exact inverse of tanh_rescale when data is in [-1, 1] and
        # clip_min/clip_max are -1/1. With the default clip range of [0, 1]
        # the starting image is (data + 1) / 2 - confirm the expected range.
        input_var = Variable(torch_arctanh(data), requires_grad=False)
        input_orig = tanh_rescale(input_var, clip_min, clip_max)
    else:
        input_var = Variable(data, requires_grad=False)
        input_orig = None

    # setup the target variable, we need it to be in one-hot form for the loss function
    target_onehot = torch.zeros(target.size() + (num_classes, ))
    target_onehot = target_onehot.to(device)
    target_onehot.scatter_(1, target.unsqueeze(1), 1.)
    target_var = Variable(target_onehot, requires_grad=False)

    # setup the modifier variable, this is the variable we are optimizing over
    modifier = torch.zeros(input_var.size()).float()
    if init_rand:
        # Experiment with a non-zero starting point...
        modifier = torch.normal(mean=modifier, std=0.001)
    modifier = modifier.to(device)
    modifier_var = Variable(modifier, requires_grad=True)

    optimizer = optim.Adam([modifier_var], lr=0.0005)

    # Early-abort check interval; at least 1 so that max_steps < 10 works.
    check_every = max(max_steps // 10, 1)

    for search_step in range(binary_search_steps):
        best_l2 = [1e10] * batch_size
        best_score = [-1] * batch_size

        # The last iteration (if we run many steps) repeat the search once.
        if repeat and search_step == binary_search_steps - 1:
            scale_const = upper_bound

        scale_const_tensor = torch.from_numpy(scale_const).float()
        scale_const_tensor = scale_const_tensor.to(device)
        scale_const_var = Variable(scale_const_tensor, requires_grad=False)

        prev_loss = 1e6
        for step in range(max_steps):
            # perform the attack
            loss, dist, output, adv_img = _optimize(optimizer, model, input_var, modifier_var,
                                                    target_var, scale_const_var, input_orig)

            if abort_early and step % check_every == 0:
                if loss > prev_loss * .9999:
                    break
                prev_loss = loss

            # update best result found
            for i in range(batch_size):
                target_label = target_np[i]
                output_logits = output[i]
                output_label = np.argmax(output_logits)
                di = dist[i]
                succeeded = _compare(output_logits, target_label)
                if di < best_l2[i] and succeeded:
                    best_l2[i] = di
                    best_score[i] = output_label
                if di < o_best_l2[i] and succeeded:
                    o_best_l2[i] = di
                    o_best_score[i] = output_label
                    o_best_attack[i] = adv_img[i]
        # end inner step loop

        # adjust the constants
        for i in range(batch_size):
            if _compare(best_score[i], target_np[i]) and best_score[i] != -1:
                # successful, do binary search and divide const by two
                upper_bound[i] = min(upper_bound[i], scale_const[i])
                if upper_bound[i] < 1e9:
                    scale_const[i] = (lower_bound[i] + upper_bound[i]) / 2
            else:
                # failure, multiply by 10 if no solution found
                # or do binary search with the known upper bound
                lower_bound[i] = max(lower_bound[i], scale_const[i])
                if upper_bound[i] < 1e9:
                    scale_const[i] = (lower_bound[i] + upper_bound[i]) / 2
                else:
                    scale_const[i] *= 10

    # BHWC -> BCHW (inverse of the (0, 2, 3, 1) permutation used above)
    return init_pred, o_best_attack.permute(0, 3, 1, 2)
def test_load_obj_complex(self):
    """Load an in-memory .obj with comments, normals, UVs, a polygon and
    negative indices, and compare every returned field with hand-written
    expectations."""
    obj_lines = [
        "# this is a comment",  # Comments should be ignored.
        "v 0.1 0.2 0.3",
        "v 0.2 0.3 0.4",
        "v 0.3 0.4 0.5",
        "v 0.4 0.5 0.6",
        "vn 0.000000 0.000000 -1.000000",
        "vn -1.000000 -0.000000 -0.000000",
        "vn -0.000000 -0.000000 1.000000",  # Normals should not be ignored.
        "v 0.5 0.6 0.7",
        "vt 0.749279 0.501284 0.0",  # Some files add 0.0 - ignore this.
        "vt 0.999110 0.501077",
        "vt 0.999455 0.750380",
        "f 1 2 3",
        "f 1 2 4 3 5",  # Polygons should be split into triangles
        "f 2/1/2 3/1/2 4/2/2",  # Texture/normals are loaded correctly.
        "f -1 -2 1",  # Negative indexing counts from the end.
    ]
    obj_stream = StringIO("\n".join(obj_lines))
    verts, faces, aux = load_obj(obj_stream)

    # Expected geometry.
    want_verts = torch.tensor(
        [
            [0.1, 0.2, 0.3],
            [0.2, 0.3, 0.4],
            [0.3, 0.4, 0.5],
            [0.4, 0.5, 0.6],
            [0.5, 0.6, 0.7],
        ],
        dtype=torch.float32,
    )
    want_faces = torch.tensor(
        [
            [0, 1, 2],  # First face
            [0, 1, 3],  # Second face (polygon)
            [0, 3, 2],  # Second face (polygon)
            [0, 2, 4],  # Second face (polygon)
            [1, 2, 3],  # Third face (normals / texture)
            [4, 3, 0],  # Fourth face (negative indices)
        ],
        dtype=torch.int64,
    )
    want_normals = torch.tensor(
        [
            [0.000000, 0.000000, -1.000000],
            [-1.000000, -0.000000, -0.000000],
            [-0.000000, -0.000000, 1.000000],
        ],
        dtype=torch.float32,
    )
    want_uvs = torch.tensor(
        [[0.749279, 0.501284], [0.999110, 0.501077], [0.999455, 0.750380]],
        dtype=torch.float32,
    )

    # Per-face index tables: -1 everywhere except the one face (row 4)
    # that specifies texture / normal indices.
    want_normals_idx = torch.full_like(want_faces, -1)
    want_normals_idx[4, :] = torch.tensor([1, 1, 1], dtype=torch.int64)
    want_textures_idx = torch.full_like(want_faces, -1)
    want_textures_idx[4, :] = torch.tensor([0, 0, 1], dtype=torch.int64)

    verts_match = verts == want_verts
    self.assertTrue(torch.all(verts_match))
    faces_match = faces.verts_idx == want_faces
    self.assertTrue(torch.all(faces_match))
    self.assertClose(aux.normals, want_normals)
    self.assertClose(aux.verts_uvs, want_uvs)
    self.assertClose(faces.normals_idx, want_normals_idx)
    self.assertClose(faces.textures_idx, want_textures_idx)
    # No material file was referenced, so neither field is populated.
    self.assertTrue(aux.material_colors is None)
    self.assertTrue(aux.texture_images is None)
def run(self, obs):
    """Run `self.n_sims` search simulations from `obs` and summarise the root.

    Each simulation walks the stored tree from node 0 along the actions
    returned by `self.ucb_select_child`, expands one new node (stored at
    index `sim_id`) through `self.network.inference`, and calls
    `self.backup` from that node.

    Returns:
        A 4-tuple: the result of `self.select_action()`, the softmax of the
        root row of `self.q`, the value target (max of the initial value when
        `self.args.no_search_value_targets` is set, otherwise max of the root
        row of `self.q`), and the max of the initial value.
    """
    self.reset_tensors()
    scaled_obs = obs.to(self.device).float() / 255.
    root_hidden, _root_reward, _root_logits, initial_value = self.network.initial_inference(
        scaled_obs)

    # Node 0 is the root.
    self.hidden_state[:, 0, :] = root_hidden
    self.q[:, 0] = initial_value.to(self.search_device)
    self.min_q = torch.min(self.q[:, 0], dim=-1)[0]
    self.max_q = torch.max(self.q[:, 0], dim=-1)[0]
    if self.args.q_dirichlet:
        self.add_exploration_noise()

    for sim_id in range(1, self.n_sims + 1):
        # Pre-compute action to select at each node in case it is visited in this sim
        planned_actions = self.ucb_select_child(sim_id)
        self.id_current.fill_(0)
        self.search_depths.fill_(0)

        # Because the tree has exactly sim_id nodes, we are guaranteed
        # to take at most sim_id transitions (including expansion).
        for depth in range(sim_id):
            # Children of the node each run is currently at.
            child_index = self.id_current.unsqueeze(-1).expand(
                -1, -1, self.num_actions)
            children_here = self.id_children.gather(1, child_index)

            # Action planned for the current node, and the child it leads to.
            clamped_current = self.id_current.clamp_max(sim_id - 1)
            chosen_actions = planned_actions.gather(1, clamped_current)
            id_next = children_here.squeeze().gather(-1, chosen_actions)
            self.search_actions[:, depth] = chosen_actions.squeeze()

            # A run terminates on the exact step where its next state is
            # unexpanded (null). Runs that terminated earlier also satisfy
            # that, so additionally require the current state to be non-null.
            still_live = (self.id_current != self.id_null)
            reached_null = (id_next == self.id_null)
            just_finished = still_live * reached_null

            # Remember the last node and action of runs finishing now;
            # they are used for the expansion below.
            self.id_final[just_finished] = self.id_current[just_finished]
            self.actions_final[just_finished] = chosen_actions[just_finished]

            # If not done, increment search depths by one.
            self.search_depths[~reached_null] += 1
            self.id_current = id_next

            if torch.all(reached_null):
                break

        # Hidden state of the node being expanded.
        # NOTE(review): (256, 6, 6) is hard-coded; presumably the hidden
        # state's trailing dimensions - confirm against the network.
        final_index = self.id_final[:, :, None, None, None].expand(
            -1, -1, 256, 6, 6).to(self.device)
        input_state = self.hidden_state.gather(1, final_index).squeeze()
        new_hidden, reward, policy_logits, value = self.network.inference(
            input_state, self.actions_final.to(self.device))
        value = value.to(self.search_device)

        # The new node is stored at entry sim_id
        parent_ids = self.id_final.squeeze()
        taken_actions = self.actions_final.squeeze()
        self.hidden_state[:, sim_id, :] = new_hidden
        self.reward[self.batch_range, sim_id, taken_actions] = reward.to(
            self.search_device)
        # self.prior[:, sim_id] = F.softmax(policy_logits, dim=-1)
        self.q[:, sim_id] = value

        # Store the pointers from parent to new node and back.
        self.id_children[self.batch_range, parent_ids, taken_actions] = sim_id
        self.id_parent[:, sim_id] = self.id_final.squeeze()

        # The backup starts from the new node
        self.id_final.fill_(sim_id)
        self.backup(self.id_final, sim_id, value)

    # Get action, policy and value from the root after the search has finished
    action = self.select_action()
    value_source = initial_value if self.args.no_search_value_targets else self.q[:, 0]
    value = value_source.max(dim=-1)[0]
    root_policy = F.softmax(self.q[:, 0], dim=-1)
    return action, root_policy, value, initial_value.max(dim=-1)[0]
def optimize_tabular(self, agent, trajectory_buffer, update_target=False):
    """Update the agent's tabular Q/C tables, its Alpha and its tabular policy
    from the trajectory stored in `trajectory_buffer`.

    Steps performed, in order:
      1. The whole buffer is sampled in order and every state is mapped to a
         concept index via `agent.concept_architecture(...)[0].argmax(1)`.
      2. Q and C are updated backwards over the trajectory with importance
         weights `PA_S[concept, action] / action_distributions[step, action]`.
      3. `agent.update_Q`, optionally `agent.update_target`, and
         `agent.update_Alpha` are called.
      4. A new policy proportional to `exp(Q_adjusted / Alpha)` is searched
         for, increasing the per-state dual variables until the KL divergence
         to the current policy is within `self.policy_divergence_limit`
         (at most 8 attempts).

    Args:
        agent: Object providing `concept_architecture`, `PA_S`, `log_Alpha`,
            `Q_table`, `C_table`, `Pi_table` and the `update_*` methods used
            below.
        trajectory_buffer: Object with a `buffer` attribute (its length is
            the number of steps N) and a `sample` method.
        update_target: If True, `agent.update_target(self.MC_update_rate)`
            is called after the Q update.

    Returns:
        dict with keys 'Q_change', 'entropy', 'Alpha', 'found_policy' and
        'max_dual'.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation was lost; confirm the extent of the `no_grad` block,
    of `if N > 0:` and of the `is_nonzero(C[S, A])` guard against the
    upstream file.
    """
    with torch.no_grad():
        N = len(trajectory_buffer.buffer)

        # random_sample=False: the transitions are used as an ordered
        # sequence by the backward loop below
        inner_states, outer_states, actions, action_distributions, rewards, dones, next_inner_states, \
            next_outer_states = trajectory_buffer.sample(None, random_sample=False)

        # concept index of every visited state
        PS_s = agent.concept_architecture(inner_states, outer_states)[0]
        concepts = PS_s.argmax(1).detach().cpu().numpy()
        # concept index of the state following the last transition
        next_PS_s = agent.concept_architecture(
            next_inner_states[-1, :].view(1, -1),
            next_outer_states[-1, :, :, :].unsqueeze(0))[0]
        next_concept = next_PS_s.argmax(1).detach().cpu().numpy()
        # successor concept of step i is the concept of step i + 1
        next_concepts = np.concatenate([concepts[1:], next_concept])

        # entropy of the action distribution per concept (HA_gS) and its
        # average weighted by self.PS (HA_S)
        PA_S, log_PA_S = agent.PA_S()
        HA_gS = -(PA_S * log_PA_S).sum(1)
        HA_S = (self.PS.view(-1) * HA_gS).sum()
        Alpha = agent.log_Alpha.exp().item()

        assert torch.isfinite(HA_S).all(), 'Alahuakbar'

        # PA_s = agent.second_level_architecture.actor(inner_states, outer_states)[0]
        # per-step importance ratio: current tabular probability of the taken
        # action divided by the probability stored with the transition
        ratios = PA_S[concepts, actions] / action_distributions[np.arange(0, N), actions]
        if self.clip_ratios:
            ratios = ratios.clip(0.0, 1.0)

        assert torch.isfinite(ratios).all(), 'Alahuakbar 1'

        # start from decayed copies of the agent's tables
        Q = (1. - self.forgetting_factor) * agent.Q_table.detach().clone()
        C = (1. - self.forgetting_factor) * agent.C_table.detach().clone()

        assert torch.isfinite(Q).all(), 'Alahuakbar 2'
        assert torch.isfinite(C).all(), 'Alahuakbar 3'

        if N > 0:
            G = 0               # discounted return accumulated from the end
            WIS_trajectory = 1  # running product of importance ratios
            for i in range(N - 1, -1, -1):
                S, A, R, WIS_step, nS = concepts[i], actions[i], rewards[
                    i], ratios[i], next_concepts[i]
                G = self.discount_factor * G + R
                if self.MC_entropy:
                    # entropy bonus: entropy at the next concept relative to
                    # the average entropy, scaled by Alpha
                    dH = HA_gS[nS] - HA_S
                    G += self.discount_factor * Alpha * dH
                C[S, A] = C[S, A] + WIS_trajectory
                # guard against dividing by a zero cumulative weight
                if torch.is_nonzero(C[S, A]):
                    assert torch.isfinite(C[S, A]), 'Infinity and beyond!'
                    Q[S, A] = Q[S, A] + (WIS_trajectory / C[S, A]) * (G - Q[S, A])
                WIS_trajectory = WIS_trajectory * WIS_step
                if self.clip_ratios:
                    WIS_trajectory = WIS_trajectory.clip(0.0, 10.0)
                # once the weight is zero, earlier steps cannot contribute
                if not torch.is_nonzero(WIS_trajectory):
                    break

        # mean squared change of the Q table caused by this update
        dQ = (Q - agent.Q_table).pow(2).mean()

        agent.update_Q(Q, C)
        if update_target:
            agent.update_target(self.MC_update_rate)

        # entropy of the agent's current tabular policy
        Pi = agent.Pi_table.detach().clone()
        log_Pi = torch.log(Pi)
        HA_gS = -(Pi * log_Pi).sum(1)
        HA_S = (self.PS.view(-1) * HA_gS).sum()

        assert torch.isfinite(HA_S).all(), 'Alahuakbar'

        # Optimize Alpha
        agent.update_Alpha(HA_S)

        # Optimize policy
        Alpha = agent.log_Alpha.exp().item()
        # one dual variable per row of the policy table
        duals = (1e-3) * torch.ones_like(self.PS.view(-1, 1))
        found_policy = False
        iters_left = 8
        while not found_policy and iters_left > 0:
            # blend Q with the current log-policy; larger duals keep the new
            # policy closer to the current one
            Q_adjusted = (Q + Alpha * duals * log_Pi) / (1. + duals)
            Pi_new = torch.exp(Q_adjusted / (Alpha + 1e-10))
            Pi_new = Pi_new / Pi_new.sum(1, keepdim=True)
            log_Pi_new = torch.log(Pi_new + 1e-10)
            # per-row KL(Pi_new || Pi)
            KL_div = (Pi_new * (log_Pi_new - log_Pi)).sum(1, keepdim=True)
            valid_policies = KL_div <= self.policy_divergence_limit
            if torch.all(valid_policies):
                found_policy = True
            else:
                iters_left -= 1
                # multiply the dual by 10 only for rows violating the limit
                duals = 10**(1. - valid_policies.float()) * duals

        if found_policy:
            agent.update_policy(Pi_new)

        metrics = {
            'Q_change': dQ.item(),
            'entropy': HA_S.item(),
            'Alpha': Alpha,
            'found_policy': float(found_policy),
            'max_dual': duals.max().item(),
        }

        return metrics