def __add__(self, other: Self) -> Self:
    copy = super().__add__(other)
    copy.iw = torch.cat(
        [torch.atleast_1d(copy.iw), torch.atleast_1d(other.iw)],
        dim=0,
    )
    return copy
def forward(self, obs: Optional[to.Tensor] = None) -> to.Tensor:
    # Check which time regime the policy/environment is currently in
    if self.t_curr < self.t_end:
        # Get a vector of powers of the current time
        t_powers = to.tensor(
            [self.t_curr**o for o in range(self.order + 1)],
            dtype=self.coeffs.dtype)
        # Compute the action
        act = to.mv(self.coeffs.T, t_powers)
    elif self.overtime_behavior == "hold":
        # Get a vector of powers of the final time
        t_powers = to.tensor(
            [self.t_end**o for o in range(self.order + 1)],
            dtype=self.coeffs.dtype)
        # Compute the action
        act = to.mv(self.coeffs.T, t_powers)
    else:  # self.overtime_behavior == "zero"
        act = to.zeros(self.act_space_shape, dtype=self.coeffs.dtype)

    # Advance the internal time counter
    self.t_curr += self.dt

    return to.atleast_1d(act)
def atleast_1d(tensor_or_array: Union[torch.Tensor, np.ndarray]):
    if isinstance(tensor_or_array, torch.Tensor):
        if hasattr(torch, "atleast_1d"):
            tensor_or_array = torch.atleast_1d(tensor_or_array)
        elif tensor_or_array.ndim < 1:
            tensor_or_array = tensor_or_array[None]
    else:
        tensor_or_array = np.atleast_1d(tensor_or_array)
    return tensor_or_array
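# Usage sketch (not part of the original snippet) for the atleast_1d helper above:
# it dispatches on the input type and falls back to manual unsqueezing on torch
# versions that predate torch.atleast_1d.
import numpy as np
import torch

assert atleast_1d(torch.tensor(3.0)).shape == (1,)   # 0-d tensor -> 1-d tensor
assert atleast_1d(torch.ones(2, 3)).shape == (2, 3)   # higher-dim input unchanged
assert atleast_1d(np.float64(3.0)).shape == (1,)      # numpy scalars go through np.atleast_1d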
def equal(y_pred: Tensor, *, y_true: Tensor) -> Tensor:
    y_true = torch.atleast_1d(y_true.squeeze()).long()
    if len(y_pred) != len(y_true):
        raise ValueError(
            "'y_pred' and 'y_true' must match in size at dimension 0.")
    # Interpret floating point predictions as potential logits and attempt to convert
    # them to hard predictions.
    if y_pred.is_floating_point():
        y_pred = hard_prediction(y_pred)
    return (y_pred == y_true).float()
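# Usage sketch (hypothetical data) for equal() above. hard_prediction is assumed to
# be an external helper that turns logits into class indices; with integer (already
# hard) predictions it is bypassed, so this example stays self-contained.
import torch

y_pred = torch.tensor([0, 2, 1, 1])
y_true = torch.tensor([0, 1, 1, 2])
print(equal(y_pred, y_true=y_true))  # tensor([1., 0., 1., 0.])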
def _lp_fn(self, params):
    params_constrained = {
        k: self.transforms[k].inv(v) for k, v in params.items()
    }
    cond_model = poutine.condition(self.model, params_constrained)
    model_trace = poutine.trace(cond_model).get_trace(
        *self.model_args, **self.model_kwargs)
    log_joint = torch.atleast_1d(
        self.trace_prob_evaluator.log_prob(model_trace))
    for name, t in self.transforms.items():
        log_joint -= t.log_abs_det_jacobian(params_constrained[name],
                                            params[name])
    return log_joint
def loss_map(model, test_dataset, save, bounds=None):
    f, a = plt.subplots(figsize=(5, 4))
    bounds = bounds if bounds is not None else [[-2, -2], [2, 2]]
    (fs, ifs, ifs_star, out, dout) = test_dataset.tensors

    # Feasible points
    a.scatter(fs[:, 0], fs[:, 1], s=20., c='green', marker='+')
    if fs.shape[0] > 0:
        cls = torch.atleast_1d(model.classify(fs))
        a.scatter(fs[:, 0], fs[:, 1], s=20., c=cls, marker='o')
    for i in range(ifs.size(0)):
        a.plot([ifs[i, 0], ifs_star[i, 0]], [ifs[i, 1], ifs_star[i, 1]],
               '--+r')

    # Infeasible points
    ifs.requires_grad = True
    _o_ = model._net(ifs)
    jpred = (out - _o_).abs().detach()
    ifs.requires_grad = False
    a.scatter(ifs[:, 0], ifs[:, 1], s=20., c=jpred.log().squeeze(),
              marker='o')
    for i in range(ifs.size(0)):
        a.annotate('{0:.2e}'.format(jpred.squeeze()[i]),
                   (ifs[i, 0], ifs[i, 1]), fontsize='xx-small')

    # Projected points
    ifs_star.requires_grad = True
    _o_ = model._net(ifs_star)
    jpred = (_o_).abs().detach()
    ifs_star.requires_grad = False
    a.scatter(ifs_star[:, 0], ifs_star[:, 1], s=20.,
              c=jpred.log().squeeze(), marker='+')
    for i in range(ifs.size(0)):
        a.annotate('{0:.2e}'.format(jpred.squeeze()[i]),
                   (ifs_star[i, 0], ifs_star[i, 1]), fontsize='xx-small')

    a.set_xlim([bounds[0][0], bounds[1][0]])
    a.set_ylim([bounds[0][1], bounds[1][1]])
    f.savefig(save + '.pdf', bbox_inches='tight')
def forward(self, meas: to.Tensor) -> to.Tensor:
    meas = meas.to(dtype=to.get_default_dtype())

    # Unpack the raw measurement (is not an observation)
    err = self.state_des - meas  # th, al, thd, ald

    if all(to.abs(err) <= self.tols):
        self.done = True
    elif all(to.abs(err) > self.tols
             ) and self.state_des[0] == self.state_des[1] == 0.0:
        # In case of initializing the Qube, increase the P-gain over time. This is useful since the resistance from
        # the Qube's cable can be too strong for the PD controller to reach the steady state, like a fake I-gain.
        self.pd_gains.data = self.pd_gains + to.tensor(
            [0.01, 0.0, 0.0, 0.0])  # no in-place op because of grad
        self.pd_gains.data = to.min(
            self.pd_gains,
            to.tensor([20.0, pyrado.inf, pyrado.inf, pyrado.inf]))

    # PD control
    return to.atleast_1d(self.pd_gains.dot(err))
def lsmr(A, b, damp=0., atol=1e-6, btol=1e-6, conlim=1e8,
         maxiter=None, x0=None, check_nonzero=True):
    """Iterative solver for least-squares problems.

    lsmr solves the system of linear equations ``Ax = b``. If the system
    is inconsistent, it solves the least-squares problem ``min ||b - Ax||_2``.
    ``A`` is a rectangular matrix of dimension m-by-n, where all cases are
    allowed: m = n, m > n, or m < n. ``b`` is a vector of length m.
    The matrix A may be dense or sparse (usually sparse).

    Parameters
    ----------
    A : {matrix, sparse matrix, ndarray, LinearOperator}
        Matrix A in the linear system. Alternatively, ``A`` can be a linear
        operator which can produce ``Ax`` and ``A^H x`` using, e.g.,
        ``scipy.sparse.linalg.LinearOperator``.
    b : array_like, shape (m,)
        Vector ``b`` in the linear system.
    damp : float
        Damping factor for regularized least-squares. `lsmr` solves
        the regularized least-squares problem::

         min ||(b) - (  A   )x||
             ||(0)   (damp*I) ||_2

        where damp is a scalar. If damp is None or 0, the system
        is solved without regularization.
    atol, btol : float, optional
        Stopping tolerances. `lsmr` continues iterations until a
        certain backward error estimate is smaller than some quantity
        depending on atol and btol. Let ``r = b - Ax`` be the
        residual vector for the current approximate solution ``x``.
        If ``Ax = b`` seems to be consistent, ``lsmr`` terminates
        when ``norm(r) <= atol * norm(A) * norm(x) + btol * norm(b)``.
        Otherwise, lsmr terminates when
        ``norm(A^H r) <= atol * norm(A) * norm(r)``. If both tolerances
        are 1.0e-6 (say), the final ``norm(r)`` should be accurate to
        about 6 digits. (The final ``x`` will usually have fewer correct
        digits, depending on ``cond(A)`` and the size of LAMBDA.) If `atol`
        or `btol` is None, a default value of 1.0e-6 will be used.
        Ideally, they should be estimates of the relative error in the
        entries of ``A`` and ``b`` respectively. For example, if the entries
        of ``A`` have 7 correct digits, set ``atol = 1e-7``. This prevents
        the algorithm from doing unnecessary work beyond the
        uncertainty of the input data.
    conlim : float, optional
        `lsmr` terminates if an estimate of ``cond(A)`` exceeds
        `conlim`. For compatible systems ``Ax = b``, conlim could be
        as large as 1.0e+12 (say). For least-squares problems,
        `conlim` should be less than 1.0e+8. If `conlim` is None, the
        default value is 1e+8. Maximum precision can be obtained by
        setting ``atol = btol = conlim = 0``, but the number of
        iterations may then be excessive.
    maxiter : int, optional
        `lsmr` terminates if the number of iterations reaches
        `maxiter`. The default is ``maxiter = min(m, n)``. For
        ill-conditioned systems, a larger value of `maxiter` may be
        needed.
    x0 : array_like, shape (n,), optional
        Initial guess of ``x``, if None zeros are used.

    Returns
    -------
    x : ndarray of float
        Least-square solution returned.
    itn : int
        Number of iterations used.
    """
    A = aslinearoperator(A)
    b = torch.atleast_1d(b)
    if b.dim() > 1:
        b = b.squeeze()
    eps = torch.finfo(b.dtype).eps
    damp = torch.as_tensor(damp, dtype=b.dtype, device=b.device)
    ctol = 1 / conlim if conlim > 0 else 0.
    m, n = A.shape
    if maxiter is None:
        maxiter = min(m, n)

    u = b.clone()
    normb = b.norm()
    if x0 is None:
        x = b.new_zeros(n)
        beta = normb.clone()
    else:
        x = torch.atleast_1d(x0).clone()
        u.sub_(A.matvec(x))
        beta = u.norm()

    if beta > 0:
        u.div_(beta)
        v = A.rmatvec(u)
        alpha = v.norm()
    else:
        v = b.new_zeros(n)
        alpha = b.new_tensor(0)

    v = torch.where(alpha > 0, v / alpha, v)

    # Initialize variables for 1st iteration.
    zetabar = alpha * beta
    alphabar = alpha.clone()
    rho = b.new_tensor(1)
    rhobar = b.new_tensor(1)
    cbar = b.new_tensor(1)
    sbar = b.new_tensor(0)

    h = v.clone()
    hbar = b.new_zeros(n)

    # Initialize variables for estimation of ||r||.
    betadd = beta.clone()
    betad = b.new_tensor(0)
    rhodold = b.new_tensor(1)
    tautildeold = b.new_tensor(0)
    thetatilde = b.new_tensor(0)
    zeta = b.new_tensor(0)
    d = b.new_tensor(0)

    # Initialize variables for estimation of ||A|| and cond(A)
    normA2 = alpha.square()
    maxrbar = b.new_tensor(0)
    minrbar = b.new_tensor(0.99 * torch.finfo(b.dtype).max)
    normA = normA2.sqrt()
    condA = b.new_tensor(1)
    normx = b.new_tensor(0)
    normar = b.new_tensor(0)
    normr = b.new_tensor(0)

    # extra buffers (added by Reuben)
    c = b.new_tensor(0)
    s = b.new_tensor(0)
    chat = b.new_tensor(0)
    shat = b.new_tensor(0)
    alphahat = b.new_tensor(0)
    ctildeold = b.new_tensor(0)
    stildeold = b.new_tensor(0)
    rhotildeold = b.new_tensor(0)
    rhoold = b.new_tensor(0)
    rhobarold = b.new_tensor(0)
    zetaold = b.new_tensor(0)
    thetatildeold = b.new_tensor(0)
    betaacute = b.new_tensor(0)
    betahat = b.new_tensor(0)
    betacheck = b.new_tensor(0)
    taud = b.new_tensor(0)

    # Main iteration loop.
    for itn in range(1, maxiter + 1):
        # Perform the next step of the bidiagonalization to obtain the
        # next beta, u, alpha, v. These satisfy the relations
        #     beta*u  = A*v  - alpha*u,
        #     alpha*v = A'*u - beta*v.
        u.mul_(-alpha).add_(A.matvec(v))
        torch.norm(u, out=beta)

        if (not check_nonzero) or beta > 0:
            # check_nonzero option provides a means to avoid the GPU-CPU
            # synchronization of a `beta > 0` check. For most cases
            # beta == 0 is unlikely, but use this option with caution.
            u.div_(beta)
            v.mul_(-beta).add_(A.rmatvec(u))
            torch.norm(v, out=alpha)
            v = torch.where(alpha > 0, v / alpha, v)

        # At this point, beta = beta_{k+1}, alpha = alpha_{k+1}.
        _sym_ortho(alphabar, damp, out=(chat, shat, alphahat))

        # Use a plane rotation (Q_i) to turn B_i to R_i
        rhoold.copy_(rho, non_blocking=True)
        _sym_ortho(alphahat, beta, out=(c, s, rho))
        thetanew = torch.mul(s, alpha)
        torch.mul(c, alpha, out=alphabar)

        # Use a plane rotation (Qbar_i) to turn R_i^T to R_i^bar
        rhobarold.copy_(rhobar, non_blocking=True)
        zetaold.copy_(zeta, non_blocking=True)
        thetabar = sbar * rho
        rhotemp = cbar * rho
        _sym_ortho(cbar * rho, thetanew, out=(cbar, sbar, rhobar))
        torch.mul(cbar, zetabar, out=zeta)
        zetabar.mul_(-sbar)

        # Update h, h_hat, x.
        hbar.mul_(-thetabar * rho).div_(rhoold * rhobarold)
        hbar.add_(h)
        x.addcdiv_(zeta * hbar, rho * rhobar)
        h.mul_(-thetanew).div_(rho)
        h.add_(v)

        # Estimate of ||r||.

        # Apply rotation Qhat_{k,2k+1}.
        torch.mul(chat, betadd, out=betaacute)
        torch.mul(-shat, betadd, out=betacheck)

        # Apply rotation Q_{k,k+1}.
        torch.mul(c, betaacute, out=betahat)
        torch.mul(-s, betaacute, out=betadd)

        # Apply rotation Qtilde_{k-1}.
        # betad = betad_{k-1} here.
        thetatildeold.copy_(thetatilde, non_blocking=True)
        _sym_ortho(rhodold, thetabar, out=(ctildeold, stildeold, rhotildeold))
        torch.mul(stildeold, rhobar, out=thetatilde)
        torch.mul(ctildeold, rhobar, out=rhodold)
        betad.mul_(-stildeold).addcmul_(ctildeold, betahat)

        # betad   = betad_k here.
        # rhodold = rhod_k  here.
        tautildeold.mul_(-thetatildeold).add_(zetaold).div_(rhotildeold)
        torch.div(zeta - thetatilde * tautildeold, rhodold, out=taud)
        d.addcmul_(betacheck, betacheck)
        torch.sqrt(d + (betad - taud).square() + betadd.square(), out=normr)

        # Estimate ||A||.
        normA2.addcmul_(beta, beta)
        torch.sqrt(normA2, out=normA)
        normA2.addcmul_(alpha, alpha)

        # Estimate cond(A).
        torch.max(maxrbar, rhobarold, out=maxrbar)
        if itn > 1:
            torch.min(minrbar, rhobarold, out=minrbar)

        # ------- Test for convergence --------

        if itn % 10 == 0:
            # Compute norms for convergence testing.
            torch.abs(zetabar, out=normar)
            torch.norm(x, out=normx)
            torch.div(torch.max(maxrbar, rhotemp),
                      torch.min(minrbar, rhotemp), out=condA)

            # Now use these norms to estimate certain other quantities,
            # some of which will be small near a solution.
            test1 = normr / normb
            test2 = normar / (normA * normr + eps)
            test3 = 1 / (condA + eps)
            t1 = test1 / (1 + normA * normx / normb)
            rtol = btol + atol * normA * normx / normb

            # The first 3 tests guard against extremely small values of
            # atol, btol or ctol. (The user may have set any or all of
            # the parameters atol, btol, conlim to 0.)
            # The effect is equivalent to the normal tests using
            # atol = eps, btol = eps, conlim = 1/eps.
            # The second 3 tests allow for tolerances set by the user.
            stop = ((1 + test3 <= 1) | (1 + test2 <= 1) | (1 + t1 <= 1) |
                    (test3 <= ctol) | (test2 <= atol) | (test1 <= rtol))
            if stop:
                break

    return x, itn
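# Usage sketch for lsmr() above (hypothetical values). It assumes the module's
# aslinearoperator() accepts a dense 2-D tensor, which is how lsmr wraps A
# internally; solving an overdetermined system recovers the least-squares fit.
import torch

torch.manual_seed(0)
A = torch.randn(50, 3, dtype=torch.float64)
x_true = torch.tensor([1.0, -2.0, 0.5], dtype=torch.float64)
b = A @ x_true + 1e-8 * torch.randn(50, dtype=torch.float64)

x, itn = lsmr(A, b, atol=1e-10, btol=1e-10)
print(itn, torch.allclose(x, x_true, atol=1e-4))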
def other_ops(self):
    a = torch.randn(4)
    b = torch.randn(4)
    c = torch.randint(0, 8, (5, ), dtype=torch.int64)
    e = torch.randn(4, 3)
    f = torch.randn(4, 4, 4)
    size = [0, 1]
    dims = [0, 1]
    return (
        torch.atleast_1d(a),
        torch.atleast_2d(a),
        torch.atleast_3d(a),
        torch.bincount(c),
        torch.block_diag(a),
        torch.broadcast_tensors(a),
        torch.broadcast_to(a, (4)),
        # torch.broadcast_shapes(a),
        torch.bucketize(a, b),
        torch.cartesian_prod(a),
        torch.cdist(e, e),
        torch.clone(a),
        torch.combinations(a),
        torch.corrcoef(a),
        # torch.cov(a),
        torch.cross(e, e),
        torch.cummax(a, 0),
        torch.cummin(a, 0),
        torch.cumprod(a, 0),
        torch.cumsum(a, 0),
        torch.diag(a),
        torch.diag_embed(a),
        torch.diagflat(a),
        torch.diagonal(e),
        torch.diff(a),
        torch.einsum("iii", f),
        torch.flatten(a),
        torch.flip(e, dims),
        torch.fliplr(e),
        torch.flipud(e),
        torch.kron(a, b),
        torch.rot90(e),
        torch.gcd(c, c),
        torch.histc(a),
        torch.histogram(a),
        torch.meshgrid(a),
        torch.lcm(c, c),
        torch.logcumsumexp(a, 0),
        torch.ravel(a),
        torch.renorm(e, 1, 0, 5),
        torch.repeat_interleave(c),
        torch.roll(a, 1, 0),
        torch.searchsorted(a, b),
        torch.tensordot(e, e),
        torch.trace(e),
        torch.tril(e),
        torch.tril_indices(3, 3),
        torch.triu(e),
        torch.triu_indices(3, 3),
        torch.vander(a),
        torch.view_as_real(torch.randn(4, dtype=torch.cfloat)),
        torch.view_as_complex(torch.randn(4, 2)),
        torch.resolve_conj(a),
        torch.resolve_neg(a),
    )
def get_ml_posterior_samples(
    dp_mapping: Mapping[int, str],
    posterior: DirectPosterior,
    data_real: to.Tensor,
    num_eval_samples: int,
    num_ml_samples: int = 1,
    calculate_log_probs: bool = True,
    normalize_posterior: bool = True,
    subrtn_sbi_sampling_hparam: Optional[dict] = None,
    return_as_tensor: bool = False,
) -> Tuple[Union[List[List[Dict]], to.Tensor], Optional[to.Tensor]]:
    r"""
    Evaluate the posterior conditioned on the data `data_real`, and extract the `num_ml_samples` most likely domain
    parameter sets.

    :param dp_mapping: mapping from subsequent integers (starting at 0) to domain parameter names (e.g. mass)
    :param posterior: posterior to evaluate, e.g. a normalizing flow, that samples domain parameters conditioned on
                      the provided data
    :param data_real: data from the real-world rollouts a.k.a. set of $x_o$ of shape
                      [num_iter, num_rollouts_per_iter, time_series_length, dim_data]
    :param num_eval_samples: number of samples to draw from the posterior
    :param num_ml_samples: number of most likely samples, i.e. 1 equals argmax
    :param calculate_log_probs: if `True` the log-probabilities are computed, else `None` is returned
    :param normalize_posterior: if `True` the normalization of the posterior density is enforced by sbi
    :param subrtn_sbi_sampling_hparam: keyword arguments forwarded to sbi's `DirectPosterior.sample()` function
    :param return_as_tensor: if `True`, return the most likely domain parameter sets as a tensor of shape
                             [num_iter, num_ml_samples, dim_domain_param], else as a list of dicts
    :return: most likely domain parameter sets sampled from the posterior, and the associated log probabilities
    """
    if not isinstance(num_ml_samples, int) or num_ml_samples < 1:
        raise pyrado.ValueErr(given=num_ml_samples, g_constraint="0 (int)")
    if num_eval_samples < num_ml_samples:
        raise pyrado.ValueErr(given=num_ml_samples,
                              le_constraint=num_eval_samples)

    # Evaluate the posterior
    domain_params, log_probs = SBIBase.eval_posterior(
        posterior,
        data_real,
        num_eval_samples,
        calculate_log_probs,
        normalize_posterior,
        subrtn_sbi_sampling_hparam,
    )

    # Extract the most likely domain parameter sets for every target domain data set
    domain_params_ml = []
    log_probs_ml = to.empty(log_probs.shape[0], num_ml_samples)
    for idx_r in range(domain_params.shape[0]):
        idcs_sorted = to.argsort(log_probs[idx_r, :], descending=True)
        idcs_ml = idcs_sorted[:num_ml_samples]
        log_probs_ml[idx_r, :] = log_probs[idx_r, idcs_ml]
        dp_vals = domain_params[idx_r, idcs_ml, :]

        if return_as_tensor:
            # Return as tensor
            domain_params_ml.append(dp_vals)
        else:
            # Return as dict
            dp_vals = to.atleast_1d(dp_vals).numpy()
            domain_param_ml = [
                dict(zip(dp_mapping.values(), dpv)) for dpv in dp_vals
            ]
            domain_params_ml.append(domain_param_ml)

    if return_as_tensor:
        domain_params_ml = to.stack(domain_params_ml, dim=0)
        if not domain_params_ml.shape == (domain_params.shape[0],
                                          num_ml_samples, len(dp_mapping)):
            raise pyrado.ShapeErr(given=domain_params_ml,
                                  expected_match=(domain_params.shape[0],
                                                  num_ml_samples,
                                                  len(dp_mapping)))
    else:
        # Check the first element
        if len(domain_params_ml[0]) != num_ml_samples or len(
                domain_params_ml[0][0]) != len(dp_mapping):
            raise pyrado.ShapeErr(
                msg=
                f"The max likelihood domain parameter sets need to be of length {num_ml_samples}, but are "
                f"{domain_params_ml[0]}, and the domain parameter sets need to be of length {len(dp_mapping)}, but "
                f"are {len(domain_params_ml[0][0])}!")

    return domain_params_ml, log_probs_ml
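# Minimal sketch (hypothetical data, not part of the original function) of the
# selection step used above: for each target-domain data set, sort the per-sample
# log-probabilities in descending order and keep the num_ml_samples best domain
# parameter sets.
import torch as to

num_iter, num_eval_samples, dim_dp, num_ml_samples = 2, 5, 3, 2
domain_params = to.randn(num_iter, num_eval_samples, dim_dp)
log_probs = to.randn(num_iter, num_eval_samples)

idcs_ml = to.argsort(log_probs, dim=1, descending=True)[:, :num_ml_samples]
dp_ml = to.stack([domain_params[i, idcs_ml[i]] for i in range(num_iter)])
print(dp_ml.shape)  # torch.Size([2, 2, 3])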
def test_schema_check_mode_empty_list_input(self):
    expected = torch.atleast_1d([])
    with enable_torch_dispatch_mode(SchemaCheckMode()):
        actual = torch.atleast_1d([])
    self.assertEqual(expected, actual)
def forward(self, obs: Optional[to.Tensor] = None) -> to.Tensor:
    act = to.as_tensor(self._fcn_of_time(self._t_curr),
                       dtype=to.get_default_dtype(),
                       device=self.device)
    self._t_curr += self._dt
    return to.atleast_1d(act)
def _transform(
        self,
        inputs: Tensor,
        *,
        targets: Tensor | None = None,
        group_labels: Tensor | None = None) -> Tensor | InputsTargetsPair:
    batch_size = len(inputs)
    # If the batch is singular or the sampling probability is 0 there's nothing to do.
    if (batch_size == 1) or (self.p == 0):
        if targets is None:
            return inputs
        return InputsTargetsPair(inputs=inputs, targets=targets)
    elif self.p < 1:
        # Sample a mask determining which samples in the batch are to be transformed
        selected = torch.rand(batch_size, device=inputs.device) < self.p
        num_selected = int(selected.count_nonzero())
        indices = selected.nonzero(as_tuple=False).long().flatten()
    # If p >= 1 then the transform is always applied and we can skip the above step.
    else:
        num_selected = batch_size
        indices = torch.arange(batch_size,
                               device=inputs.device,
                               dtype=torch.long)

    if group_labels is None:
        # Sample the mixup pairs with the guarantee that a given sample will
        # not be paired with itself
        offset = torch.randint(low=1,
                               high=batch_size,
                               size=(num_selected, ),
                               device=inputs.device,
                               dtype=torch.long)
        pair_indices = (indices + offset) % batch_size
    else:
        if group_labels.numel() != batch_size:
            raise ValueError(
                "The number of elements in 'group_labels' should match the size of dimension 0 of 'inputs'."
            )
        group_labels = group_labels.view(batch_size, 1)  # [batch_size]
        # Compute the pairwise indicator matrix, indicating whether any two samples
        # belong to the same group (0) or different groups (1)
        is_diff_group = group_labels[indices] != group_labels.t(
        )  # [num_selected, batch_size]
        # For each sample, compute how many other samples there are that belong
        # to a different group.
        diff_group_counts = is_diff_group.count_nonzero(
            dim=1)  # [num_selected]
        if torch.any(diff_group_counts == 0):
            raise RuntimeError(
                "No samples from different groups to sample as mixup pairs for one or more groups."
            )
        # Sample the mixup pairs via cross-group sampling, meaning samples are paired exclusively
        # with samples from other groups. This can be efficiently done as follows:
        # 1) Sample uniformly from {0, ..., diff_group_count - 1} to obtain the groupwise pair indices.
        #    This involves first drawing samples from the standard uniform distribution, rescaling them to
        #    [-1/(2*diff_group_count), diff_group_count + (1/(2*diff_group_count)], and then clamping them
        #    to [0, 1], making it so that 0 and diff_group_count have the same probability of being drawn
        #    as any other value. The uniform samples are then mapped to indices by multiplying by
        #    diff_group_counts and rounding. 'randint' is unsuitable here because the groups aren't
        #    guaranteed to have equal cardinality (using it to sample from the cyclic group,
        #    Z / diff_group_count Z, as above, leads to biased sampling).
        rel_pair_indices = batched_randint(diff_group_counts)
        # 2) Convert the row-wise indices into row-major indices, considering
        #    only the positive entries in the rows.
        rel_pair_indices[1:] += diff_group_counts.cumsum(dim=0)[:-1]
        # 3) Finally, map from group-relative indices to absolute ones.
        _, abs_pos_inds = is_diff_group.nonzero(as_tuple=True)
        pair_indices = abs_pos_inds[rel_pair_indices]

    # Sample the mixing weights
    if self.featurewise:
        sample_shape = (num_selected, *inputs.shape[1:])
    else:
        sample_shape = (num_selected, *((1, ) * (inputs.ndim - 1)))
    lambdas = self.lambda_sampler.sample(
        sample_shape=torch.Size(sample_shape)).to(inputs.device)

    if not self.inplace:
        inputs = inputs.clone()
    # Apply mixup to the inputs
    inputs[indices] = self._mix(tensor_a=inputs[indices],
                                tensor_b=inputs[pair_indices],
                                lambda_=lambdas)
    if targets is None:
        return inputs

    # Targets are label-encoded and need to be one-hot encoded prior to mixup.
    if torch.atleast_1d(targets.squeeze()).ndim == 1:
        if self.num_classes is None:
            raise RuntimeError(
                f"{self.__class__.__name__} can only be applied to label-encoded targets if "
                "'num_classes' is specified.")
        targets = cast(Tensor,
                       F.one_hot(targets, num_classes=self.num_classes))
    elif not self.inplace:
        targets = targets.clone()
    # Targets need to be floats to be mixed up
    targets = targets.float()

    # Use the empirical mean of the lambdas for interpolating the targets if the lambdas
    # were sampled feature-wise, else just use the lambdas as is.
    target_lambdas = lambdas = (lambdas.flatten(start_dim=1).mean(1)
                                if self.featurewise else lambdas)
    # Add singleton dimensions to the lambdas for broadcasting
    target_lambdas = lambdas.view(num_selected,
                                  *((1, ) * (targets.ndim - 1)))
    # Apply mixup to the targets
    targets[indices] = self._mix(tensor_a=targets[indices],
                                 tensor_b=targets[pair_indices],
                                 lambda_=target_lambdas)
    return InputsTargetsPair(inputs, targets)
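# Hypothetical, simplified sketch of what a batched_randint helper could look like;
# the library's actual implementation (described in the comment inside _transform)
# uses a different rescale-and-round scheme. For each entry of 'counts', draw a
# uniform integer in {0, ..., count - 1}, even when the counts differ per row.
import torch
from torch import Tensor

def batched_randint(counts: Tensor) -> Tensor:
    # Scale standard-uniform samples by the per-row count and floor them; the clamp
    # guards against the edge case of a sample mapping exactly to count itself.
    u = torch.rand_like(counts, dtype=torch.float)
    return (u * counts).floor().long().clamp_max(counts.long() - 1)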
def _transform(
        self,
        inputs: Tensor,
        *,
        targets: Tensor | None = None) -> Tensor | InputsTargetsPair:
    if inputs.ndim != 4:
        raise ValueError(
            "'inputs' must be a batch of image tensors of shape (C, H, W).")
    batch_size = len(inputs)
    if (targets is not None) and (batch_size != len(targets)):
        raise ValueError(
            "'inputs' and 'targets' must match in size at dimension 0.")
    generator = (None if self.seed is None else torch.Generator(
        inputs.device).manual_seed(self.seed))

    if (batch_size == 1) or (self.p == 0):
        return inputs if targets is None else InputsTargetsPair(
            inputs=inputs, targets=targets)
    elif self.p < 1:
        # Sample a mask determining which samples in the batch are to be transformed
        selected = torch.rand(
            batch_size, device=inputs.device, generator=generator) < self.p
        num_selected = int(selected.count_nonzero())
        if num_selected == 0:
            return (inputs if targets is None else InputsTargetsPair(
                inputs=inputs, targets=targets))
        indices = selected.nonzero(as_tuple=False).long().flatten()
    # If p >= 1 then the transform is always applied and we can skip the sampling step above.
    else:
        num_selected = batch_size
        indices = torch.arange(batch_size,
                               device=inputs.device,
                               dtype=torch.long)

    # Pair each selected sample with another sample that will serve as the 'patch donor'
    pair_indices = torch.arange(num_selected).roll(1, 0)
    masks, cropped_area_ratios = self._sample_masks(inputs=inputs,
                                                    num_samples=num_selected,
                                                    generator=generator)

    if not self.inplace:
        inputs = inputs.clone()
    # Transplant patches from the paired images to the anchor images as determined by the masks.
    inputs[indices] = ~masks * inputs[indices] + masks * inputs[pair_indices]

    # No targets were received so we're done.
    if targets is None:
        return inputs

    # Targets are label-encoded and need to be one-hot encoded prior to mixup.
    if torch.atleast_1d(targets.squeeze()).ndim == 1:
        if self.num_classes is None:
            raise RuntimeError(
                f"{self.__class__.__name__} can only be applied to label-encoded targets if "
                "'num_classes' is specified.")
        targets = cast(Tensor,
                       F.one_hot(targets, num_classes=self.num_classes))
    elif not self.inplace:
        targets = targets.clone()
    # Targets need to be floats to be mixed up.
    targets = targets.float()

    target_lambdas = 1.0 - cropped_area_ratios
    target_lambdas.unsqueeze_(-1)
    targets[indices] *= target_lambdas
    targets[indices] += (1.0 - target_lambdas) * targets[pair_indices]

    return InputsTargetsPair(inputs=inputs, targets=targets)
def least_squares(
        fun, x0, bounds=None, method='trf', ftol=1e-8, xtol=1e-8, gtol=1e-8,
        x_scale=1.0, tr_solver='lsmr', tr_options=None, max_nfev=None,
        verbose=0):
    r"""Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residual function
    :math:`f: \mathcal{R}^n \rightarrow \mathcal{R}^m`, `least_squares`
    finds a local minimum of the residual sum-of-squares (RSS) objective:

    .. math::
        x^* = \underset{x}{\operatorname{arg\,min\,}}
        \frac{1}{2} ||f(x)||_2^2
        \quad \text{subject to} \quad lb \leq x \leq ub

    The solution is found using variants of the Gauss-Newton method, a
    modification of Newton's method tailored to RSS problems.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x)``. The argument ``x`` passed to this function is a Tensor of
        shape (n,) (never a scalar, even for n=1). It must allocate and return
        a 1-D Tensor of shape (m,) or a scalar.
    x0 : Tensor or float
        Initial guess on independent variables, with shape (n,). If
        float, it will be treated as a 1-D Tensor with one element.
    bounds : 2-tuple of Tensor, optional
        Lower and upper bounds on independent variables. Defaults to no
        bounds. Each Tensor must match the size of `x0` or be a scalar,
        in the latter case a bound will be the same for all variables.
        Use ``inf`` with an appropriate sign to disable bounds on all
        or some variables.
    method : str, optional
        Algorithm to perform minimization. Default is 'trf'.

            * 'trf' : Trust Region Reflective algorithm, particularly suitable
              for large sparse problems with bounds. Generally robust method.
            * 'dogbox' : COMING SOON. dogleg algorithm with rectangular trust
              regions, typical use case is small problems with bounds. Not
              recommended for problems with rank-deficient Jacobian.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function.
        The optimization process is stopped when ``dF < ftol * F``, and there
        was an adequate agreement between a local quadratic model and the true
        model in the last step. If None, the termination by this condition is
        disabled. Default is 1e-8.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Termination occurs when ``norm(dx) < xtol * (xtol + norm(x))``. If
        None, the termination by this condition is disabled. Default is 1e-8.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is
        1e-8. The exact condition depends on `method` used:

            * For 'trf' : ``norm(g_scaled, ord=inf) < gtol``, where
              ``g_scaled`` is the value of the gradient scaled to account for
              the presence of the bounds [STIR]_.
            * For 'dogbox' : ``norm(g_free, ord=inf) < gtol``, where
              ``g_free`` is the gradient with respect to the variables which
              are not in the optimal state on the boundary.
    x_scale : Tensor or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        Defaults to 100 * n.
    tr_solver : str, optional
        Method for solving trust-region subproblems.

            * 'exact' is suitable for not very large problems with dense
              Jacobian matrices. The computational complexity per iteration is
              comparable to a singular value decomposition of the Jacobian
              matrix.
            * 'lsmr' is suitable for problems with sparse and large Jacobian
              matrices. It uses an iterative procedure for finding a solution
              of a linear least-squares problem and only requires
              matrix-vector product evaluations.
    tr_options : dict, optional
        Keyword options passed to trust-region solver.

            * ``tr_solver='exact'``: `tr_options` are ignored.
            * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
              Additionally, ``method='trf'`` supports 'regularize' option
              (bool, default is True), which adds a regularization term to the
              normal equation, which improves convergence if the Jacobian is
              rank-deficient [Byrd]_ (eq. 3.4).
    verbose : int, optional
        Level of algorithm's verbosity.

            * 0 : work silently (default).
            * 1 : display a termination report.
            * 2 : display progress during iterations.

    Returns
    -------
    result : OptimizeResult
        Result of the optimization routine.

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    """
    if tr_options is None:
        tr_options = {}

    if method not in ['trf', 'dogbox']:
        raise ValueError("`method` must be 'trf' or 'dogbox'.")

    if tr_solver not in ['exact', 'lsmr', 'cgls']:
        raise ValueError(
            "`tr_solver` must be one of {'exact', 'lsmr', 'cgls'}.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if bounds is None:
        bounds = (-float('inf'), float('inf'))
    elif not (isinstance(bounds, (tuple, list)) and len(bounds) == 2):
        raise ValueError("`bounds` must be a tuple/list with 2 elements.")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    # initial point
    x0 = torch.atleast_1d(x0)
    if torch.is_complex(x0):
        raise ValueError("`x0` must be real.")
    elif x0.dim() > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    # bounds
    lb, ub = prepare_bounds(bounds, x0)
    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")
    elif torch.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")
    elif not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    # x_scale
    x_scale = check_x_scale(x_scale, x0)

    # tolerance
    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    def fun_wrapped(x):
        return torch.atleast_1d(fun(x))

    # check function
    f0 = fun_wrapped(x0)
    if f0.dim() != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))
    elif not f0.isfinite().all():
        raise ValueError("Residuals are not finite in the initial point.")

    initial_cost = 0.5 * f0.dot(f0)

    if isinstance(x_scale, str) and x_scale == 'jac':
        raise ValueError("x_scale='jac' can't be used when `jac` "
                         "returns LinearOperator.")

    if method == 'trf':
        result = trf(fun_wrapped, x0, f0, lb, ub, ftol, xtol, gtol, max_nfev,
                     x_scale, tr_solver, tr_options.copy(), verbose)
    elif method == 'dogbox':
        raise NotImplementedError("'dogbox' method not yet implemented")
        # if tr_solver == 'lsmr' and 'regularize' in tr_options:
        #     warn("The keyword 'regularize' in `tr_options` is not relevant "
        #          "for 'dogbox' method.")
        #     tr_options = tr_options.copy()
        #     del tr_options['regularize']
        # result = dogbox(fun_wrapped, x0, f0, lb, ub, ftol, xtol, gtol,
        #                 max_nfev, x_scale, tr_solver, tr_options, verbose)
    else:
        raise ValueError("`method` must be 'trf' or 'dogbox'.")

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}.".format(
                  result.nfev, initial_cost, result.cost, result.optimality))

    return result
def fun_wrapped(x):
    return torch.atleast_1d(fun(x))
def step(self, snapshot_mode: str = "latest", meta_info: dict = None):
    # Save snapshot to save the correct iteration count
    self.save_snapshot()

    if self.curr_checkpoint == -1:
        if self._subrtn_policy is not None and self._train_initial_policy:
            # Add dummy values of variables that are logged later
            self.logger.add_value("avg log prob", -pyrado.inf)

            # Train the behavioral policy using the samples obtained from the prior.
            # Repeat the training if the resulting policy did not exceed the success threshold.
            domain_params = self._sbi_prior.sample(
                sample_shape=(self.num_eval_samples, ))
            print_cbt(
                "Training the initial policy using domain parameter sets sampled from prior.",
                "c")
            wrapped_trn_fcn = until_thold_exceeded(
                self.thold_succ_subrtn,
                self.max_subrtn_rep)(self.train_policy_sim)
            wrapped_trn_fcn(domain_params,
                            prefix="init",
                            use_rec_init_states=False)  # overrides policy.pt

        self.reached_checkpoint()  # setting counter to 0

    if self.curr_checkpoint == 0:
        # Check if the rollout files already exist
        if (osp.isfile(
                osp.join(self._save_dir,
                         f"iter_{self.curr_iter}_data_real.pt"))
                and osp.isfile(osp.join(self._save_dir, "data_real.pt"))
                and osp.isfile(
                    osp.join(self._save_dir, "rollouts_real.pkl"))):
            # Rollout files do exist (can be when continuing a previous experiment)
            self._curr_data_real = pyrado.load(
                "data_real.pt",
                self._save_dir,
                prefix=f"iter_{self.curr_iter}")
            print_cbt(
                f"Loaded existing rollout data for iteration {self.curr_iter}.",
                "w")

        else:
            # If the policy depends on the domain-parameters, reset the policy with the
            # most likely dp-params from the previous round.
            pyrado.load(
                "policy.pt",
                self._save_dir,
                prefix=f"iter_{self._curr_iter - 1}"
                if self.curr_iter != 0 else "init",
                obj=self._policy,
            )
            if self.curr_iter != 0:
                ml_domain_param = pyrado.load(
                    "ml_domain_param.pkl",
                    self.save_dir,
                    prefix=f"iter_{self._curr_iter - 1}")
                self._policy.reset(**dict(domain_param=ml_domain_param))

            # Rollout files do not exist yet (usual case)
            self._curr_data_real, _ = SBIBase.collect_data_real(
                self.save_dir,
                self._env_real,
                self._policy,
                self._embedding,
                prefix=f"iter_{self._curr_iter}",
                num_rollouts=self.num_real_rollouts,
                num_segments=self.num_segments,
                len_segments=self.len_segments,
            )

            # Save the target domain data
            if self._curr_iter == 0:
                # Append the first set of data
                pyrado.save(self._curr_data_real, "data_real.pt",
                            self._save_dir)
            else:
                # Append and save all data
                prev_data = pyrado.load("data_real.pt", self._save_dir)
                data_real_hist = to.cat([prev_data, self._curr_data_real],
                                        dim=0)
                pyrado.save(data_real_hist, "data_real.pt", self._save_dir)

        # Initialize sbi simulator and prior
        self._setup_sbi(
            prior=self._sbi_prior,
            rollouts_real=pyrado.load("rollouts_real.pkl",
                                      self._save_dir,
                                      prefix=f"iter_{self._curr_iter}"),
        )

        self.reached_checkpoint()  # setting counter to 1

    if self.curr_checkpoint == 1:
        # Instantiate the sbi subroutine to retrain from scratch each iteration
        if self.reset_sbi_routine_each_iter:
            self._initialize_subrtn_sbi(
                subrtn_sbi_class=SNPE_A,
                num_components=self._num_components)

        # Initialize the proposal with the prior
        proposal = self._sbi_prior

        # Multi-round sbi
        for idx_r in range(self.num_sbi_rounds):
            # Sample parameters from the proposal, and simulate these parameters to obtain the data
            domain_param, data_sim = simulate_for_sbi(
                simulator=self._sbi_simulator,
                proposal=proposal,
                num_simulations=self.num_sim_per_round,
                simulation_batch_size=self.simulation_batch_size,
                num_workers=self.num_workers,
            )
            self._cnt_samples += self.num_sim_per_round * self._env_sim_sbi.max_steps

            # Append simulations and proposals for sbi
            self._subrtn_sbi.append_simulations(
                domain_param,
                data_sim,
                proposal=proposal,  # do not pass proposal arg for SNLE or SNRE
            )

            # Train the posterior
            density_estimator = self._subrtn_sbi.train(
                final_round=idx_r == self.num_sbi_rounds - 1,
                component_perturbation=self._component_perturbation,
                **self.subrtn_sbi_training_hparam,
            )
            posterior = self._subrtn_sbi.build_posterior(
                density_estimator=density_estimator,
                **self.subrtn_sbi_sampling_hparam)

            # Save the posterior of this iteration before tailoring it to the data (when it is still amortized)
            if idx_r == 0:
                pyrado.save(
                    posterior,
                    "posterior.pt",
                    self._save_dir,
                    prefix=f"iter_{self._curr_iter}",
                )

            # Set proposal of the next round to focus on the next data set.
            # set_default_x() expects dim [1, num_rollouts * data_samples]
            proposal = posterior.set_default_x(self._curr_data_real)

            # Save the posterior tailored to each round
            pyrado.save(
                posterior,
                "posterior.pt",
                self._save_dir,
                prefix=f"iter_{self._curr_iter}_round_{idx_r}",
            )

            # Override the latest posterior
            pyrado.save(posterior, "posterior.pt", self._save_dir)

        self.reached_checkpoint()  # setting counter to 2

    if self.curr_checkpoint == 2:
        # Logging (the evaluation can be time-intensive)
        posterior = pyrado.load("posterior.pt", self._save_dir)
        self._curr_domain_param_eval, log_probs = SBIBase.eval_posterior(
            posterior,
            self._curr_data_real,
            self.num_eval_samples,
            calculate_log_probs=True,
            normalize_posterior=self.normalize_posterior,
            subrtn_sbi_sampling_hparam=self.subrtn_sbi_sampling_hparam,
        )
        self.logger.add_value("avg log prob", to.mean(log_probs), 4)
        self.logger.add_value("num total samples", self._cnt_samples)

        # Extract the most likely domain parameter set out of all target domain data sets
        current_domain_param = self._env_sim_sbi.domain_param
        idx_ml = to.argmax(log_probs).item()
        dp_vals = self._curr_domain_param_eval[
            idx_ml // self.num_eval_samples,
            idx_ml % self.num_eval_samples, :]
        dp_vals = to.atleast_1d(dp_vals).numpy()
        ml_domain_param = dict(
            zip(self.dp_mapping.values(), dp_vals.tolist()))

        # Update the unchanged domain parameters with the most likely ones obtained from the posterior
        current_domain_param.update(ml_domain_param)
        pyrado.save(current_domain_param,
                    "ml_domain_param.pkl",
                    self.save_dir,
                    prefix=f"iter_{self._curr_iter}")

        self.reached_checkpoint()  # setting counter to 3

    if self.curr_checkpoint == 3:
        # Policy optimization
        if self._subrtn_policy is not None:
            pyrado.load(
                "policy.pt",
                self._save_dir,
                prefix=f"iter_{self._curr_iter - 1}"
                if self.curr_iter != 0 else "init",
                obj=self._policy,
            )
            # Train the behavioral policy using the posterior samples obtained before.
            # Repeat the training if the resulting policy did not exceed the success threshold.
            print_cbt(
                "Training the next policy using domain parameter sets sampled from the current posterior.",
                "c")
            wrapped_trn_fcn = until_thold_exceeded(
                self.thold_succ_subrtn,
                self.max_subrtn_rep)(self.train_policy_sim)
            wrapped_trn_fcn(self._curr_domain_param_eval.squeeze(0),
                            prefix=f"iter_{self._curr_iter}",
                            use_rec_init_states=True)
        else:
            # Save prefixed policy either way
            pyrado.save(self.policy,
                        "policy.pt",
                        self.save_dir,
                        prefix=f"iter_{self._curr_iter}",
                        use_state_dict=True)

        self.reached_checkpoint()  # setting counter to 0

    # Save snapshot data
    self.make_snapshot(snapshot_mode, None, meta_info)