def SKnopp(A, p, q, maxiters=None, checkperiod=None):
    """Rescale ``A`` with Sinkhorn-Knopp style alternating normalisation.

    Two scaling vectors are iterated: ``d1`` (one entry per column) and
    ``d2`` (one entry per row).  ``d2`` is always recomputed from ``d1`` so
    that the row sums of ``diag(d2) @ A @ diag(d1)`` equal ``p``; the loop
    stops once the column sums are within ``1e-9`` of ``q`` (checked every
    ``checkperiod`` iterations), when the gap becomes NaN, or when
    ``maxiters`` is reached.

    Parameters:
        A: matrix (``ndim < 3``) or batch of matrices to rescale.
        p: target row sums; a 1-D ``p`` (together with a 1-D ``q``) gets a
            leading axis of length one.
        q: target column sums, handled like ``p``.
        maxiters: iteration cap; defaults to ``A.shape[0] * A.shape[1]``.
        checkperiod: how often the convergence gap is evaluated; defaults
            to 10.

    Returns:
        ``A`` multiplied element-wise by the per-batch outer product of the
        final ``d2`` and ``d1``.

    If an update produces NaN or Inf in either scaling vector, a warning is
    issued and the scaling vectors from the previous iteration are used.
    """
    tol = 1e-9
    iteration_cap = A.shape[0] * A.shape[1] if maxiters is None else maxiters
    period = 10 if checkperiod is None else checkperiod

    if p.ndim < 2 and q.ndim < 2:
        p = p[None, :]
        q = q[None, :]

    C = A
    # TODO: Maybe improve this branching by looking
    # for other broadcasting techniques
    batched = not C.ndim < 3

    def _scaled_column_sums(row_scale):
        # Column sums of diag(row_scale) @ C.
        if batched:
            return torch.sum(C * row_scale[:, :, None], axis=1)
        return multiprod(row_scale, C)

    def _row_scale_for(col_scale):
        # Row scaling that makes the row sums match p for this col_scale.
        if batched:
            return p / torch.sum(C * col_scale[:, None, :], axis=2)
        return p / multiprod(col_scale, C.T)

    if batched:
        d1 = q / torch.sum(C, axis=1)
    else:
        d1 = q / torch.sum(C, axis=0)[None, :]
    d2 = _row_scale_for(d1)

    gap = float("inf")
    iters = 0
    while iters < iteration_cap:
        row = _scaled_column_sums(d2)

        if iters % period == 0:
            gap = torch.max(torch.absolute(row * d1 - q))
            if torch.any(torch.isnan(gap)) or gap <= tol:
                break

        iters += 1
        d1_prev, d2_prev = d1, d2
        d1 = q / row
        d2 = _row_scale_for(d1)

        degenerate = (torch.any(torch.isnan(d1)) or torch.any(torch.isinf(d1))
                      or torch.any(torch.isnan(d2))
                      or torch.any(torch.isinf(d2)))
        if degenerate:
            warnings.warn("""SKnopp: NanInfEncountered Nan or Inf occured at iter {:d} \n""".format(iters))
            # Fall back to the last scaling vectors that were still finite.
            d1 = d1_prev
            d2 = d2_prev
            break

    return C * (torch.einsum('bn,bm->bnm', d2, d1))
def set_input(self, input):
    """Store one dataloader batch on the model, moved to ``self.device``.

    Parameters:
        input (dict): batch holding the images under 'A'/'B', their paths
            under 'A_paths'/'B_paths' and, when ``self.is_using_mask`` is
            set, foreground masks under 'mask_A'/'mask_B'.

    ``self.opt.direction`` decides which domain is treated as the source:
    with 'AtoB' the keys are used as-is, otherwise A and B are swapped.
    When masks are enabled, the background masks are derived as
    ``|1 - foreground|`` without tracking gradients.
    """
    if self.opt.direction == 'AtoB':
        src_key, dst_key, paths_key = 'A', 'B', 'A_paths'
        src_mask_key, dst_mask_key = "mask_A", "mask_B"
    else:
        src_key, dst_key, paths_key = 'B', 'A', 'B_paths'
        src_mask_key, dst_mask_key = "mask_B", "mask_A"

    self.real_A = input[src_key].to(self.device)
    self.real_B = input[dst_key].to(self.device)
    self.image_paths = input[paths_key]

    if not self.is_using_mask:
        return

    self.foreground_real_A = input[src_mask_key].to(self.device)
    self.foreground_real_B = input[dst_mask_key].to(self.device)
    with torch.no_grad():
        # Background is the complement of the foreground mask.
        self.background_real_A = torch.absolute(1.0 - self.foreground_real_A)
        self.background_real_B = torch.absolute(1.0 - self.foreground_real_B)
def MSE_loss_reg(output, target, weights=None, L1=None, L2=None):
    """
    Mean-squared-error loss with optional L1 and L2 weight penalties.

    Each penalty is applied independently: it is added only when ``weights``
    is given and the matching coefficient is not ``None``.  (Previously the
    code required ``L2`` but multiplied by ``L1`` unconditionally, so passing
    only ``L2`` raised a ``TypeError`` and passing only ``L1`` was ignored.)

    :param output: predicted tensor
    :param target: reference tensor, same shape as ``output``
    :param weights: tensor of weights to regularise, or ``None`` for no penalty
    :param L1: coefficient of ``sum(|weights|)``, or ``None`` to skip it
    :param L2: coefficient of ``sum(weights ** 2)``, or ``None`` to skip it
    :return: scalar loss tensor
    """
    loss_fn = nn.MSELoss()
    loss = loss_fn(output, target)
    if weights is None:
        return loss

    penalty = None
    if L1 is not None:
        penalty = L1 * tc.absolute(weights).sum()
    if L2 is not None:
        l2_term = L2 * tc.square(weights).sum()
        penalty = l2_term if penalty is None else penalty + l2_term

    if penalty is not None:
        # Out-of-place add keeps the plain MSE tensor untouched.
        loss = loss + penalty
    return loss
def policy_eval(self):
    """
    Iterative policy evaluation that updates ``self.state_values`` in place.
    (Ref: Topic 4.1 in Sutton and Barto)

    Every sweep visits each state, accumulates over all actions
    ``policy[state, k] * state_prob * (reward + gamma * next_state)`` using
    the pair returned by ``self.next_state_reward``, and writes the result
    straight back into ``self.state_values``.  Sweeping stops once the
    largest absolute change of a sweep drops below ``self.theta``.

    Returns the state values reshaped to ``(self.width, self.height)``.
    """
    sweeps = 0
    converged = False
    while not converged:
        largest_change = torch.tensor(0.0)
        for s in range(self.state_values.numel()):
            backup = torch.tensor(0.0)
            for idx, act in enumerate(self.action_mapper):
                successor, gain = self.next_state_reward(s, act)
                backup += self.policy[s, idx] * self.state_prob * (
                    gain + self.gamma * successor)
            change = torch.absolute(self.state_values[s] - backup)
            largest_change = torch.max(largest_change, change)
            # In-place variant: later states in this sweep see this update.
            self.state_values[s] = backup.clone()
        sweeps += 1
        if largest_change < self.theta:
            print("Total Steps:", sweeps)
            converged = True
    return self.state_values.reshape(self.width, self.height)
def intersection_angles(self, x0, x1) -> torch.Tensor:
    """
    Return the sorted angles at which the ellipse spanned by ``x0`` and
    ``x1`` meets the linear constraints given by ``self.A`` and ``self.b``.

    Each constraint contributes up to two angles, ``phi +/- arccos(arg)``.
    Constraints with ``|arg| > 1`` have no intersection and are dropped.
    Negative angles are shifted by ``2*pi`` so the result lies in
    ``[0, 2*pi]``.
    """
    proj0 = self.A.matmul(x0)
    proj1 = self.A.matmul(x1)
    radius = torch.sqrt(proj0**2 + proj1**2)
    phase = 2 * torch.atan(proj1 / (radius + proj0)).squeeze()

    cos_arg = -(self.b / radius.squeeze(-1)).squeeze()
    # Mark constraints the ellipse never reaches with NaN.
    unreachable = torch.absolute(cos_arg) > 1
    cos_arg[unreachable] = torch.tensor(float("nan"))
    half_width = torch.arccos(cos_arg)

    # Two candidate solutions per linear constraint, shape (M, 2).
    n_constraints = self.A.shape[0]
    theta = torch.zeros((n_constraints, 2),
                        dtype=self.A.dtype,
                        device=self.A.device)
    theta[:, 0] = half_width + phase
    theta[:, 1] = -half_width + phase

    valid = theta[torch.isfinite(theta)]
    wrapped = valid + (valid < 0.) * 2. * math.pi  # in [0, 2*pi]
    return torch.sort(wrapped)[0]
def policy_eval_long(self):
    """
    Iterative policy evaluation using a separate array for the new values.
    (Ref: Topic 4.1 in Sutton and Barto)

    Each sweep accumulates, for every state and action,
    ``policy[state, k] * state_prob * (reward + gamma * next_state)`` into a
    fresh buffer, using the pair returned by ``self.next_state_reward``.
    ``self.state_values`` is replaced by a copy of that buffer only after
    the whole sweep.  Sweeping stops once the largest absolute change of a
    sweep is below ``self.theta``.

    Returns the state values reshaped to ``(self.width, self.height)``.
    """
    sweeps = 0
    converged = False
    while not converged:
        largest_change = torch.tensor(0.0)
        fresh_values = torch.zeros(self.state_values.shape)
        for s in range(self.state_values.numel()):
            for idx, act in enumerate(self.action_mapper):
                # The pointer is refreshed before every transition query.
                self.__pointer = torch.tensor([s])
                successor, gain = self.next_state_reward(s, act)
                fresh_values[s] += self.policy[s, idx] * self.state_prob * (
                    gain + self.gamma * successor)
            change = torch.absolute(self.state_values[s] - fresh_values[s])
            largest_change = torch.max(largest_change, change)
        self.state_values = fresh_values.clone()
        sweeps = sweeps + 1
        if largest_change < self.theta:
            print("Total Steps:", sweeps)
            converged = True
    return self.state_values.reshape(self.width, self.height)
def pointwise_ops(self):
    """Call a long list of torch pointwise operators and return all results.

    The returned tuple holds one entry per call, in the order written below.
    Most inputs are random, so the values differ between runs unless the
    caller seeds the RNG.  ``self`` is not used by the body.

    NOTE: element order matters.  Several entries call ``a.uniform_(...)``,
    which overwrites ``a`` in place, so every later use of ``a`` sees the
    re-sampled values.
    """
    # Shared operands reused by many of the calls below.
    a = torch.randn(4)
    b = torch.randn(4)
    t = torch.tensor([-1, -2, 3], dtype=torch.int8)
    r = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
    # NOTE(review): `t` is assigned a second time with the same value.
    t = torch.tensor([-1, -2, 3], dtype=torch.int8)
    s = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
    f = torch.zeros(3)
    g = torch.tensor([-1, 0, 1])
    w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
    return (
        torch.abs(torch.tensor([-1, -2, 3])),
        torch.absolute(torch.tensor([-1, -2, 3])),
        torch.acos(a),
        torch.arccos(a),
        # In-place: `a` is re-sampled from [1, 2) from here on.
        torch.acosh(a.uniform_(1.0, 2.0)),
        torch.add(a, 20),
        torch.add(a, torch.randn(4, 1), alpha=10),
        torch.addcdiv(torch.randn(1, 3),
                      torch.randn(3, 1),
                      torch.randn(1, 3),
                      value=0.1),
        torch.addcmul(torch.randn(1, 3),
                      torch.randn(3, 1),
                      torch.randn(1, 3),
                      value=0.1),
        torch.angle(a),
        torch.asin(a),
        torch.arcsin(a),
        torch.asinh(a),
        torch.arcsinh(a),
        torch.atan(a),
        torch.arctan(a),
        # In-place: `a` is re-sampled from [-1, 1) twice here.
        torch.atanh(a.uniform_(-1.0, 1.0)),
        torch.arctanh(a.uniform_(-1.0, 1.0)),
        torch.atan2(a, a),
        torch.bitwise_not(t),
        torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
        torch.ceil(a),
        torch.clamp(a, min=-0.5, max=0.5),
        torch.clamp(a, min=0.5),
        torch.clamp(a, max=0.5),
        torch.clip(a, min=-0.5, max=0.5),
        torch.conj(a),
        torch.copysign(a, 1),
        torch.copysign(a, b),
        torch.cos(a),
        torch.cosh(a),
        torch.deg2rad(
            torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0, -90.0]])),
        torch.div(a, b),
        torch.divide(a, b, rounding_mode="trunc"),
        torch.divide(a, b, rounding_mode="floor"),
        torch.digamma(torch.tensor([1.0, 0.5])),
        torch.erf(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfc(torch.tensor([0.0, -1.0, 10.0])),
        torch.erfinv(torch.tensor([0.0, 0.5, -1.0])),
        torch.exp(torch.tensor([0.0, math.log(2.0)])),
        torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])),
        torch.expm1(torch.tensor([0.0, math.log(2.0)])),
        torch.fake_quantize_per_channel_affine(
            torch.randn(2, 2, 2),
            (torch.randn(2) + 1) * 0.05,
            torch.zeros(2),
            1,
            0,
            255,
        ),
        torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255),
        torch.float_power(torch.randint(10, (4, )), 2),
        torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4, -5])),
        torch.floor(a),
        # Disabled entries (kept from the original, reason not recorded):
        # torch.floor_divide(torch.tensor([4.0, 3.0]), torch.tensor([2.0, 2.0])),
        # torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4),
        torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2),
        torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.frac(torch.tensor([1.0, 2.5, -3.2])),
        torch.randn(4, dtype=torch.cfloat).imag,
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1])),
        torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])),
        torch.lerp(torch.arange(1.0, 5.0), torch.empty(4).fill_(10), 0.5),
        torch.lerp(
            torch.arange(1.0, 5.0),
            torch.empty(4).fill_(10),
            torch.full_like(torch.arange(1.0, 5.0), 0.5),
        ),
        torch.lgamma(torch.arange(0.5, 2, 0.5)),
        torch.log(torch.arange(5) + 10),
        torch.log10(torch.rand(5)),
        torch.log1p(torch.randn(5)),
        torch.log2(torch.rand(5)),
        torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]),
                        torch.tensor([-1, -2, -3])),
        torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]),
                        torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]),
                         torch.tensor([-1, -2, -3])),
        torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]),
                         torch.tensor([-1, -2, -3])),
        torch.logical_and(r, s),
        torch.logical_and(r.double(), s.double()),
        torch.logical_and(r.double(), s),
        torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)),
        torch.logical_not(
            torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)),
        torch.logical_not(
            torch.tensor([0.0, 1.0, -10.0], dtype=torch.double),
            out=torch.empty(3, dtype=torch.int16),
        ),
        torch.logical_or(r, s),
        torch.logical_or(r.double(), s.double()),
        torch.logical_or(r.double(), s),
        torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logical_xor(r, s),
        torch.logical_xor(r.double(), s.double()),
        torch.logical_xor(r.double(), s),
        torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)),
        torch.logit(torch.rand(5), eps=1e-6),
        torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])),
        torch.i0(torch.arange(5, dtype=torch.float32)),
        torch.igamma(a, b),
        torch.igammac(a, b),
        torch.mul(torch.randn(3), 100),
        torch.multiply(torch.randn(4, 1), torch.randn(1, 4)),
        torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2),
        # A plain tensor literal (no operator call) holding NaN/Inf values.
        torch.tensor([float("nan"), float("inf"), -float("inf"), 3.14]),
        # NOTE(review): `w` holds only finite values, so these nan_to_num
        # calls have nothing to replace.
        torch.nan_to_num(w),
        torch.nan_to_num(w, nan=2.0),
        torch.nan_to_num(w, nan=2.0, posinf=1.0),
        torch.neg(torch.randn(5)),
        # Disabled entry (kept from the original, reason not recorded):
        # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]),
        torch.polygamma(1, torch.tensor([1.0, 0.5])),
        torch.polygamma(2, torch.tensor([1.0, 0.5])),
        torch.polygamma(3, torch.tensor([1.0, 0.5])),
        torch.polygamma(4, torch.tensor([1.0, 0.5])),
        torch.pow(a, 2),
        torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)),
        torch.rad2deg(
            torch.tensor([[3.142, -3.142], [6.283, -6.283], [1.570, -1.570]])),
        torch.randn(4, dtype=torch.cfloat).real,
        torch.reciprocal(a),
        torch.remainder(torch.tensor([-3.0, -2.0]), 2),
        torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5),
        torch.round(a),
        torch.rsqrt(a),
        torch.sigmoid(a),
        torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sgn(a),
        torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])),
        torch.sin(a),
        torch.sinc(a),
        torch.sinh(a),
        torch.sqrt(a),
        torch.square(a),
        torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2),
        torch.tan(a),
        torch.tanh(a),
        torch.trunc(a),
        # NOTE(review): the next two entries are identical calls.
        torch.xlogy(f, g),
        torch.xlogy(f, g),
        torch.xlogy(f, 4),
        torch.xlogy(2, g),
    )
def L1_loss(Y_pred, Y_train):
    """Return the summed absolute difference between prediction and target."""
    residual = Y_pred - Y_train
    magnitudes = ch.absolute(residual)
    return ch.sum(magnitudes)
# Analysis without any gradient computation print("====== Without gradient computation ======\n") for mem_chunk_factor in range(25, 100, 5): start_time = time.time() data_torch, nb_chunks = wph_op.preconfigure( data, mem_chunk_factor=mem_chunk_factor) for i in range(nb_chunks): coeffs_chunk = wph_op.apply(data_torch, i, norm=norm) del coeffs_chunk print( f"mem_chunk_factor = {mem_chunk_factor} -> ellapsed_time = {time.time() - start_time}" ) del data_torch # Analysis with gradient computation print("\n====== With gradient computation ======\n") for mem_chunk_factor_grad in range(50, 115, 5): start_time = time.time() data_torch, nb_chunks = wph_op.preconfigure( data, mem_chunk_factor_grad=mem_chunk_factor_grad, requires_grad=True) for i in range(nb_chunks): coeffs_chunk = wph_op.apply(data_torch, i, norm=norm) loss_chunk = (torch.absolute(coeffs_chunk)**2).sum() # Some loss loss_chunk.backward(retain_graph=True) del coeffs_chunk, loss_chunk # To free GPU memory print( f"mem_chunk_factor_grad = {mem_chunk_factor_grad} -> ellapsed_time = {time.time() - start_time}" ) del data_torch
def get_loss(output, target):
    """Return the mean-squared error between ``|output|`` and ``target``."""
    magnitude = torch.absolute(output)
    mse = F.mse_loss(magnitude, target)
    return mse
def id_loss(real, generated, Lambda=2e-4):
    """Return ``Lambda`` times the mean absolute difference of the inputs."""
    deviation = torch.absolute(real - generated)
    mean_deviation = torch.mean(deviation)
    return Lambda * mean_deviation
def benchmark(args, archs_list, steps, nDryRuns):
    """Run forward passes of the selected models and compare with references.

    For every batch size in ``[1, 5, 8, 19]`` and every selected
    architecture, random input is generated with a fixed seed, the model is
    run once per ``OMP_NUM_THREADS`` setting in ``[1, 5, 8, 24]`` under
    ``torch.no_grad()``, and the output is compared (max absolute
    difference < 1e-4) against a tensor loaded from
    ``./mkldnn_cnn_outputs/``.  A match/mismatch banner and the average
    forward time are printed.

    Parameters:
        args: options object; reads ``no_cuda``, ``arch`` and ``inference``
            and sets ``args.cuda``.
        archs_list: names accepted for ``args.arch``; if ``args.arch`` is
            not in it, every entry of the module-level ``archs`` is run.
        steps: overwritten with 1 inside the loop, so the passed value is
            never used.
        nDryRuns: not used by the body.

    NOTE(review): ``optimizer``, ``criterion``, ``target``, ``time_total``
    and ``time_upt_avg`` are computed but never used, and ``time_bwd`` stays
    0 because no backward pass is run.
    """
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    arch_dict = {
        args.arch: archs[args.arch]
    } if args.arch in archs_list else archs  # by huiming, support one or all models.
    if args.cuda:
        import torch.backends.cudnn as cudnn
        cudnn.benchmark = True
        cudnn.deterministic = True

        kernel = 'cudnn'
        # Fixed command string (no external input); second CSV line is the
        # GPU name.
        p = subprocess.check_output('nvidia-smi --query-gpu=name --format=csv',
                                    shell=True)
        device_name = str(p).split('\\n')[1]
    else:
        kernel = 'nn'
        # Fixed command string (no external input); takes the text after the
        # first ':' of the first "name" line and trims the trailing
        # characters of the bytes repr.
        p = subprocess.check_output(
            'cat /proc/cpuinfo | grep name | head -n 1', shell=True)
        device_name = str(p).split(':')[1][:-3]

    print('\nRunning on device: %s' % (device_name))

    def _time():
        """Return wall-clock time, after a CUDA sync when running on GPU."""
        if args.cuda:
            torch.cuda.synchronize()

        return time.time()

    for bs in [1, 5, 8, 19]:
        for arch, sizes in arch_dict.items():
            if arch == 'unet3d':
                batch_size, c, d, h, w = sizes[0], sizes[1], sizes[2], sizes[
                    3], sizes[4]
                # The configured batch size is replaced by the swept one.
                batch_size = bs
                print(
                    'ModelType: %s, Kernels: %s Input shape: %dx%dx%dx%dx%d' %
                    (arch, kernel, batch_size, c, d, h, w))
                torch.manual_seed(0)
                data_ = torch.randn(batch_size, c, d, h, w).to_zendnn()
            else:
                batch_size, c, h, w = sizes[0], sizes[1], sizes[2], sizes[3]
                # NOTE(review): this assignment has no effect; the next line
                # overwrites batch_size with bs unconditionally.
                batch_size = 64 if arch == 'resnet50' and args.inference else batch_size
                batch_size = bs
                print('ModelType: %s, Kernels: %s Input shape: %dx%dx%dx%d' %
                      (arch, kernel, batch_size, c, h, w))
                torch.manual_seed(0)
                data_ = torch.randn(batch_size, c, h, w)

            target_ = torch.arange(1, batch_size + 1).long()
            net = models.__dict__[arch](
            )  # no need to load pre-trained weights for dummy data

            optimizer = optim.SGD(net.parameters(), lr=0.01)
            criterion = nn.CrossEntropyLoss()

            # Only these three architectures are switched to eval mode.
            if arch == 'overfeat' or arch == 'alexnet' or arch == 'vgg11':
                net.eval()

            data, target = Variable(data_), Variable(target_)

            time_fwd, time_bwd, time_upt = 0, 0, 0

            with torch.no_grad():
                steps = 1
                for omp in [1, 5, 8, 24]:
                    os.environ["OMP_NUM_THREADS"] = str(omp)
                    t1 = _time()
                    output = net(data)
                    t2 = _time()
                    # Accumulated over all four OMP settings.
                    time_fwd = time_fwd + (t2 - t1)
                    # Re-read as a string; used for the reference file name
                    # and the printed banner.
                    omp = os.getenv('OMP_NUM_THREADS')
                    # NOTE(review): torch.load unpickles the file; only use
                    # with trusted reference outputs.
                    excepted_output = torch.load(
                        './mkldnn_cnn_outputs/mkldnn_' + arch + '_bs_' +
                        str(bs) + '_omp_' + str(omp) + '.pt')
                    diff = torch.max(torch.absolute(output - excepted_output))
                    if diff < 0.0001:
                        print("\n********************* output matching for ",
                              arch, " with batch size = ", bs,
                              "for OMP_NUM_THREADS =", omp,
                              " *********************\n")
                    else:
                        print(
                            "\n********************* warning output mismatching for ",
                            arch, " with batch size = ", bs,
                            "for OMP_NUM_THREADS =", omp,
                            " *********************\n")

            # Convert seconds to milliseconds (steps is 1 at this point).
            time_fwd_avg = time_fwd / steps * 1000
            time_bwd_avg = time_bwd / steps * 1000
            time_upt_avg = time_upt / steps * 1000

            # update not included!
            time_total = time_fwd_avg + time_bwd_avg

            print("%-30s %10s %10.2f (ms) %10.2f (imgs/s)\n" %
                  (kernel, ':forward:', time_fwd_avg,
                   batch_size * 1000 / time_fwd_avg))
# flake8: noqa import torch import math a = torch.randn(4) b = torch.randn(4) t = torch.tensor([-1, -2, 3], dtype=torch.int8) # abs/absolute torch.abs(torch.tensor([-1, -2, 3])) torch.absolute(torch.tensor([-1, -2, 3])) # acos/arccos torch.acos(a) torch.arccos(a) # acosh/arccosh torch.acosh(a.uniform_(1, 2)) # add torch.add(a, 20) torch.add(a, torch.randn(4, 1), alpha=10) # addcdiv torch.addcdiv(torch.randn(1, 3), torch.randn(3, 1), torch.randn(1, 3), value=0.1) # addcmul torch.addcmul(torch.randn(1, 3),
def rand(self):
    """Draw a random point by rescaling an entry-wise positive matrix.

    Takes the absolute value of an ``(self._n, self._m)`` standard-normal
    sample and passes it through ``SKnopp`` with the stored marginals
    ``self._p`` / ``self._q`` and iteration settings.
    """
    gaussian = torch.randn(self._n, self._m)
    positive = torch.absolute(gaussian)
    return SKnopp(
        positive,
        self._p,
        self._q,
        self._maxSKnoppIters,
        self._checkperiod,
    )