Example #1
import warnings

import torch


def SKnopp(A, p, q, maxiters=None, checkperiod=None):
    """Sinkhorn-Knopp balancing: iteratively rescale the nonnegative matrix A
    so that its row sums match p and its column sums match q.

    `multiprod` is the batched matrix product provided by the surrounding
    library; for 2-D inputs it reduces to an ordinary matrix product.
    """
    tol = 1e-9
    if maxiters is None:
        maxiters = A.shape[0] * A.shape[1]

    if checkperiod is None:
        checkperiod = 10

    if p.ndim < 2 and q.ndim < 2:
        p = p[None, :]
        q = q[None, :]

    C = A

    # TODO: Maybe improve this if-else by looking
    # for other broadcasting techniques
    if C.ndim < 3:
        d1 = q / torch.sum(C, axis=0)[None, :]
    else:
        d1 = q / torch.sum(C, axis=1)

    if C.ndim < 3:
        d2 = p / multiprod(d1, C.T)
    else:
        d2 = p / torch.sum(C * d1[:, None, :], axis=2)

    gap = float("inf")

    iters = 0
    while iters < maxiters:
        if C.ndim < 3:
            row = multiprod(d2, C)
        else:
            row = torch.sum(C * d2[:, :, None], axis=1)

        if iters % checkperiod == 0:
            gap = torch.max(torch.absolute(row * d1 - q))
            if torch.any(torch.isnan(gap)) or gap <= tol:
                break
        iters += 1

        d1_prev = d1
        d2_prev = d2
        d1 = q / row
        if C.ndim < 3:
            d2 = p / multiprod(d1, C.T)
        else:
            d2 = p / torch.sum(C * d1[:, None, :], axis=2)

        if torch.any(torch.isnan(d1)) or torch.any(
                torch.isinf(d1)) or torch.any(torch.isnan(d2)) or torch.any(
                    torch.isinf(d2)):
            warnings.warn("""SKnopp: NanInfEncountered
                    Nan or Inf occured at iter {:d} \n""".format(iters))
            d1 = d1_prev
            d2 = d2_prev
            break

    return C * (torch.einsum('bn,bm->bnm', d2, d1))
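A minimal usage sketch for SKnopp, assuming the function above is in scope and that `multiprod` behaves like an ordinary matrix product for 2-D inputs (the stand-in below and all shapes are illustrative):

import torch


# Hypothetical stand-in: the library's multiprod is a batched matrix product;
# for the 2-D case exercised here it is just a plain matmul.
def multiprod(a, b):
    return a @ b


n, m = 4, 6
Z = torch.rand(n, m) + 0.1           # strictly positive starting matrix
p = torch.full((n,), 1.0 / n)        # target row sums
q = torch.full((m,), 1.0 / m)        # target column sums (same total mass as p)

B = SKnopp(Z, p, q, maxiters=1000, checkperiod=10)
print(B.shape)        # (1, n, m): p and q are promoted to 2-D inside SKnopp
print(B.sum(dim=2))   # ≈ p (row marginals)
print(B.sum(dim=1))   # ≈ q (column marginals)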
Example #2
    def set_input(self, input):
        """Unpack input data from the dataloader and perform necessary pre-processing steps.

        Parameters:
            input (dict): includes the data itself and its metadata information.

        The option 'direction' can be used to swap domain A and domain B.
        """
        AtoB = self.opt.direction == 'AtoB'
        self.real_A = input['A' if AtoB else 'B'].to(self.device)
        self.real_B = input['B' if AtoB else 'A'].to(self.device)
        self.image_paths = input['A_paths' if AtoB else 'B_paths']

        if self.is_using_mask:
            self.foreground_real_A = input["mask_A" if AtoB else "mask_B"].to(
                self.device)
            self.foreground_real_B = input["mask_B" if AtoB else "mask_A"].to(
                self.device)
            with torch.no_grad():
                self.background_real_A = torch.absolute(1.0 -
                                                        self.foreground_real_A)
                self.background_real_B = torch.absolute(1.0 -
                                                        self.foreground_real_B)
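A hedged sketch of the batch dictionary this method expects; the keys mirror the lookups above, while the shapes, file paths, and the `model` instance are illustrative assumptions:

import torch

batch = {
    "A": torch.randn(1, 3, 256, 256),        # domain-A image(s)
    "B": torch.randn(1, 3, 256, 256),        # domain-B image(s)
    "A_paths": ["datasets/a/0001.png"],      # metadata kept for saving results
    "B_paths": ["datasets/b/0001.png"],
    "mask_A": torch.zeros(1, 1, 256, 256),   # only read when is_using_mask is True
    "mask_B": torch.ones(1, 1, 256, 256),
}
# model.set_input(batch)   # `model` would be an instance of the class above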
Example #3
def MSE_loss_reg(output, target, weights=None, L1=None, L2=None):
    """
    MSE loss with optional L1 and L2 regularization on `weights`
    :param output: model predictions
    :param target: ground-truth values
    :param weights: parameters to regularize
    :param L1: coefficient of the L1 penalty
    :param L2: coefficient of the L2 penalty
    :return: scalar loss tensor
    """

    loss_fn = nn.MSELoss()
    loss = loss_fn(output, target)

    if weights is not None and L1 is not None and L2 is not None:
        loss += L1 * tc.absolute(weights).sum() + L2 * tc.square(weights).sum()

    return loss
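A small usage sketch, assuming `tc` is `torch` and `nn` is `torch.nn` as the function body suggests; the regularized call adds L1 * sum(|w|) + L2 * sum(w**2) on top of the plain MSE:

import torch as tc
import torch.nn as nn

model = nn.Linear(3, 1)
x, y = tc.randn(8, 3), tc.randn(8, 1)

plain = MSE_loss_reg(model(x), y)                                        # MSE only
reg = MSE_loss_reg(model(x), y, weights=model.weight, L1=1e-4, L2=1e-3)
print(plain.item(), reg.item())   # reg >= plain whenever the weights are nonzero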
Example #4
 def policy_eval(self):
     """ Iterative Policy Evaluation with in place state values. (Ref:Topic 4.1 in Sutton and Burto) """
     steps = 0
     while True:
         diff = torch.tensor(0.0)
         for state in range(self.state_values.numel()):
             v = torch.tensor(0.0)
             for k, action in enumerate(self.action_mapper):
                 next_state, reward = self.next_state_reward(state, action)
                 # Bellman expectation backup: reward + gamma * V(next_state)
                 v += self.policy[state, k] * self.state_prob * (
                     reward + self.gamma * self.state_values[next_state])
             diff = torch.max(diff,
                              torch.absolute(self.state_values[state] - v))
             self.state_values[state] = v.clone()
         steps += 1
         if diff < self.theta:
             print("Total Steps:", steps)
             break
     return self.state_values.reshape(self.width, self.height)
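The inner loop above is the Bellman expectation backup V(s) <- sum_a pi(a|s) * p * (r + gamma * V(s')). A tiny self-contained illustration on a made-up two-state chain (single action, deterministic transitions):

import torch

# State 0 moves to state 1 with reward 0; state 1 loops on itself with reward 1.
# With gamma = 0.9 the exact values are V(1) = 1 / (1 - 0.9) = 10 and V(0) = 9.
gamma, theta = 0.9, 1e-6
next_state = torch.tensor([1, 1])
reward = torch.tensor([0.0, 1.0])
V = torch.zeros(2)

while True:
    V_new = reward + gamma * V[next_state]            # Bellman expectation backup
    if torch.max(torch.absolute(V_new - V)) < theta:
        break
    V = V_new
print(V)   # ≈ tensor([ 9., 10.]) up to the tolerance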
Example #5
    def intersection_angles(self, x0, x1) -> torch.Tensor:
        """ Compute all of the up to 2M intersections of the ellipse with the linear constraints """
        g1 = self.A.matmul(x0)
        g2 = self.A.matmul(x1)

        r = torch.sqrt(g1**2 + g2**2)
        phi = 2 * torch.atan(g2 / (r + g1)).squeeze()

        # two solutions per linear constraint, shape of theta: (M, 2)
        arg = -(self.b / r.squeeze(-1)).squeeze()
        theta = torch.zeros((self.A.shape[0], 2),
                            dtype=self.A.dtype,
                            device=self.A.device)

        # write NaNs if there is no intersection
        arg[torch.absolute(arg) > 1] = torch.tensor(float("nan"))
        theta[:, 0] = torch.arccos(arg) + phi
        theta[:, 1] = -torch.arccos(arg) + phi
        theta = theta[torch.isfinite(theta)]

        return torch.sort(theta +
                          (theta < 0.) * 2. * math.pi)[0]  # in [0, 2*pi]
Example #6
    def policy_eval_long(self):
        """ Iterative Policy Evaluation with two arrays. (Ref:Topic 4.1 in Sutton and Burto) """
        steps = 0
        while True:
            diff = torch.tensor(0.0)
            temp = torch.zeros(self.state_values.shape)
            for state in range(self.state_values.numel()):
                for k, action in enumerate(self.action_mapper):
                    self.__pointer = torch.tensor([state])
                    next_state, reward = self.next_state_reward(state, action)
                    # Bellman expectation backup using values from the previous sweep
                    temp[state] += self.policy[state, k] * self.state_prob * (
                        reward + self.gamma * self.state_values[next_state])

                diff = torch.max(
                    diff,
                    torch.absolute(self.state_values[state] - temp[state]))
            self.state_values = temp.clone()
            steps = steps + 1
            if diff < self.theta:
                print("Total Steps:", steps)
                break
        return self.state_values.reshape(self.width, self.height)
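The only difference from `policy_eval` is that every backup in a sweep reads the previous sweep's values (two arrays) instead of values already updated in the same sweep (in place). A tiny contrast on the same toy chain as the sketch above:

import torch

gamma = 0.9
next_state = torch.tensor([1, 1])
reward = torch.tensor([0.0, 1.0])

V = torch.zeros(2)
print(reward + gamma * V[next_state])   # synchronous sweep (two arrays): tensor([0., 1.])

V = torch.zeros(2)
for s in (1, 0):                        # in-place sweep visiting state 1 first
    V[s] = reward[s] + gamma * V[next_state[s]]
print(V)                                # tensor([0.9000, 1.0000]): state 0 already
                                        # sees this sweep's update of V[1]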
Example #7
 def pointwise_ops(self):
     a = torch.randn(4)
     b = torch.randn(4)
     t = torch.tensor([-1, -2, 3], dtype=torch.int8)
     r = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
     s = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
     f = torch.zeros(3)
     g = torch.tensor([-1, 0, 1])
     w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
     return (
         torch.abs(torch.tensor([-1, -2, 3])),
         torch.absolute(torch.tensor([-1, -2, 3])),
         torch.acos(a),
         torch.arccos(a),
         torch.acosh(a.uniform_(1.0, 2.0)),
         torch.add(a, 20),
         torch.add(a, torch.randn(4, 1), alpha=10),
         torch.addcdiv(torch.randn(1, 3),
                       torch.randn(3, 1),
                       torch.randn(1, 3),
                       value=0.1),
         torch.addcmul(torch.randn(1, 3),
                       torch.randn(3, 1),
                       torch.randn(1, 3),
                       value=0.1),
         torch.angle(a),
         torch.asin(a),
         torch.arcsin(a),
         torch.asinh(a),
         torch.arcsinh(a),
         torch.atan(a),
         torch.arctan(a),
         torch.atanh(a.uniform_(-1.0, 1.0)),
         torch.arctanh(a.uniform_(-1.0, 1.0)),
         torch.atan2(a, a),
         torch.bitwise_not(t),
         torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.ceil(a),
         torch.clamp(a, min=-0.5, max=0.5),
         torch.clamp(a, min=0.5),
         torch.clamp(a, max=0.5),
         torch.clip(a, min=-0.5, max=0.5),
         torch.conj(a),
         torch.copysign(a, 1),
         torch.copysign(a, b),
         torch.cos(a),
         torch.cosh(a),
         torch.deg2rad(
             torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0,
                                                              -90.0]])),
         torch.div(a, b),
         torch.divide(a, b, rounding_mode="trunc"),
         torch.divide(a, b, rounding_mode="floor"),
         torch.digamma(torch.tensor([1.0, 0.5])),
         torch.erf(torch.tensor([0.0, -1.0, 10.0])),
         torch.erfc(torch.tensor([0.0, -1.0, 10.0])),
         torch.erfinv(torch.tensor([0.0, 0.5, -1.0])),
         torch.exp(torch.tensor([0.0, math.log(2.0)])),
         torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])),
         torch.expm1(torch.tensor([0.0, math.log(2.0)])),
         torch.fake_quantize_per_channel_affine(
             torch.randn(2, 2, 2),
             (torch.randn(2) + 1) * 0.05,
             torch.zeros(2),
             1,
             0,
             255,
         ),
         torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255),
         torch.float_power(torch.randint(10, (4, )), 2),
         torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4,
                                                             -5])),
         torch.floor(a),
         # torch.floor_divide(torch.tensor([4.0, 3.0]), torch.tensor([2.0, 2.0])),
         # torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4),
         torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2),
         torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5),
         torch.frac(torch.tensor([1.0, 2.5, -3.2])),
         torch.randn(4, dtype=torch.cfloat).imag,
         torch.ldexp(torch.tensor([1.0]), torch.tensor([1])),
         torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])),
         torch.lerp(torch.arange(1.0, 5.0),
                    torch.empty(4).fill_(10), 0.5),
         torch.lerp(
             torch.arange(1.0, 5.0),
             torch.empty(4).fill_(10),
             torch.full_like(torch.arange(1.0, 5.0), 0.5),
         ),
         torch.lgamma(torch.arange(0.5, 2, 0.5)),
         torch.log(torch.arange(5) + 10),
         torch.log10(torch.rand(5)),
         torch.log1p(torch.randn(5)),
         torch.log2(torch.rand(5)),
         torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
         torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]),
                         torch.tensor([-1, -2, -3])),
         torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]),
                         torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]),
                          torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]),
                          torch.tensor([-1, -2, -3])),
         torch.logical_and(r, s),
         torch.logical_and(r.double(), s.double()),
         torch.logical_and(r.double(), s),
         torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)),
         torch.logical_not(
             torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)),
         torch.logical_not(
             torch.tensor([0.0, 1.0, -10.0], dtype=torch.double),
             out=torch.empty(3, dtype=torch.int16),
         ),
         torch.logical_or(r, s),
         torch.logical_or(r.double(), s.double()),
         torch.logical_or(r.double(), s),
         torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logical_xor(r, s),
         torch.logical_xor(r.double(), s.double()),
         torch.logical_xor(r.double(), s),
         torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logit(torch.rand(5), eps=1e-6),
         torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])),
         torch.i0(torch.arange(5, dtype=torch.float32)),
         torch.igamma(a, b),
         torch.igammac(a, b),
         torch.mul(torch.randn(3), 100),
         torch.multiply(torch.randn(4, 1), torch.randn(1, 4)),
         torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2),
         torch.tensor([float("nan"),
                       float("inf"), -float("inf"), 3.14]),
         torch.nan_to_num(w),
         torch.nan_to_num(w, nan=2.0),
         torch.nan_to_num(w, nan=2.0, posinf=1.0),
         torch.neg(torch.randn(5)),
         # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]),
         torch.polygamma(1, torch.tensor([1.0, 0.5])),
         torch.polygamma(2, torch.tensor([1.0, 0.5])),
         torch.polygamma(3, torch.tensor([1.0, 0.5])),
         torch.polygamma(4, torch.tensor([1.0, 0.5])),
         torch.pow(a, 2),
         torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)),
         torch.rad2deg(
             torch.tensor([[3.142, -3.142], [6.283, -6.283],
                           [1.570, -1.570]])),
         torch.randn(4, dtype=torch.cfloat).real,
         torch.reciprocal(a),
         torch.remainder(torch.tensor([-3.0, -2.0]), 2),
         torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5),
         torch.round(a),
         torch.rsqrt(a),
         torch.sigmoid(a),
         torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])),
         torch.sgn(a),
         torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])),
         torch.sin(a),
         torch.sinc(a),
         torch.sinh(a),
         torch.sqrt(a),
         torch.square(a),
         torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2),
         torch.tan(a),
         torch.tanh(a),
         torch.trunc(a),
         torch.xlogy(f, g),
         torch.xlogy(f, 4),
         torch.xlogy(2, g),
     )
Example #8
 def L1_loss(Y_pred, Y_train):
     return ch.sum(ch.absolute(Y_pred - Y_train))
Example #9
# Analysis without any gradient computation
print("====== Without gradient computation ======\n")
for mem_chunk_factor in range(25, 100, 5):
    start_time = time.time()
    data_torch, nb_chunks = wph_op.preconfigure(
        data, mem_chunk_factor=mem_chunk_factor)
    for i in range(nb_chunks):
        coeffs_chunk = wph_op.apply(data_torch, i, norm=norm)
        del coeffs_chunk
    print(
        f"mem_chunk_factor = {mem_chunk_factor} -> ellapsed_time = {time.time() - start_time}"
    )
    del data_torch

# Analysis with gradient computation
print("\n====== With gradient computation ======\n")
for mem_chunk_factor_grad in range(50, 115, 5):
    start_time = time.time()
    data_torch, nb_chunks = wph_op.preconfigure(
        data, mem_chunk_factor_grad=mem_chunk_factor_grad, requires_grad=True)
    for i in range(nb_chunks):
        coeffs_chunk = wph_op.apply(data_torch, i, norm=norm)
        loss_chunk = (torch.absolute(coeffs_chunk)**2).sum()  # Some loss
        loss_chunk.backward(retain_graph=True)
        del coeffs_chunk, loss_chunk  # To free GPU memory
    print(
        f"mem_chunk_factor_grad = {mem_chunk_factor_grad} -> ellapsed_time = {time.time() - start_time}"
    )
    del data_torch
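Calling `backward()` once per chunk works because gradients accumulate in `.grad` across calls, so the gradient of the total loss is assembled without ever holding all coefficients in memory at once. A generic sketch of that pattern, independent of the `wph_op` API (the `torch.sin` transform just stands in for `wph_op.apply`):

import torch

x = torch.randn(1000, requires_grad=True)
index_chunks = torch.chunk(torch.arange(1000), 10)   # stands in for nb_chunks pieces

for idx in index_chunks:
    coeffs_chunk = torch.sin(x[idx])                  # stand-in for wph_op.apply(...)
    loss_chunk = (torch.absolute(coeffs_chunk) ** 2).sum()
    loss_chunk.backward()                             # accumulates into x.grad
    del coeffs_chunk, loss_chunk                      # free intermediate buffers

print(x.grad.shape)   # full gradient of the summed loss, built chunk by chunk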
Example #10
def get_loss(output, target):
    # if loss == "mse":
    return F.mse_loss(torch.absolute(output), target)
Example #11
def id_loss(real, generated, Lambda=2e-4):
    return Lambda * torch.mean(torch.absolute(real - generated))
Example #12
def benchmark(args, archs_list, steps, nDryRuns):

    args.cuda = not args.no_cuda and torch.cuda.is_available()

    arch_dict = {
        args.arch: archs[args.arch]
    } if args.arch in archs_list else archs  # by huiming, support one or all models.

    if args.cuda:
        import torch.backends.cudnn as cudnn
        cudnn.benchmark = True
        cudnn.deterministic = True

        kernel = 'cudnn'
        p = subprocess.check_output('nvidia-smi --query-gpu=name --format=csv',
                                    shell=True)
        device_name = str(p).split('\\n')[1]
    else:
        kernel = 'nn'
        p = subprocess.check_output(
            'cat /proc/cpuinfo | grep name | head -n 1', shell=True)
        device_name = str(p).split(':')[1][:-3]

    print('\nRunning on device: %s' % (device_name))

    def _time():
        if args.cuda:
            torch.cuda.synchronize()

        return time.time()

    for bs in [1, 5, 8, 19]:
        for arch, sizes in arch_dict.items():
            if arch == 'unet3d':
                batch_size, c, d, h, w = sizes[0], sizes[1], sizes[2], sizes[
                    3], sizes[4]
                batch_size = bs
                print(
                    'ModelType: %s, Kernels: %s Input shape: %dx%dx%dx%dx%d' %
                    (arch, kernel, batch_size, c, d, h, w))
                torch.manual_seed(0)

                data_ = torch.randn(batch_size, c, d, h, w).to_zendnn()
            else:
                batch_size, c, h, w = sizes[0], sizes[1], sizes[2], sizes[3]
                batch_size = 64 if arch == 'resnet50' and args.inference else batch_size
                batch_size = bs

                print('ModelType: %s, Kernels: %s Input shape: %dx%dx%dx%d' %
                      (arch, kernel, batch_size, c, h, w))

                torch.manual_seed(0)
                data_ = torch.randn(batch_size, c, h, w)

            target_ = torch.arange(1, batch_size + 1).long()

            net = models.__dict__[arch](
            )  # no need to load pre-trained weights for dummy data

            optimizer = optim.SGD(net.parameters(), lr=0.01)
            criterion = nn.CrossEntropyLoss()

            if arch == 'overfeat' or arch == 'alexnet' or arch == 'vgg11':
                net.eval()

            data, target = Variable(data_), Variable(target_)

            time_fwd, time_bwd, time_upt = 0, 0, 0
            with torch.no_grad():
                steps = 1
                for omp in [1, 5, 8, 24]:
                    os.environ["OMP_NUM_THREADS"] = str(omp)
                    t1 = _time()
                    output = net(data)
                    t2 = _time()
                    time_fwd = time_fwd + (t2 - t1)
                    omp = os.getenv('OMP_NUM_THREADS')
                    expected_output = torch.load(
                        './mkldnn_cnn_outputs/mkldnn_' + arch + '_bs_' +
                        str(bs) + '_omp_' + str(omp) + '.pt')
                    diff = torch.max(torch.absolute(output - expected_output))
                    if diff < 0.0001:
                        print("\n********************* output matching for ",
                              arch, " with batch size = ", bs,
                              "for OMP_NUM_THREADS =", omp,
                              " *********************\n")
                    else:
                        print(
                            "\n********************* warning output mismatching for ",
                            arch, " with batch size = ", bs,
                            "for OMP_NUM_THREADS =", omp,
                            " *********************\n")

                    time_fwd_avg = time_fwd / steps * 1000
                    time_bwd_avg = time_bwd / steps * 1000
                    time_upt_avg = time_upt / steps * 1000

                    # update not included!
                    time_total = time_fwd_avg + time_bwd_avg

                    print("%-30s %10s %10.2f (ms) %10.2f (imgs/s)\n" %
                          (kernel, ':forward:', time_fwd_avg,
                           batch_size * 1000 / time_fwd_avg))
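The correctness check above (largest absolute element-wise difference below 1e-4) is the same criterion `torch.allclose` applies with `rtol=0` and `atol=1e-4`; a small illustration with made-up tensors:

import torch

output = torch.randn(8, 1000)
expected_output = output + 1e-6 * torch.randn(8, 1000)

diff = torch.max(torch.absolute(output - expected_output))
print(diff < 1e-4)                                                     # max-abs check
print(torch.allclose(output, expected_output, rtol=0.0, atol=1e-4))    # same criterion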
Example #13
# flake8: noqa
import torch
import math

a = torch.randn(4)
b = torch.randn(4)
t = torch.tensor([-1, -2, 3], dtype=torch.int8)

# abs/absolute
torch.abs(torch.tensor([-1, -2, 3]))
torch.absolute(torch.tensor([-1, -2, 3]))

# acos/arccos
torch.acos(a)
torch.arccos(a)

# acosh/arccosh
torch.acosh(a.uniform_(1, 2))

# add
torch.add(a, 20)
torch.add(a, torch.randn(4, 1), alpha=10)

# addcdiv
torch.addcdiv(torch.randn(1, 3),
              torch.randn(3, 1),
              torch.randn(1, 3),
              value=0.1)

# addcmul
torch.addcmul(torch.randn(1, 3),
              torch.randn(3, 1),
              torch.randn(1, 3),
              value=0.1)
Example #14
 def rand(self):
     Z = torch.absolute(torch.randn(self._n, self._m))
     return SKnopp(Z, self._p, self._q, self._maxSKnoppIters,
                   self._checkperiod)