def RecToPolar_3(RectData):
    '''
    Convert cartesian coordinates to polar (spherical) coordinates.
    input: the array of cartesian coordinates
    output: the polar coordinates
    '''
    # RectData=yy
    # print(RectData.size())
    SizeOfData = RectData.size()
    if (SizeOfData[2] == 3):
        # print(RectData[0:3,:])
        ListSmall = 1e-16  # use a small number to guard against division by zero
        R = torch.norm(RectData, p=2, dim=2) + ListSmall
        # print(R)
        Phi_Value = torch.addcdiv(torch.zeros_like(R), 1, RectData[:, :, 2], R)
        Phi = torch.acos(Phi_Value)  # use arccos to recover the elevation angle
        r = torch.addcmul(torch.zeros_like(R), 1, R, torch.sin(Phi)) + ListSmall
        Theta_Value = torch.addcdiv(torch.zeros_like(r), 1, RectData[:, :, 0], r)
        SignalOfNum = torch.lt(RectData[:, :, 1], torch.zeros_like(Theta_Value)).double()
        Flag_Signal_Coe = (-2 * SignalOfNum + 1)
        Flag_Fixed_Tail = np.pi * 2 * SignalOfNum
        Theta = torch.acos(Theta_Value).double() * Flag_Signal_Coe + Flag_Fixed_Tail
        result = torch.cat(
            (torch.unsqueeze(R.double(), 2),
             torch.unsqueeze(Theta.double(), 2),
             torch.unsqueeze(Phi.double(), 2)),
            dim=2)
        return (result)
def t_transform(T_src, T_delta, zoom_factor, num_classes):
    '''
    :param T_src: (x1, y1, z1)
    :param T_delta: (dx, dy, dz)
    :return: T_tgt: (x2, y2, z2)
    '''
    weight = 10.0
    T_src = T_src.repeat(1, num_classes)
    vz = torch.div(T_src[:, 2::3], torch.exp(T_delta[:, 2::3] / weight))
    vx = torch.mul(
        vz,
        torch.addcdiv(T_delta[:, 0::3] / weight, 1.0, T_src[:, 0::3], T_src[:, 2::3]))
    vy = torch.mul(
        vz,
        torch.addcdiv(T_delta[:, 1::3] / weight, 1.0, T_src[:, 1::3], T_src[:, 2::3]))
    T_tgt = torch.zeros_like(T_src)
    T_tgt[:, 0::3] = vx
    T_tgt[:, 1::3] = vy
    T_tgt[:, 2::3] = vz
    return T_tgt
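# Reading the transform above off in plain math (note: it relies on the legacy
# torch.addcdiv signature, out = input + value * tensor1 / tensor2, with the
# scalar `value` passed positionally):
#   z2 = z1 * exp(-dz / 10)
#   x2 = z2 * (dx / 10 + x1 / z1)
#   y2 = z2 * (dy / 10 + y1 / z1)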
def t_transform_old(T_src, T_delta, zoom_factor, num_classes):
    '''
    :param T_src: (x1, y1, z1)
    :param T_delta: (dx, dy, dz)
    :return: T_tgt: (x2, y2, z2)
    '''
    T_src = T_src.repeat(1, num_classes)
    factor_x = torch.unsqueeze(zoom_factor[:, 0], 1)
    factor_y = torch.unsqueeze(zoom_factor[:, 1], 1)
    vx_0 = torch.mul(T_delta[:, 0::3], factor_x.repeat(1, num_classes))
    vy_0 = torch.mul(T_delta[:, 1::3], factor_y.repeat(1, num_classes))
    vz = torch.div(T_src[:, 2::3], torch.exp(T_delta[:, 2::3]))
    vx = torch.mul(vz, torch.addcdiv(vx_0, 1.0, T_src[:, 0::3], T_src[:, 2::3]))
    vy = torch.mul(vz, torch.addcdiv(vy_0, 1.0, T_src[:, 1::3], T_src[:, 2::3]))
    T_tgt = torch.zeros_like(T_src)
    T_tgt[:, 0::3] = vx
    T_tgt[:, 1::3] = vy
    T_tgt[:, 2::3] = vz
    return T_tgt
def T_transform_batch(T_src, T_delta, zoom_factor, labels_pred=None):
    """inv_zoom T_delta; T_delta + T_src --> T_tgt.

    T_src: [B, 3] (x1, y1, z1)
    T_delta: [B, 3xnum_classes] (dx, dy, dz)
    zoom_factor: [B, 4]
        wx = crop_height / height
        wy = crop_height / height
        tx = zoom_c_x / width * 2 - 1
        ty = zoom_c_y / height * 2 - 1
        affine_matrix = [[wx, 0, tx], [0, wy, ty]]
    ---------
    T_tgt: [B, 3] (x2, y2, z2)
    """
    batch_size = T_delta.shape[0]
    if T_delta.shape[1] > 3:  # class aware
        assert labels_pred is not None, "labels_pred should not be None when class aware"
        inds = torch.arange(0, batch_size, dtype=torch.long, device=T_delta.device)
        T_delta_selected = T_delta.view(batch_size, -1, 3)[inds, labels_pred]  # [B, 3]
    else:
        T_delta_selected = T_delta
    factor_x = zoom_factor[:, 0]  # [B,]
    factor_y = zoom_factor[:, 1]  # [B,]
    vx_0 = T_delta_selected[:, 0] * factor_x
    vy_0 = T_delta_selected[:, 1] * factor_y
    vz = torch.div(T_src[:, 2], torch.exp(T_delta_selected[:, 2]))
    vx = vz * torch.addcdiv(vx_0, 1.0, T_src[:, 0], T_src[:, 2])
    vy = vz * torch.addcdiv(vy_0, 1.0, T_src[:, 1], T_src[:, 2])
    # import pdb; pdb.set_trace()
    T_tgt = torch.stack([vx, vy, vz], 1)
    return T_tgt
def _test_torch_addcdiv(t1, t2, t3):
    h1 = t1.hammerblade()
    h2 = t2.hammerblade()
    h3 = t3.hammerblade()
    out = torch.addcdiv(t1, t2, t3, value=0.1)
    out_h = torch.addcdiv(h1, h2, h3, value=0.1)
    assert out_h.device == torch.device("hammerblade")
    assert torch.allclose(out_h.cpu(), out)
def test_integer_addcdiv_deprecated(self, device, dtype):
    t = torch.tensor(1, device=device, dtype=dtype)

    with self.assertRaisesRegex(RuntimeError, '^Integer division.+is no longer supported.+'):
        torch.addcdiv(t, t, t)
    with self.assertRaisesRegex(RuntimeError, '^Integer division.+is no longer supported.+'):
        torch.addcdiv(t, t, t, out=t)
    with self.assertRaisesRegex(RuntimeError, '^Integer division.+is no longer supported+'):
        t.addcdiv_(t, t)
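# For reference, a minimal sketch (not part of the test suite above) of the
# floating-point addcdiv behaviour that remains supported; the current keyword
# form is torch.addcdiv(input, tensor1, tensor2, value=...), computing
# out = input + value * tensor1 / tensor2.
import torch

def _addcdiv_float_example():
    a = torch.tensor([1.0, 2.0])
    b = torch.tensor([4.0, 9.0])
    c = torch.tensor([2.0, 3.0])
    out = torch.addcdiv(a, b, c, value=0.5)  # [1 + 0.5*4/2, 2 + 0.5*9/3]
    assert torch.allclose(out, torch.tensor([2.0, 3.5]))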
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            state = self.state[id(p)]
            # print('state', state)
            square_avg = state['square_avg']
            alpha = group['alpha']
            state['step'] += 1

            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)

            torch.addcmul(square_avg.mul_(alpha), grad, grad, value=1 - alpha)
            # square_avg.mul_(alpha).addcmul_(1 - alpha, grad, grad)

            if group['centered']:
                grad_avg = state['grad_avg']
                grad_avg.mul_(alpha).add_(1 - alpha, grad)
                avg = square_avg.addcmul(-1, grad_avg, grad_avg).sqrt().add_(group['eps'])
            else:
                avg = square_avg.sqrt().add_(group['eps'])

            if group['momentum'] > 0:
                buf = state['momentum_buffer']
                buf.mul_(group['momentum']).addcdiv_(grad, avg)
                p.data.add_(-group['lr'], buf)
            else:
                '''
                print('type(p)', p)
                print("-group['lr']", -group['lr'])
                print("avg", avg)
                '''
                torch.addcdiv(p.data, grad, avg, value=-group['lr'])
                # p.data.addcdiv_(-group['lr'], grad, avg)

    return loss
def RecToPolar(RectData):
    # print(RectData.type())
    defaultType = RectData.dtype
    '''
    transform array from cartesian coordinates to spherical coordinates
    input: x,y,z
    output: R,Theta,Phi
    '''
    # RectData=local_parameter_org[:,0,:]
    SizeOfData = RectData.size()
    if (SizeOfData[1] == 3):
        # print(RectData[0:3,:])
        ListSmall = 1e-20  # use a small number to guard against division by zero
        ListSmall = torch.tensor(1e-20, dtype=defaultType)
        R = torch.norm(RectData, p=2, dim=1) + ListSmall
        Phi_Value = torch.addcdiv(torch.zeros_like(R), 1, RectData[:, 2], R)
        Phi_Value = torch.tensor(Phi_Value, dtype=defaultType)
        Phi = torch.acos(Phi_Value)  # use arccos to recover the elevation angle
        phi = torch.tensor(Phi, dtype=defaultType)
        r = torch.addcmul(torch.zeros_like(R), 1, R, torch.sin(Phi)) + ListSmall
        r = torch.tensor(r, dtype=defaultType)
        Theta_Value = torch.addcdiv(torch.zeros_like(r), 1, RectData[:, 0], r).type_as(RectData)
        SignalOfNum = torch.lt(RectData[:, 1], torch.zeros_like(Theta_Value)).float()
        SignalOfNum = torch.tensor(SignalOfNum, dtype=defaultType)
        Flag_Signal_Coe = (-2 * SignalOfNum + 1)
        Flag_Fixed_Tail = np.pi * 2 * SignalOfNum
        Theta = torch.acos(Theta_Value) * Flag_Signal_Coe + Flag_Fixed_Tail
        return (torch.cat(
            (R.reshape(-1, 1), Theta.reshape(-1, 1), Phi.reshape(-1, 1)), dim=1))
    elif (SizeOfData[1] == 2):
        ListSmall = 1e-20  # use a small number to guard against division by zero
        R = torch.norm(RectData, p=2, dim=1) + ListSmall
        Theta_Value = torch.addcdiv(torch.zeros_like(R), 1, RectData[:, 0], R).type_as(RectData)
        SignalOfNum = torch.lt(RectData[:, 1], torch.zeros_like(Theta_Value))
        Flag_Signal_Coe = (-2 * SignalOfNum + 1)
        Flag_Signal_Coe = Flag_Signal_Coe.type_as(RectData)
        Flag_Fixed_Tail = np.pi * 2 * SignalOfNum
        Flag_Fixed_Tail = Flag_Fixed_Tail.type_as(RectData)
        Theta = torch.acos(Theta_Value) * Flag_Signal_Coe + Flag_Fixed_Tail
        return (torch.cat((R.reshape(-1, 1), Theta.reshape(-1, 1)), dim=1))
    else:
        print('wrong data format')
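# A minimal sanity check for RecToPolar (an illustration only; it assumes torch
# is in scope and an older PyTorch build that still accepts the positional
# `value` argument of addcdiv used above): points on the x and y axes land at
# Theta = 0 and Theta = pi/2 respectively, both with Phi = pi/2.
def _rec_to_polar_sanity_check():
    xyz = torch.tensor([[1.0, 0.0, 0.0],
                        [0.0, 1.0, 0.0]])
    rtp = RecToPolar(xyz)
    # expected approximately:
    # [[1.0000, 0.0000, 1.5708],
    #  [1.0000, 1.5708, 1.5708]]
    print(rtp)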
def dual_gp_params(self, model, X, y, sample=True):
    """Returns Us, vs, mt, st, beta"""
    mu_t = self.state['mu']
    p_t = self.state['precision']
    m_t, s_t = self.compute_linprior()
    beta, alpha = self.defaults['betas'][1], self.param_groups[0]['lr']
    model.eval()
    parameters = self.param_groups[0]['params']
    if sample:
        raw_noise = torch.normal(mean=torch.zeros_like(mu_t), std=1.0)
        p = torch.addcdiv(mu_t, 1., raw_noise, torch.sqrt(p_t))
        vector_to_parameters(p, parameters)
    else:
        vector_to_parameters(mu_t, parameters)
    Us = list()
    output = model.forward(X).flatten()
    n = len(X)
    for i in range(n):
        model.zero_grad()
        output[i].backward(retain_graph=(i < (n - 1)))
        Us.append(model.gradient)
    vs = output.detach().numpy() - y.detach().numpy()
    # Reset model parameters to mean
    vector_to_parameters(mu_t, parameters)
    return np.stack(Us), np.array(vs), m_t.detach().numpy(), s_t.detach().numpy(), beta
def __init__(self, c_in, c_out, k_size, stride, pad, initializer='kaiming'):
    super(equalized_deconv2d, self).__init__()
    self.deconv = nn.ConvTranspose2d(c_in, c_out, k_size, stride, pad, bias=False)
    if initializer == 'kaiming':
        torch.nn.init.kaiming_normal(self.deconv.weight)
    elif initializer == 'xavier':
        torch.nn.init.xavier_normal(self.deconv.weight)

    deconv_w = self.deconv.weight.data.clone()
    self.scale = np.sqrt(deconv_w.pow(2).mean())
    self.bias = torch.nn.Parameter(torch.FloatTensor(c_out).fill_(0))

    inv_w = deconv_w.clone().fill_(self.scale)
    t = inv_w.clone().fill_(0)
    self.deconv.weight.data = torch.addcdiv(
        t, 1, self.deconv.weight.data, inv_w)  # adjust weights dynamically.
def get_dual_predictions(self, jac_closure, mc_samples=10, ret_jac=False):
    mu = self.state['mu']
    precision = self.state['precision']
    parameters = self.param_groups[0]['params']
    J_list = []
    fxs = []
    Jv_list = []
    for _ in range(mc_samples):
        # Sample a parameter vector:
        raw_noise = torch.normal(mean=torch.zeros_like(mu), std=1.0)
        p = torch.addcdiv(mu, 1., raw_noise, torch.sqrt(precision))
        vector_to_parameters(p, parameters)
        # Get loss and predictions
        preds, J = jac_closure()
        fxs.append(preds)
        J_list.append(J)  # each J in n x p
        Jv_list.append(J @ p)
    vector_to_parameters(mu, parameters)
    fx_hat = torch.mean(torch.stack(fxs), 0).flatten()
    J_hat = torch.mean(torch.stack(J_list), 0)
    Jv_hat = torch.mean(torch.stack(Jv_list), 0)
    mu_pred = fx_hat + J_hat @ mu - Jv_hat
    std_pred = torch.sqrt(torch.diag(J_hat @ torch.diag(1. / precision) @ J_hat.t()))
    if ret_jac:
        return (fx_hat.detach().numpy(), (J_hat @ mu).detach().numpy(),
                Jv_hat.detach().numpy(), std_pred.detach().numpy())
    return mu_pred.detach().numpy(), std_pred.detach().numpy()
def get_mc_predictions(self, forward_function, inputs, mc_samples=1, ret_numpy=False, *args, **kwargs):
    """Returns Monte Carlo predictions.

    Arguments:
        forward_function (callable): The forward function of the model
            that takes inputs and returns the outputs.
        inputs (FloatTensor): The inputs to the model.
        mc_samples (int): The number of Monte Carlo samples.
        ret_numpy (bool): If true, the returned list contains numpy arrays,
            otherwise it contains torch tensors.
    """
    # We only support a single parameter group.
    parameters = self.param_groups[0]['params']
    predictions = []

    Precision = self.state['Precision']
    mu = self.state['mu']
    for _ in range(mc_samples):
        # Sample a parameter vector:
        raw_noise = torch.normal(mean=torch.zeros_like(mu), std=1.0)
        p = torch.addcdiv(mu, 1., raw_noise, torch.sqrt(Precision))
        vector_to_parameters(p, parameters)

        # Call the forward computation function
        outputs = forward_function(inputs, *args, **kwargs)
        if ret_numpy:
            outputs = outputs.data.cpu().numpy()
        predictions.append(outputs)

    return predictions
def forward(self, input):
    input_reshape = torch.zeros(input.size())
    input_reshape = input.view(-1, self.num_clusters, self.num_neurons_per_cluster)
    dim = 2
    input_shift = input_reshape
    z_sorted = torch.sort(input_shift, dim=dim, descending=True)[0]
    input_size = input_shift.size()[dim]
    range_values = Variable(torch.arange(1, input_size + 1), requires_grad=False)
    range_values = range_values.expand_as(z_sorted)
    bound = Variable(torch.zeros(z_sorted.size()), requires_grad=False)
    bound = 1 + torch.addcmul(bound, range_values, z_sorted)
    cumsum_zs = torch.cumsum(z_sorted, dim)
    is_gt = torch.gt(bound, cumsum_zs).type(torch.FloatTensor)
    valid = Variable(torch.zeros(range_values.size()), requires_grad=False)
    valid = torch.addcmul(valid, range_values, is_gt)
    k_max = torch.max(valid, dim)[0]
    zs_sparse = Variable(torch.zeros(z_sorted.size()), requires_grad=False)
    zs_sparse = torch.addcmul(zs_sparse, is_gt, z_sorted)
    sum_zs = (torch.sum(zs_sparse, dim) - 1)
    taus = Variable(torch.zeros(k_max.size()), requires_grad=False)
    taus = torch.addcdiv(taus, (torch.sum(zs_sparse, dim) - 1), k_max)
    taus = torch.unsqueeze(taus, 1)
    taus_expanded = taus.expand_as(input_shift)
    output = Variable(torch.zeros(input_reshape.size()))
    output = torch.max(output, input_shift - taus_expanded)
    return output.view(
        -1, self.num_clusters * self.num_neurons_per_cluster), zs_sparse, taus, is_gt
def forward(self, input):
    input_reshape = torch.zeros(input.size())
    input_reshape = input.view(-1, self.num_clusters, self.num_neurons_per_cluster)
    dim = 2
    # translate for numerical stability
    input_shift = input_reshape  # - torch.max(input_reshape, dim)[0].expand_as(input_reshape)
    # sort input in descending order
    z_sorted = torch.sort(input_shift, dim=dim, descending=True)[0]
    input_size = input_shift.size()[dim]
    range_values = Variable(torch.arange(1, input_size + 1), requires_grad=False).cuda()
    range_values = range_values.expand_as(z_sorted)

    # Determine sparsity of projection
    bound = Variable(torch.zeros(z_sorted.size()), requires_grad=False).cuda()
    bound = 1 + torch.addcmul(bound, range_values, z_sorted)
    cumsum_zs = torch.cumsum(z_sorted, dim)
    is_gt = torch.gt(bound, cumsum_zs).type(torch.FloatTensor).cuda()
    valid = Variable(torch.zeros(range_values.size()), requires_grad=False).cuda()
    valid = torch.addcmul(valid, range_values, is_gt)
    k_max = torch.max(valid, dim)[0]
    zs_sparse = Variable(torch.zeros(z_sorted.size()), requires_grad=False).cuda()
    zs_sparse = torch.addcmul(zs_sparse, is_gt, z_sorted)
    sum_zs = (torch.sum(zs_sparse, dim) - 1)
    taus = Variable(torch.zeros(k_max.size()), requires_grad=False).cuda()
    taus = torch.addcdiv(taus, (torch.sum(zs_sparse, dim) - 1), k_max)
    taus_expanded = taus.expand_as(input_reshape)
    output = Variable(torch.zeros(input_reshape.size())).cuda()
    output = torch.max(output, input_shift - taus_expanded)
    return output.view(-1, self.num_clusters * self.num_neurons_per_cluster), zs_sparse, taus, is_gt
def poincare_inner_product(self, a, b, sparse_flag=False):
    """ See 2.34 thesis """
    if sparse_flag == False:
        # g_numerator = 2 * th.sum( (a-b) ** 2, dim=-1, keepdim=False)
        # g_denominator = (1 - th.sum(a ** 2, dim=-1, keepdim=False)) * (1 - th.sum(b ** 2, dim=-1, keepdim=False))
        # g_angle = th.ones(a.shape[0]) + g_numerator / g_denominator
        # return self.acosh(g_angle)
        numerator = 2 * self.sqnorm(a - b)
        denominator = th.mul((th.ones_like(numerator) - self.sqnorm(a)),
                             (th.ones_like(numerator) - self.sqnorm(b)))
        return self.acosh(th.addcdiv(th.ones_like(numerator), numerator, denominator))
    else:
        i = a._indices()
        v = a._values()
        bv = b[i[:]][0]  # get corresponding b values
        diff_vec = th.sparse.FloatTensor(i, v - bv, a.size())
        numerator = 2 * self.sparse_sqnorm(diff_vec)
        # print("SPARSE INNER PRODUCT numerator shape = {}".format(numerator.shape))
        b_sparse_match = th.sparse.FloatTensor(i, bv, a.size())
        # print("b_sparse_match shape = {}".format(b_sparse_match.shape))
        denominator = th.sparse.FloatTensor(
            i,
            (1 - th.sum(th.pow(v, 2), dim=-1)) * (1 - th.sum(th.pow(bv, 2), dim=-1)),
            th.Size([a.shape[0]]))
        # denominator = th.mul( (th.ones_like(numerator) - self.sparse_sqnorm(a)), (th.ones_like(numerator) - self.sparse_sqnorm(b)) )
        # print("SPARSE INNER PRODUCT denominator shape = {}".format(denominator.shape))
        v_num = numerator._values()
        v_den = denominator._values()
        return th.sparse.FloatTensor(i, self.acosh(1 + v_num / v_den), th.Size([a.shape[0]]))
def forward(self, x):
    nf = x.size(1)
    t = x.clone()
    t.data.fill_(0)
    norm = torch.sqrt(x.pow(2).sum(1, keepdim=True).expand_as(x).div(nf).add(self.eps))
    return torch.addcdiv(t, 1, x, norm)
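# The forward above is a PixelNorm-style normalization: it divides each feature
# vector by its RMS over the channel dimension. A minimal equivalent sketch
# without addcdiv (an illustration, not the module's actual code path; `eps` is
# assumed to play the role of self.eps above):
import torch

def pixel_norm(x, eps=1e-8):
    # x: [N, C, H, W]; normalize each spatial position across the channel dim
    return x / torch.sqrt(x.pow(2).mean(dim=1, keepdim=True) + eps)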
def compute_normalized_vector(self, batch_size, num_channels):
    if num_channels == 1:
        return torch.ones(batch_size)
    v = torch.randn(batch_size, num_channels)
    t = torch.zeros(batch_size, num_channels)
    norm = torch.norm(v, 2, 1, True)
    v = torch.addcdiv(t, 1.0, v, norm)
    return v
def backward(self, grad_output):
    # output_forward, = self.saved_tensors
    self.output = self.output.view(-1, self.num_clusters, self.num_neurons_per_cluster)
    grad_output = grad_output.view(-1, self.num_clusters, self.num_neurons_per_cluster)
    dim = 2
    non_zeros = Variable(torch.ne(self.output, 0).type(torch.FloatTensor), requires_grad=False).cuda()
    mask_grad = Variable(torch.zeros(self.output.size()), requires_grad=False).cuda()
    mask_grad = torch.addcmul(mask_grad, non_zeros, grad_output)
    sum_mask_grad = torch.sum(mask_grad, dim)
    l1_norm_non_zeros = torch.sum(non_zeros, dim)
    sum_v = Variable(torch.zeros(sum_mask_grad.size()), requires_grad=False).cuda()
    sum_v = torch.addcdiv(sum_v, sum_mask_grad, l1_norm_non_zeros)
    self.gradInput = Variable(torch.zeros(grad_output.size()))
    self.gradInput = torch.addcmul(
        self.gradInput, non_zeros, grad_output - sum_v.expand_as(grad_output))
    self.gradInput = self.gradInput.view(
        -1, self.num_clusters * self.num_neurons_per_cluster)
    return self.gradInput
def get_random_vector(batch_size, ndim, device):
    if ndim == 1:
        return torch.ones(batch_size, device=device)
    v = torch.randn(batch_size, ndim, device=device)
    zeros = torch.zeros_like(v)
    norm = v.norm(dim=1, keepdim=True) + 1e-10
    return torch.addcdiv(zeros, 1.0, v, norm)
def apply_update(self, lr, wc):
    for (g, dw) in zip(self.g, self.dw):
        g[0].mul(0.95).add(0.05, dw[0])
        g[1].mul(0.95).add(0.05, dw[1])

    # this is disgusting but pytorch is a bit weird.
    # torch.mul(Tensor a, Tensor b, Tensor out)
    # errors because it's an invalid combination of
    # elements.
    # EVEN THOUGH THE DOCS AND ERROR MSG SAY IT'S FINE
    i = 0
    for i in range(len(self.tmp)):
        self.tmp[i] = (self.dw[i][0].mul(self.dw[i][0]),
                       self.dw[i][1].mul(self.dw[i][1]))

    for (g2, tmp) in zip(self.g2, self.tmp):
        g2[0].mul(0.95).add(0.05, tmp[0])
        g2[1].mul(0.95).add(0.05, tmp[1])

    i = 0
    for i in range(len(self.tmp)):
        self.tmp[i] = (self.g[i][0].mul(self.g[i][0]),
                       self.g[i][1].mul(self.g[i][1]))

    for (tmp, g2) in zip(self.tmp, self.g2):
        tmp[0].mul(-1)
        tmp[1].mul(-1)
        tmp[0].add(g2[0])
        tmp[1].add(g2[1])
        tmp[0].add(0.01)
        tmp[1].add(0.01)
        tmp[0].sqrt()
        tmp[1].sqrt()

    for i in range(len(self.g)):
        self.deltas[i][0].mul(0)
        torch.addcdiv(self.deltas[i][0], lr, self.dw[i][0], self.tmp[i][0])
        self.deltas[i][1].mul(0)
        torch.addcdiv(self.deltas[i][1], lr, self.dw[i][1], self.tmp[i][1])

    i = 0
    for w, b in zip(*[self.parameters()] * 2):
        w.add(self.deltas[i][0])
        b.add(self.deltas[i][1])
        i += 1
def step(self, closure=None):
    """Performs a single optimization step.
    replace p.data with gradient delta value.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    Return:
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            # This is the part where Noisy Adam differs from Adam.
            grad = p.grad.data - group['eps'] * p.data
            if grad.is_sparse:
                raise RuntimeError('Does not support sparse gradients.')
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                # Exponential moving average of gradient values
                state['exp_avg'] = torch.zeros_like(p.data)
                # Exponential moving average of squared gradient values
                state['exp_avg_sq'] = torch.zeros_like(p.data)

            exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
            beta1, beta2 = group['betas']
            state['step'] += 1

            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)

            # Decay the first and second moment running average coefficient
            exp_avg.mul_(beta1).add_(1 - beta1, grad)
            exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)

            denom = exp_avg_sq.sqrt().add_(group['eps'])

            bias_correction1 = 1 - beta1**state['step']
            bias_correction2 = 1 - beta2**state['step']
            step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

            p.data = torch.addcdiv(torch.zeros(1), -step_size, exp_avg, denom)

    return loss
def test_integer_div_deprecated(self, device, dtype):
    a = torch.tensor(1, device=device, dtype=dtype)
    b = torch.tensor(1, device=device, dtype=dtype)
    o = torch.empty(1, device=device, dtype=dtype)

    # Tests div (including /) deprecation
    with self.maybeWarnsRegex(UserWarning, '^Integer division.+is deprecated.+'):
        c = a / b
    with self.maybeWarnsRegex(UserWarning, '^Integer division.+is deprecated.+'):
        c = torch.div(a, b)
    with self.maybeWarnsRegex(UserWarning, '^Integer division.+is deprecated.+'):
        torch.div(a, b, out=o)

    # Tests addcdiv deprecation
    with self.maybeWarnsRegex(UserWarning, '^Integer division.+is deprecated.+'):
        torch.addcdiv(a, b, b)
    with self.maybeWarnsRegex(UserWarning, '^Integer division.+is deprecated.+'):
        torch.addcdiv(a, b, b, out=o)
def dual_gp_params_star(self, model, X, y, sample=False):
    Us, vs, m_t, _, _ = self.dual_gp_params(model, X, y, sample=sample)
    s_0 = 1 / self.state['prior_prec'].detach().numpy()
    if sample:
        precision = self.state['precision']
        mu = self.state['mu']
        raw_noise = torch.normal(mean=torch.zeros_like(mu), std=1.0)
        m_t = torch.addcdiv(mu, 1., raw_noise, torch.sqrt(precision))
        m_t = m_t.detach().numpy()
    return Us, vs, m_t, np.zeros_like(m_t), s_0
def lookup(self, x):
    # compute the distance between x and memories
    #
    # compute distance between x and memory in mem_backgrounds
    # print(self.mem_backgrounds.size())
    # print(x.view(self.mem_dim, 1).size())
    x_v = x.view(self.mem_dim, 1)
    x_v_norm = torch.norm(x_v, 2)
    source = torch.FloatTensor(self.mem_size, 1).fill_(0)
    score_b = torch.addcdiv(source, 1, self.mem_backgrounds.mm(x_v),
                            self.mem_backgrounds_norm) / x_v_norm
    score_a = torch.addcdiv(source, 1, self.mem_apples.mm(x_v),
                            self.mem_apples_norm) / x_v_norm
    score_l = torch.addcdiv(source, 1, self.mem_lemons.mm(x_v),
                            self.mem_lemons_norm) / x_v_norm
    scores = []
    scores.append(torch.mean(score_b))
    scores.append(torch.mean(score_a))
    scores.append(torch.mean(score_l))
    return scores
def _random_vector(self, nensemble, nbatch, nv):
    '''
    creates a random vector of dimension C with a norm of C^(1/2)
    (as needed for the projection formula to work)
    '''
    if nv == 1:
        return torch.ones(nensemble, nbatch)
    v = torch.randn(nensemble, nbatch, nv)
    arxilirary_zero = torch.zeros(nensemble, nbatch, nv)
    vnorm = torch.norm(v, 2, -1, True)
    v = torch.addcdiv(arxilirary_zero, 1.0, v, vnorm)
    return v
def _random_vector(C, B):
    '''
    creates a random vector of dimension C with a norm of C^(1/2)
    (as needed for the projection formula to work)
    '''
    if C == 1:
        return torch.ones(B)
    v = torch.randn(B, C)
    arxilirary_zero = torch.zeros(B, C)
    vnorm = torch.norm(v, 2, 1, True)
    v = torch.addcdiv(arxilirary_zero, 1.0, v, vnorm)
    return v
def update_params_adam(params, grads, opt):
    ans = []
    group = opt.param_groups[0]
    assert len(opt.param_groups) == 1
    for p, grad in zip(params, grads):
        if grad is None:
            ans.append(p)
            continue
        amsgrad = group['amsgrad']
        state = opt.state[p]

        # State initialization
        if len(state) == 0:
            state['step'] = 0
            # Exponential moving average of gradient values
            state['exp_avg'] = torch.zeros_like(p.data)
            # Exponential moving average of squared gradient values
            state['exp_avg_sq'] = torch.zeros_like(p.data)
            if amsgrad:
                # Maintains max of all exp. moving avg. of sq. grad. values
                state['max_exp_avg_sq'] = torch.zeros_like(p.data)

        exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
        if amsgrad:
            max_exp_avg_sq = state['max_exp_avg_sq']
        beta1, beta2 = group['betas']

        state['step'] += 1
        bias_correction1 = 1 - beta1 ** state['step']
        bias_correction2 = 1 - beta2 ** state['step']

        if group['weight_decay'] != 0:
            grad.add_(group['weight_decay'], p.data)

        # Decay the first and second moment running average coefficient
        exp_avg.mul_(beta1).add_(1 - beta1, grad)
        exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
        if amsgrad:
            # Maintains the maximum of all 2nd moment running avg. till now
            torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
            # Use the max. for normalizing running avg. of gradient
            denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
        else:
            denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])

        step_size = group['lr'] / bias_correction1

        # ans.append(p.data.addcdiv(-step_size, exp_avg, denom))
        ans.append(torch.addcdiv(p, -step_size, exp_avg, denom))
    return ans
def d_k(k, x, basis_x, M, epsilon):
    m = ReLU()
    a = m(triple_point_multi(x - basis_x[k]))
    b = m(triple_point_multi(x - basis_x[M]))
    numerator = a - b
    denominator = basis_x[M] - basis_x[k] + epsilon
    res = torch.zeros(numerator.shape[-1]).cuda()
    res = torch.addcdiv(res, value=1, tensor1=numerator, tensor2=denominator, out=None)
    return res
def __init__(self, c_in, c_out, initializer='kaiming'):
    super(equalized_linear, self).__init__()
    self.linear = nn.Linear(c_in, c_out, bias=False)
    if initializer == 'kaiming':
        torch.nn.init.kaiming_normal(self.linear.weight)
    elif initializer == 'xavier':
        torch.nn.init.xavier_normal(self.linear.weight)

    linear_w = self.linear.weight.data.clone()
    self.bias = torch.nn.Parameter(torch.FloatTensor(c_out).fill_(0))
    self.scale = np.sqrt(linear_w.pow(2).mean())

    inv_w = linear_w.clone().fill_(self.scale)
    t = inv_w.clone().fill_(0)
    self.linear.weight.data = torch.addcdiv(
        t, 1, self.linear.weight.data, inv_w)  # adjust weights dynamically.
def _random_vector(C, B):
    """
    creates a random vector of dimension C with a norm of C^(1/2)
    (as needed for the projection formula to work)

    :param C: int, number of classes
    :param B: int, number of batch elements
    :return:
    """
    if C == 1:
        return torch.ones(B)
    v = torch.randn(B, C)
    arxilirary_zero = torch.zeros(B, C)
    vnorm = torch.norm(v, 2, 1, True)
    v = torch.addcdiv(arxilirary_zero, 1.0, v, vnorm)
    return v
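# A minimal usage sketch for _random_vector (an illustration only; it assumes
# torch is in scope and an older PyTorch build that still accepts the positional
# `value` argument of addcdiv used above): every returned row is the random
# draw divided by its own L2 norm.
def _random_vector_example():
    v = _random_vector(C=10, B=4)
    print(v.shape)        # torch.Size([4, 10])
    print(v.norm(dim=1))  # each entry is ~1.0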