def compute_wass_dist_grad_chainer(At_, Bt_, verbose=1, **kwargs):
    """CUDA ready: implementation based on NumPy/CuPy.

    Computes the pairwise Sinkhorn distance matrix between the histogram
    stacks ``At_`` (last axis: m histograms) and ``Bt_`` (last axis: n
    histograms) and, by one backward pass per (i, j) pair, the gradient of
    each distance with respect to ``At_``.

    Returns the distance Variable ``d`` and a Variable ``G`` holding the
    gradients, shaped (*At_.shape, n).
    """
    xp = cuda.get_array_module(At_)
    # Unpacking asserts the expected (d1, d2, d3, m) layout of At_.
    dim1, dim2, dim3 = At_.shape[:-1]
    n_src = At_.shape[-1]
    n_dst = Bt_.shape[-1]
    At = Variable(At_)
    Bt = Variable(Bt_)
    if verbose > 0:
        print('Computing Sinkhorn distances...')
    d = sinkhorn_chainer(At, Bt, **kwargs)
    if verbose > 0:
        print('Computing gradients...')
    G = Variable(xp.zeros((*At.shape, Bt.shape[-1]), dtype=np.float64))
    for src in range(n_src):
        for dst in range(n_dst):
            if verbose > 1:
                print(' element (%d, %d)' % (src, dst))
            # One scalar backward pass per distance; only the src-th slice
            # of At receives a non-zero gradient.
            At.cleargrad()
            d[src, dst].backward()
            G.data[:, :, :, src, dst] = At.grad[:, :, :, src]
    return d, G
def recover_3d_structure(measurement_matrix):
    """Factorize a 2F x P measurement matrix into motion and shape.

    Rank-3 SVD factorization followed by a gradient-descent search for the
    3x3 corrective matrix Q enforcing the metric constraints (unit-norm,
    orthogonal image-axis rows). Returns (R, S, R_, S_): the corrected and
    the raw (affine) motion/shape factors.
    """
    num_frames = measurement_matrix.shape[0] // 2
    U, Z, V = np.linalg.svd(measurement_matrix, full_matrices=False)
    Z = np.diag(Z)
    # Split the singular values evenly between the two rank-3 factors.
    sqrt_sigma = np.sqrt(Z[:3, :3])
    R_ = np.dot(U[:, :3], sqrt_sigma)
    S_ = np.dot(sqrt_sigma, V[:3])
    rows_i = R_[:num_frames]   # x-axis rows, one per frame
    rows_j = R_[num_frames:]   # y-axis rows, one per frame
    # Start near the identity so the search stays close to a valid metric fix.
    Q = Variable(
        np.eye(3, dtype=np.float32)
        + np.random.normal(0, 0.1, (3, 3)).astype(np.float32))
    lr = 0.1
    minimum_loss_value = 0.0001
    # Metric constraints: |i|^2 = |j|^2 = 1 and i . j = 0 for every frame.
    target_ii = np.full((num_frames, ), 1.0, dtype=np.float32)
    target_jj = np.full((num_frames, ), 1.0, dtype=np.float32)
    target_ij = np.full((num_frames, ), 0.0, dtype=np.float32)
    for _ in range(1000):
        iq = fn.matmul(rows_i, Q)
        jq = fn.matmul(rows_j, Q)
        loss_ii = fn.sum(fn.matmul(iq, Q.T) * rows_i, axis=1)
        loss_jj = fn.sum(fn.matmul(jq, Q.T) * rows_j, axis=1)
        loss_ij = fn.sum(fn.matmul(iq, Q.T) * rows_j, axis=1)
        loss = (fn.mean_squared_error(loss_ii, target_ii)
                + fn.mean_squared_error(loss_jj, target_jj)
                + fn.mean_squared_error(loss_ij, target_ij))
        Q.cleargrad()
        loss.backward()
        # Plain gradient descent with a slowly decaying learning rate.
        Q.data -= lr * Q.grad
        if float(loss.data) < minimum_loss_value:
            break
        lr *= 0.995
    R = np.dot(R_, Q.data)
    S = np.dot(np.linalg.inv(Q.data), S_)
    return R, S, R_, S_
def sinkhorn_dist_grad(At_, Bt_, **kwargs):
    """Compute pairwise Sinkhorn distances and their gradients w.r.t. At_.

    Like compute_wass_dist_grad_chainer but returns raw arrays: the
    distance data ``M`` and a gradient array ``G`` of shape
    (*At_.shape, n). Inputs with fewer than 4 dims are treated as a
    single histogram (m = n = 1).
    """
    xp = cuda.get_array_module(At_)
    n_src = At_.shape[3] if At_.ndim == 4 else 1
    n_dst = Bt_.shape[3] if Bt_.ndim == 4 else 1
    At = Variable(At_)
    Bt = Variable(Bt_)
    print('Computing Sinkhorn distances...')
    d = sinkhorn_chainer(At, Bt, **kwargs)
    M = d.data
    print('Computing gradients...')
    G = xp.zeros((*At.shape, Bt.shape[-1]), dtype=np.float64)
    for src in range(n_src):
        for dst in range(n_dst):
            print(' element (%d, %d)' % (src, dst))
            # One scalar backward pass per (src, dst) distance element.
            At.cleargrad()
            d[src, dst].backward()
            G[:, :, :, src, dst] = At.grad[:, :, :, src]
    return M, G
def sinkhorn_fb(a, b, **kwargs):
    """Forward/backward Sinkhorn pass between histogram stacks a and b.

    Returns the (m, n) distance matrix ``M`` (moved to CPU) and the
    Jacobian ``J`` of the distances with respect to ``a``, computed one
    column of distances at a time via ``prepare_gradient``.
    """
    xp = cuda.get_array_module(a)
    n_src = a.shape[3] if a.ndim == 4 else 1
    n_dst = b.shape[3] if b.ndim == 4 else 1
    av = Variable(a)
    bv = Variable(b)
    print('forward')
    d = sinkhorn_chainer(av, bv, **kwargs)
    M = cuda.to_cpu(d.data)
    # The actual Jacobian matrix is of size [(d1*d2*d3)*m] x (m*n), but since
    # grad_{x_k} d(x_i, y_j) != 0 iff k == i, it is sparse and can be reduced
    # to size [(d1*d2*d3)*m] x n by omitting the n*(m-1) zeros in each row.
    J = xp.empty(shape=(*a.shape[:3], n_src, n_dst))
    print('backward')
    for col in range(n_dst):
        column = d[:, col]
        av.cleargrad()
        prepare_gradient(column)
        column.backward()
        J[:, :, :, :, col] = av.grad.reshape((*a.shape[:3], n_src))
    return M, J
def _apply_backward(self, x, grid, grads):
    """Run the spatial transformer sampler forward, then back-propagate.

    Wraps ``x`` and ``grid`` in Variables, seeds the output gradient with
    ``grads``, and returns the input Variables (now carrying ``.grad``)
    together with the sampler output.
    """
    x_var = Variable(x)
    grid_var = Variable(grid)
    out = functions.spatial_transformer_sampler(x_var, grid_var)
    x_var.cleargrad()
    grid_var.cleargrad()
    # Seed the backward pass with the caller-supplied upstream gradient.
    out.grad = grads
    out.backward()
    return x_var, grid_var, out
def update_step(net, images, step_size=1.5, end='inception_4c/output',
                jitter=32, clip=True):
    """Single deepdream-style gradient ascent step on ``images``.

    Applies a random spatial jitter, maximizes ``objective`` at layer
    ``end`` by normalized gradient ascent, undoes the jitter, and
    optionally clips pixels to the valid range implied by ``net.mean``.
    Returns the updated image batch.
    """
    ox, oy = np.random.randint(-jitter, jitter + 1, 2)
    # Random shift so successive steps do not lock onto the pixel grid.
    data = np.roll(np.roll(images, ox, -1), oy, -2)
    x = Variable(xp.asarray(data))
    x.cleargrad()
    dest, = net(x, outputs=[end])
    objective(dest).backward()
    g = cuda.to_cpu(x.grad)
    # Ascent step scaled by the mean absolute gradient.
    data[:] += step_size / np.abs(g).mean() * g
    # Undo the jitter shift.
    data = np.roll(np.roll(data, -ox, -1), -oy, -2)
    if clip:
        bias = net.mean.reshape((1, 3, 1, 1))
        data[:] = np.clip(data, -bias, 255 - bias)
    return data
# Train loop for batch_idx in range(100): # Get data batch_x, batch_y = get_batch(W_target, b_target) # Forward pass y_pred = model(batch_x, W, b) # 損失関数 MSE(mean square error) loss = F.mean_squared_error(y_pred, batch_y) # Manually zero the gradients after updating weights # パラメータの勾配をゼロ化する.(重要) W.cleargrad() b.cleargrad() # Backward pass loss.backward() # Apply gradients learning_rate = 0.1 W.data = W.data - learning_rate * W.grad b.data = b.data - learning_rate * b.grad # Stop criterion if loss.data < 1.e-3: break # 計算結果の出力
# x00v = Variable(x0[:, :, :, 0]) # x01v = Variable(x0[:, :, :, 0].reshape((*x0.shape[:3], 1))) # # d_ref = sinkhorn(x0, x1) # d0 = sinkhorn(x0[:, :, :, 0], x1) # d00 = sinkhorn(x0[:, :, :, 0].reshape((*x0.shape[:3], 1)), x1) # d_ref1 = sinkhorn_chainer(x0v, x1v) # d1 = sinkhorn_chainer(x00v, x1v) # d11 = sinkhorn_chainer(x01v, x1v) d = sinkhorn_chainer(x0v, x1v).reshape((4, 4)) prepare_gradient(d) d.backward() x00g = x0v.grad x0v.cleargrad() d0 = d[0, 1] d1 = d[1, 1] x0v.cleargrad() d0.backward() x01g = x0v.grad x0v.cleargrad() d1.backward() x11g = x0v.grad print('first test')
# n = Variable(np.random.randn(B, D).astype('f')) a = Variable(np.array([[2, 1]]).astype('f')) p = Variable(np.array([[1, 3]]).astype('f')) n = Variable(np.array([[2, 2]]).astype('f')) alpha = np.deg2rad(alpha_in_degree) step_size = 0.01 for i in range(100): plt.plot(*a.data[0], 'o', label='anchor') plt.plot(*p.data[0], 'o', label='positive') plt.plot(*n.data[0], 'o', label='negative') plt.axes().set_aspect('equal') plt.axis((0, 5, 0, 5)) plt.grid() plt.legend() plt.show() loss = angular_loss(a, p, n, alpha) # loss = F.triplet(a, p, n, 10) print(i, loss) if np.allclose(loss.data, 0): break a.cleargrad() p.cleargrad() n.cleargrad() loss.backward(True) a.data -= step_size * a.grad p.data -= step_size * p.grad n.data -= step_size * n.grad
class AdvImage(object):
    """
    This object performs adversarial attack to one image.

    original image : Image
    Net            : Neural Net models : VGG16, GoogLeNet, ResNet152
    Attack methods : (iterative) fast gradient sign methods
    """

    # Class-level state shared by all instances; populated by set_model().
    uses_device = None
    xp = None          # numpy or cupy, depending on the selected device
    model_name = None
    model = None
    size = None        # (H, W) input resolution expected by the model
    mean = None        # per-channel BGR mean subtracted before inference
    last_layer = None  # name of the model's final (logit) layer

    def __init__(self, image_path, image_index, uses_device=0):
        """ Set an original image and index. """
        self.path = image_path
        self.index = image_index
        self.ORG_image = Image.open(image_path).convert('RGB')
        self.org_image = None  # resized image
        self.target = None     # chainer Variable fed to the model
        self.adv_image = None  # adversarial image

    @classmethod
    def set_model(cls, model_name, uses_device=0):
        """
        Set model and device.
        uses_device = -1 : CPU
        uses_device >= 0 : GPU (default 0)
        """
        # use gpu or cpu
        cls.uses_device = uses_device
        if uses_device >= 0:
            chainer.cuda.get_device_from_id(uses_device).use()
            chainer.cuda.check_cuda_available()
            import cupy as xp
        else:
            xp = np
        cls.xp = xp
        # set model
        cls.model_name = model_name
        if model_name == "VGG16":
            cls.model = L.VGG16Layers()
            cls.last_layer = 'fc8'
            cls.size = (224, 224)
            cls.mean = [103.939, 116.779, 123.68]
        elif model_name == "GoogLeNet":
            cls.model = L.GoogLeNet()
            cls.last_layer = 'loss3_fc'
            cls.size = (224, 224)
            cls.mean = [104.0, 117.0, 123.0]
        elif model_name == "ResNet152":
            cls.model = L.ResNet152Layers()
            cls.last_layer = 'fc6'
            cls.size = (224, 224)
            cls.mean = [103.063, 115.903, 123.152]
        else:
            raise Exception("Invalid model")
        if uses_device >= 0:
            cls.model.to_gpu()
        # For memory saving: only the input image needs gradients,
        # never the model weights.
        for param in cls.model.params():
            param._requires_grad = False

    def set_state(self):
        """ Set a variable which correspnds to the adversarial image. """
        if AdvImage.model is None:
            raise Exception("model is not set")
        self.org_image = self.ORG_image.resize(AdvImage.size)
        if self.adv_image is None:
            self.target = self._prepare_variable(self.org_image)
            self.adv_image = self._restore_image(self.target)
        else:
            # Resume from the current adversarial image.
            self.target = self._prepare_variable(self.adv_image)

    def reset_state(self):
        """ Reset the adversarial image and the corresponding variable. """
        self.target = self._prepare_variable(self.org_image)
        self.adv_image = self._restore_image(self.target)

    def _prepare_variable(self, image):
        """ Convert PIL.Image to chainer.variable (BGR, mean-subtracted, NCHW). """
        # image must be resized before fed into this method
        xp = AdvImage.xp
        arr = xp.array(image, dtype=xp.float32)  # image should be copied (to gpu)
        arr = arr[:, :, ::-1]  # RGB -> BGR
        arr -= xp.array(AdvImage.mean, dtype=xp.float32)
        arr = arr.transpose((2, 0, 1))  # HWC -> CHW
        arr = arr.reshape((1,) + arr.shape)  # add batch axis
        return Variable(arr)

    def _restore_image(self, target):
        """ Convert chainer.variable to PIL.Image. """
        arr = target.data[0].copy()  # vaiable.data should be copied (to cpu)
        arr = cuda.to_cpu(arr)
        arr = arr.transpose((1, 2, 0))  # CHW -> HWC
        arr += np.array(AdvImage.mean, dtype=np.float32)
        arr = arr[:, :, ::-1]  # BGR -> RGB
        # Fix: clip before the uint8 cast. Adversarial perturbations can push
        # pixel values outside [0, 255], and astype(np.uint8) would wrap
        # around instead of saturating.
        arr = np.clip(arr, 0.0, 255.0)
        return Image.fromarray(arr.astype(np.uint8), 'RGB')

    def _save_image(self, image_obj, dir_path, model_name):
        """ Save image_obj under dir_path/model_name/<original basename>.jpg. """
        model_dir = os.path.join(dir_path, model_name)
        if os.path.exists(model_dir) is False:
            os.mkdir(model_dir)
        file_name = "{0}.jpg".format(os.path.basename(self.path).split('.')[0])
        file_path = os.path.join(model_dir, file_name)
        image_obj.save(file_path)

    def save_adv(self, dir_path):
        self._save_image(self.adv_image, dir_path, AdvImage.model_name)

    def save_org(self, dir_path):
        self._save_image(self.org_image, dir_path, "Original")

    @classmethod
    def _pred(cls, image):
        """ Return (predicted class index, its probability) for a PIL image. """
        res = cls.model.predict([image], oversample=False).data[0]
        res = cuda.to_cpu(res)
        pred_index = np.argmax(res)
        prob = res[pred_index]
        return pred_index, prob

    def pred_org(self):
        return AdvImage._pred(self.org_image)

    def pred_adv(self):
        return AdvImage._pred(self.adv_image)

    ## adversarial attacks #####################

    def fast_gradient(self, eps):
        """ One-step FGSM: move eps along the sign of the loss gradient. """
        xp = AdvImage.xp
        out_layer = AdvImage.last_layer
        x = AdvImage.model(self.target, layers=[out_layer])[out_layer]
        t = xp.array([self.index], dtype=xp.int32)
        loss = F.softmax_cross_entropy(x, t)
        self.target.cleargrad()
        AdvImage.model.cleargrads()
        loss.backward()
        perturb = xp.sign(self.target.grad)
        self.target = Variable(self.target.data + eps * perturb)
        self.adv_image = self._restore_image(self.target)

    def iterative_gradient(self, eps, alpha=1, n_iter=None):
        """ Iterative FGSM: alpha-sized steps, clipped to an eps-ball. """
        xp = AdvImage.xp
        if n_iter is None:
            # Default iteration count from Kurakin et al.
            n_iter = int(min(eps + 4, 1.25 * eps))
        t = xp.array([self.index], dtype=xp.int32)
        out_layer = AdvImage.last_layer
        target_org = self.target.data.copy()
        for _ in range(n_iter):
            x = AdvImage.model(self.target, layers=[out_layer])[out_layer]
            loss = F.softmax_cross_entropy(x, t)
            self.target.cleargrad()
            AdvImage.model.cleargrads()
            loss.backward()
            perturb = xp.sign(self.target.grad)
            updated_data = self.target.data + alpha * perturb
            # Keep the perturbed image inside the eps-ball of the start image.
            clipped_data = xp.clip(updated_data, target_org - eps,
                                   target_org + eps)
            self.target = Variable(clipped_data)
        self.adv_image = self._restore_image(self.target)

    def iterative_least_likely(self, eps, alpha=1, n_iter=None, index=None):
        """ Iterative attack towards a target class (least-likely by default).

        If ``index`` is None the model's least-probable class for the
        original image is used; otherwise ``index`` is the target class.
        """
        xp = AdvImage.xp
        if n_iter is None:
            # Default iteration count from Kurakin et al.
            n_iter = int(min(eps + 4, 1.25 * eps))
        if index is None:
            probs = AdvImage.model.predict([self.org_image],
                                           oversample=False).data[0]
            probs = cuda.to_cpu(probs)
            least_index = np.argmin(probs)
        else:
            # Fix: previously ``t`` was only assigned in the index-is-None
            # branch, so passing an explicit index raised NameError.
            least_index = index
        t = xp.array([least_index], dtype=xp.int32)
        out_layer = AdvImage.last_layer
        target_org = self.target.data.copy()
        for _ in range(n_iter):
            x = AdvImage.model(self.target, layers=[out_layer])[out_layer]
            loss = F.softmax_cross_entropy(x, t)
            self.target.cleargrad()
            AdvImage.model.cleargrads()
            loss.backward()
            perturb = xp.sign(self.target.grad)
            # Descend towards the target class (note the minus sign).
            updated_data = self.target.data - alpha * perturb
            clipped_data = xp.clip(updated_data, target_org - eps,
                                   target_org + eps)
            self.target = Variable(clipped_data)
        self.adv_image = self._restore_image(self.target)
def compute_error_cupy_cuda(batchsize, label_length, seq_length, vocab_size,
                            total_labels_to_fill, repeat=3):
    """Compare the CUDA and CuPy CTC implementations on random data.

    Builds a random label sequence and logits, computes the CTC loss with
    both implementations, and asserts that the forward values and the
    gradients w.r.t. the input agree within tolerance.

    Returns (error_forward, error_backward).
    Note: ``repeat`` is currently unused; kept for interface compatibility.
    """
    xp = cupy
    label_unigram = xp.random.randint(
        1, total_labels_to_fill,
        size=(batchsize, label_length)).astype(xp.int32)
    # CTC needs a blank between repeated labels, so the minimum feasible
    # sequence length grows with the number of same-label transitions.
    num_transitions_to_same_label = xp.count_nonzero(
        label_unigram == xp.roll(label_unigram, 1, axis=1))
    assert seq_length >= label_length + num_transitions_to_same_label + 1
    length_unigram = xp.full((batchsize, ), label_length, dtype=np.int32)
    blank_symbol = 0
    x_data = xp.random.normal(
        0, 1, size=batchsize * vocab_size * seq_length).reshape(
            (batchsize, vocab_size, seq_length)).astype(xp.float32)
    x = Variable(x_data)
    # Reshape (B, V, T) into the per-timestep list of (B, V) slices that the
    # CTC functions expect.
    out_data = F.swapaxes(x, 1, 2)
    out_data = F.reshape(out_data, (batchsize, -1))
    out_data = F.split_axis(out_data, seq_length, axis=1)
    x_length = Variable(xp.full((batchsize, ), seq_length, dtype=np.int32))
    loss_cuda = cuda_ctc.connectionist_temporal_classification(
        out_data, label_unigram, blank_symbol, x_length,
        Variable(length_unigram), reduce="mean")
    loss_cupy = cupy_ctc.connectionist_temporal_classification(
        out_data, label_unigram, blank_symbol, x_length,
        Variable(length_unigram), reduce="mean")
    error_forward = abs(float(loss_cupy.data) - float(loss_cuda.data))
    assert error_forward < 5e-4, "error={}, batchsize={}, label_length={}, seq_length={}, vocab={}, labels={}, loss_cupy={}, loss_cuda={}".format(
        error_forward, batchsize, label_length, seq_length, vocab_size,
        total_labels_to_fill, loss_cupy.data, loss_cuda.data)
    x.cleargrad()
    loss_cuda.backward()
    grad_cuda = x.grad.copy()
    # Fix: clear the accumulated gradient before the second backward pass.
    # Chainer *accumulates* gradients, so without this cleargrad the CuPy
    # gradient would be added on top of grad_cuda and the backward-error
    # comparison below would be meaningless.
    x.cleargrad()
    loss_cupy.backward()
    grad_cupy = x.grad.copy()
    error_backward = float(xp.mean(abs(grad_cupy - grad_cuda)))
    assert error_backward < 5e-3, "error={}, batchsize={}, label_length={}, seq_length={}, vocab={}, labels={}, loss_cupy={}, loss_cuda={}".format(
        error_backward, batchsize, label_length, seq_length, vocab_size,
        total_labels_to_fill, loss_cupy.data, loss_cuda.data)
    return error_forward, error_backward
class LSM():
    """Least-squares polynomial model usable like a chainer model.

    Two execution modes selected by ``define_by_run``:
      * True  -> weights are a chainer Variable; score/loss go through the
                 external helpers ``func_y`` / ``func_J`` and gradients come
                 from autodiff.
      * False -> weights are a plain numpy array; score/gradient are computed
                 analytically from a Vandermonde-style design matrix.
    """

    def __init__(self, *, dimension=2, learning_rate=0.1, define_by_run=False):
        # dimension: polynomial degree; the weight vector has dimension+1 entries.
        self.dimension = dimension
        self.learning_rate = learning_rate
        self.define_by_run = define_by_run
        if self.define_by_run:
            self.w = numpy.random.randn(self.dimension + 1)
            self.w = self.w.astype(numpy.float32)
            self.w = Variable(self.w.reshape(self.dimension + 1))
            self.w.cleargrad()
            # cleargrad leaves .grad as None until the first backward pass,
            # so fall back to a zero vector in that case.
            if self.w.grad is None:
                self.grads = numpy.zeros([self.dimension + 1])
            else:
                self.grads = self.w.grad.reshape(self.dimension + 1)
        else:
            self.w = numpy.random.randn(self.dimension + 1)
            self.grads = numpy.zeros([self.dimension + 1])

    def __call__(self, *args):
        """Call as model(x) to score, or model(x, y) to also prepare a loss."""
        # Error if too many parameters were passed.
        if (len(args) > 2):
            print("Please check parameter.")
        elif (len(args) > 0):
            # Plain score computation.
            self.x = numpy.array(args[0])
            self.x = self.x.astype(numpy.float32)
            self.data = self.__score__()
            if self.define_by_run:
                # Keep the Variable for the loss graph; expose raw data to callers.
                pred_y = self.data
                self.data = self.data.data.reshape(self.data.data.shape[0])
            # Training: a target y was also supplied.
            if (len(args) > 1):
                self.y = numpy.array(args[1])
                self.y = self.y.astype(numpy.float32)
                if self.define_by_run:
                    self.J = func_J(Variable(self.y), pred_y)
                else:
                    self.error = (self.y - self.data)
        return self

    def __score__(self):
        """Estimated y values for the stored data points ``self.x``."""
        if self.define_by_run:
            scores = func_y(self.w, Variable(self.x), self.dimension)
        else:
            # Build the design matrix X with columns x^0, x^1, ..., x^dimension.
            self.X = numpy.array([(x**numpy.ones([self.dimension + 1]))
                                  for x in self.x])
            self.X = self.X**numpy.arange(self.dimension + 1)
            scores = numpy.dot(self.X, self.w)
        return scores

    def zerograds(self):
        """Reset gradients and drop per-call cached attributes (x, y, X, ...)."""
        # dir() lists currently-set attributes; delete the per-call caches
        # so stale data cannot leak into the next call.
        attr_self = [i for i in dir(self) if "__" not in i]
        if "x" in attr_self:
            del self.x
        if "y" in attr_self:
            del self.y
        if "X" in attr_self:
            del self.X
        if "data" in attr_self:
            del self.data
        if "error" in attr_self:
            del self.error
        if self.define_by_run:
            self.w.cleargrad()
            # cleargrad sets .grad to None; mirror that as a zero vector.
            if self.w.grad is None:
                self.grads = numpy.zeros([self.dimension + 1])
            else:
                self.grads = self.w.grad.reshape(self.dimension + 1)
        else:
            self.grads = numpy.zeros([self.dimension + 1])

    def backward(self):
        """Populate ``self.grads`` from the prepared loss/error."""
        if self.define_by_run:
            self.J.backward(retain_grad=True)
            # Sign flipped so that callers can apply grads as a descent step.
            self.grads = -self.w.grad.reshape(self.dimension + 1)
        else:
            # Analytic least-squares gradient: error^T X.
            self.grads = numpy.dot(self.error, self.X)