def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.hard_sigmoid(x)
    self.assertIs(y.data.dtype, x_data.dtype)
    expect = (self.x * 0.2 + 0.5).clip(0, 1)
    testing.assert_allclose(
        y.data, expect, **self.check_forward_option)
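# Hedged sketch of the reference computation checked above: Chainer's
# hard_sigmoid is the piecewise-linear y = clip(0.2 * x + 0.5, 0, 1).
# Illustrative only; assumes numpy and chainer are importable as usual.
def _hard_sigmoid_reference_demo():
    import numpy as np
    from chainer import functions
    x = np.linspace(-4.0, 4.0, 9).astype(np.float32)
    expect = (x * 0.2 + 0.5).clip(0, 1)
    np.testing.assert_allclose(
        functions.hard_sigmoid(x).array, expect, atol=1e-6)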
def check_forward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.hard_sigmoid(x)
    self.assertIs(y.data.dtype, x_data.dtype)
    expect = numpy.minimum(1.0, numpy.maximum(0.0, self.x * 0.2 + 0.5))
    gradient_check.assert_allclose(
        y.data, expect, **self.check_forward_option)
def fit_z(self, x, z):
    n = len(z)
    if self.init_with == 'logistic':
        # Warm-start the linear layer from a fitted logistic regression.
        prototype = LogisticRegression()
        prototype.fit(x, z)
        self._model = UpliftRampThresholdSGD.PredictiveFunc(
            dim_out=1,
            init_l1=L.Linear(
                in_size=None, out_size=1,
                initialW=prototype.coef_.astype(np.float32),
                initial_bias=prototype.intercept_.astype(np.float32)))
    else:
        self._model = UpliftRampThresholdSGD.PredictiveFunc(dim_out=1)
        # self._model = UpliftRampThresholdSGD.Net33(n_2ndunits=3, dim_out=1)
    # opt = optimizers.SGD(lr=self.lr)
    # opt = optimizers.AdaDelta(rho=self.rho)
    # opt = optimizers.RMSprop()
    opt = optimizers.Adam()
    # opt.use_cleargrads()  # Deprecated
    opt.setup(self._model)
    opt.add_hook(chainer.optimizer.WeightDecay(self.reg_level))

    # Chainer supports float32, not float64, so cast both x and z.
    x32 = x.astype(np.float32)
    z32 = z.astype(np.float32).reshape(n, 1)
    del x, z

    # Set the per-sample weight values.
    if self.class_weight is None:
        r32 = 0.5 * np.ones(z32.shape)
    else:
        f = (z32 > 0).astype(int)
        f2r = np.array([self.class_weight[0], self.class_weight[1]])
        r32 = f2r[f]
    r32 = r32.astype(np.float32).reshape(n, 1)

    for epoch in range(self.n_epochs):
        self.slope += self.slope_increment
        if self.online:
            perm = np.arange(n)  # keep the original sample order
        else:
            perm = np.random.permutation(n)  # if the aim is batch optimization
        for i in range(0, n, self.batch_size):
            # Require: 0 <= i < n and i % batch_size == 0.
            x_batch = x32[perm[i: i + self.batch_size], :]
            z_batch = z32[perm[i: i + self.batch_size]]
            r_batch = r32[perm[i: i + self.batch_size]]
            if self.use_hard_sigmoid:
                loss = F.sum(r_batch * F.hard_sigmoid(
                    - self.slope * z_batch * self._model(x_batch))) \
                    / len(z_batch) / self.slope
            elif self.logistic:
                loss = F.sum(F.log(1 + F.exp(
                    - z_batch * self._model(x_batch)))) / len(z_batch)
            else:
                # loss = F.sum(r_batch * F.sigmoid(- self.slope * z_batch * self._model(x_batch))) / len(z_batch) / self.slope
                # loss = - F.sum(r_batch * self.slope * z_batch * self._model(x_batch)) / len(z_batch) / self.slope
                loss = F.sum(1 / (1 + F.exp(
                    self.slope * z_batch * self._model(x_batch)))) \
                    / (len(z_batch) * self.slope)
            self._model.cleargrads()
            loss.backward()
            opt.update()
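# Hedged sketch of the smoothed ramp surrogate minimized in fit_z above:
# for a margin m = z * f(x), F.hard_sigmoid(-slope * m) equals
# clip(0.5 - 0.2 * slope * m, 0, 1), a ramp on the margin that steepens
# as self.slope grows. All names below are illustrative.
def _surrogate_demo():
    import numpy as np
    import chainer.functions as F
    slope = 4.0
    margins = np.array([-1.0, -0.1, 0.0, 0.1, 1.0], dtype=np.float32)
    surrogate = F.hard_sigmoid(-slope * margins).array / slope
    # Large for wrong-signed margins, ~0 for confidently correct ones:
    print(surrogate)  # -> [0.25, 0.145, 0.125, 0.105, 0.]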
def forward_cpu(self, x):
    xp = cuda.get_array_module(*x)
    y = x[0]
    # Stochastic binarization: each unit outputs +1 with probability
    # p = hard_sigmoid(y) and -1 otherwise. numpy.random.choice does not
    # accept an array-valued ``p``, so draw one uniform sample per element.
    p = F.hard_sigmoid(y).data
    ys = xp.random.rand(*y.shape) < p
    y = xp.where(ys, 1, -1).astype(numpy.float32, copy=False)
    return y,
def ramp_loss(z):
    """Ramp loss function.

    l(z) = 1           if z <= -1
    l(z) = (1 - z) / 2 if -1 < z <= 1
    l(z) = 0           if 1 < z
    """
    # hard_sigmoid(t) = clip(0.2 * t + 0.5, 0, 1), so
    # hard_sigmoid(-2.5 * z) = clip((1 - z) / 2, 0, 1), which is exactly
    # the piecewise definition above.
    return F.hard_sigmoid(-2.5 * z)
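# A minimal numeric check of the identity used in ramp_loss above.
# Illustrative only; assumes the usual numpy/chainer imports.
def _check_ramp_loss_identity():
    import numpy as np
    import chainer.functions as F
    z = np.array([-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0], dtype=np.float32)
    reference = np.clip((1.0 - z) / 2.0, 0.0, 1.0)
    np.testing.assert_allclose(
        F.hard_sigmoid(-2.5 * z).array, reference, atol=1e-6)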
def forward_gpu(self, x):
    xp = cuda.get_array_module(*x)
    y = x[0]
    # Stochastic binarization on GPU: fire with probability
    # p = hard_sigmoid(y) per element (random.choice does not accept an
    # array-valued ``p``, so use a uniform draw instead).
    p = F.hard_sigmoid(y).data
    ys = (xp.random.rand(*y.shape) < p).astype(numpy.float32)
    y = cuda.elementwise(
        'T x', 'T y',
        'y = x > 0 ? 1 : -1',
        'bst_fwd')(ys)
    return y,
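# Hedged sketch of the statistics of the stochastic unit above: it emits
# +1 with probability p = hard_sigmoid(y) and -1 otherwise, so its mean
# is 2 * hard_sigmoid(y) - 1. A CPU-only check with illustrative names:
def _check_binarization_mean():
    import numpy as np
    import chainer.functions as F
    y = np.float32(0.5)
    p = F.hard_sigmoid(np.array([y])).array[0]  # 0.2 * 0.5 + 0.5 = 0.6
    samples = np.where(np.random.rand(100000) < p, 1.0, -1.0)
    assert abs(samples.mean() - (2.0 * p - 1.0)) < 0.02  # close to 0.2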
def __call__(self, x):
    if self.Wci.data is None:
        self.initialize_params(x.data.shape)
    if self.pc is None:
        self.initialize_state(x.data.shape)
    # Input gate, with a peephole connection to the previous cell state.
    ci = F.hard_sigmoid(
        self.Wxi(x) + self.Whi(self.ph) + F.scale(self.pc, self.Wci, 1))
    # Forget gate, also peeking at the previous cell state.
    cf = F.hard_sigmoid(
        self.Wxf(x) + self.Whf(self.ph) + F.scale(self.pc, self.Wcf, 1))
    # New cell state.
    cc = cf * self.pc + ci * F.tanh(self.Wxc(x) + self.Whc(self.ph))
    # Output gate, peeking at the new cell state.
    co = F.hard_sigmoid(
        self.Wxo(x) + self.Who(self.ph) + F.scale(cc, self.Wco, 1))
    ch = co * F.tanh(cc)
    self.pc = cc
    self.ph = ch
    return ch
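# Hedged usage sketch for the peephole LSTM cell above. Assumes ``cell``
# is an already-constructed instance whose state (pc, ph) starts as None
# and is re-initialized lazily on the first call; names are illustrative.
def _run_sequence(cell, frames):
    cell.pc = cell.ph = None      # reset recurrent state between sequences
    h = None
    for frame in frames:          # frames: iterable of chainer.Variable
        h = cell(frame)           # updates cell.pc / cell.ph internally
    return h                      # hidden state after the last step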
def __call__(self, x):
    h1 = F.hard_sigmoid(self.l1(x))
    h2 = F.relu(self.l2(h1))
    return self.l3(h2)
def f(x):
    y = functions.hard_sigmoid(x)
    return y * y
def hard_sigmoid(self, x):
    return F.hard_sigmoid(x)
def __call__(self, x, img_real):
    if not isinstance(img_real, chainer.Variable):  # if validation set
        img_real = Variable(img_real)
    ## Compute latent space from BOLD
    z = self.predictor(x)
    ## Generate images from latent space
    img_fake = self.pretrained_gan.generate_img_from_z(z)
    # avoid slight overflow of values (after tanh, up to 1.07)
    img_fake = F.clip(img_fake, -1.0, 1.0)
    # workaround an issue during validation
    img_fake.volatile = 'OFF'
    img_real.volatile = 'OFF'
    ## Get activations of perceptual features
    _, layer_activations_fake = self.featnet(
        img_fake, train=False, return_activations=True)
    _, layer_activations_real = self.featnet(
        img_real, train=False, return_activations=True)
    # Note that featnet can also return the non-softmaxed final layer
    # activations (= the classes, here in _). Got some bizarre (and no
    # better) results for natural images when also including a
    # class-matching loss. But (as mentioned in the paper): a loss on
    # higher layers of a convnet trained with a discrete set of classes
    # (such as ImageNet classes) may *restrict* your reconstructions to
    # these classes, which is not desired. Computing a loss within a
    # continuous semantic space may be a solution here.
    ## Compute perceptual losses
    loss = 0.0
    if self.featnet is not None:
        for layer_idx in ['pixel'] + args.featn_layers:
            if layer_idx == 'pixel':
                # compute pixel loss l_px
                loss_px = args.lambda_pixel * F.mean_absolute_error(
                    F.resize_images(img_fake, (args.small_img_dims, args.small_img_dims)),
                    F.resize_images(img_real, (args.small_img_dims, args.small_img_dims)))
                loss += loss_px
            else:
                layer_idx = int(layer_idx)
                # using hard_sigmoid for a differentiable binarization at threshold 1.0
                activ_fake_pos = F.hard_sigmoid(
                    layer_activations_fake[layer_idx] * 3.0 - 3. * args.featthre)
                activ_real_pos = F.hard_sigmoid(
                    layer_activations_real[layer_idx] * 3.0 - 3. * args.featthre)
                if layer_idx == 0:
                    # negative feature activations only make sense for conv1
                    activ_fake_neg = F.hard_sigmoid(
                        -1.0 * layer_activations_fake[layer_idx] * 3.0 - 3. * args.featthre)
                    activ_real_neg = F.hard_sigmoid(
                        -1.0 * layer_activations_real[layer_idx] * 3.0 - 3. * args.featthre)
                    mask_real = (activ_real_pos.data + activ_real_neg.data) > 0
                else:
                    # only use positive activations
                    mask_real = activ_real_pos.data > 0
                if np.sum(mask_real) > 0:  # if there are any activations above 1.0
                    # compute the magnitude loss l_{l,m}
                    loss_mag = args.lambda_magnitude * F.mean_squared_error(
                        layer_activations_fake[layer_idx][mask_real],
                        layer_activations_real[layer_idx][mask_real])
                else:
                    # warn and set magnitude loss to 0.0 (does not happen)
                    loss_mag = 0.0
                    print("Warning: No magnitude loss")
                loss += loss_mag
    report({'loss': loss}, self)
    # Use this code to check whether gradients were computed:
    # self.predictor.l1.cleargrads()
    # loss.backward()
    # print("Gradients: ", self.predictor.l1.W.grad)
    # Do this for all new loss terms.
    return loss
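# Hedged sketch of the "differentiable binarization" trick used above:
# F.hard_sigmoid(3 * a - 3 * t) = clip(0.6 * (a - t) + 0.5, 0, 1) is a soft
# step that is 0 below a = t - 5/6, 1 above a = t + 5/6, and 0.5 exactly at
# the threshold t, keeping nonzero gradients near the threshold.
# Illustrative names; t stands in for args.featthre.
def _soft_step_demo():
    import numpy as np
    import chainer.functions as F
    t = 1.0
    a = np.array([0.0, 1.0 - 5.0 / 6.0, 1.0, 1.0 + 5.0 / 6.0, 2.0],
                 dtype=np.float32)
    print(F.hard_sigmoid(3.0 * a - 3.0 * t).array)  # -> [0., 0., 0.5, 1., 1.]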
def __call__(self, x):
    return F.hard_sigmoid(x)
def forward(self, inputs, device):
    x, = inputs
    return functions.hard_sigmoid(x),