def get_nade_k_mean_field(self, x, input_mask, k):
    # This procedure uses the mask only at the first step of inference.
    # x: all inputs (B, D)
    # input_mask: input masks (B, D)
    # k: number of mean-field steps, int
    # Convergence can be monitored through the successive predictions in P.
    P = []
    for i in range(k):
        if i == 0:
            # The first iteration of mean field: fill in the unobserved
            # dimensions with the marginals (or zeros).
            if self.init_mean_field:
                v = x * input_mask + self.marginal * (1 - input_mask)
            else:
                v = x * input_mask
            if self.use_mask:
                print('first step of inference uses masks')
                #mask_as_inputs = 1 - input_mask
                mask_as_inputs = input_mask
                #mask_as_inputs = 2 * input_mask - 1
            else:
                print('first step of inference does not use masks')
                mask_as_inputs = T.zeros_like(input_mask)
        else:
            # The following iterations do not use the mask as inputs.
            if self.use_mask:
                mask_as_inputs = input_mask
            else:
                mask_as_inputs = T.zeros_like(input_mask)
        # Mean-field update
        if self.center_v:
            print('inputs are centered')
            v_ = v - self.marginal
        else:
            print('inputs not centered')
            v_ = v
        h = utils.apply_act(T.dot(v_, self.W1)
                            + T.dot(mask_as_inputs, self.Wflags) + self.b1,
                            act=self.hidden_act)
        if self.n_layers == 2:
            h = utils.apply_act(T.dot(h, self.W2) + self.b2,
                                act=self.hidden_act)
        p_x_is_one = T.nnet.sigmoid(T.dot(h, self.V.T) + self.c)
        # To stabilize the computation
        p_x_is_one = p_x_is_one * constantX(0.9999) + constantX(0.0001 * 0.5)
        # v for the next iteration: keep the observed values, replace the
        # missing ones with the current predictions.
        #v = x * input_mask + p_x_is_one * output_mask
        v = x * input_mask + p_x_is_one * (1 - input_mask)
        P.append(p_x_is_one)
    return P
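# A minimal NumPy sketch of the mean-field iteration above, outside Theano.
# The names (W1, b1, V, c, marginal) mirror the attributes used by
# get_nade_k_mean_field but are stand-ins here, not the trained parameters;
# tanh stands in for self.hidden_act. Observed dimensions stay clamped to x,
# while the missing ones are repeatedly replaced by the model's predictions.
import numpy as np

def mean_field_sketch(x, input_mask, W1, b1, V, c, marginal, k):
    sigmoid = lambda a: 1.0 / (1.0 + np.exp(-a))
    # Initialize missing dimensions with the marginals.
    v = x * input_mask + marginal * (1 - input_mask)
    P = []
    for _ in range(k):
        h = np.tanh(np.dot(v - marginal, W1) + b1)
        p = sigmoid(np.dot(h, V.T) + c)
        p = p * 0.9999 + 0.0001 * 0.5  # same stabilization trick as above
        # Clamp observed values, fill the rest with current predictions.
        v = x * input_mask + p * (1 - input_mask)
        P.append(p)
    return P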
def f(self, h, in_dim, layer_type, dim, num, act_f, noise_std):
    layer_name = 'f_' + str(num) + '_'
    z = self.apply_layer(layer_type, h, in_dim, dim, layer_name)
    # Normalize with the batch statistics.
    m = z.mean(0, keepdims=True)
    s = z.var(0, keepdims=True)
    # if noise_std == 0:
    #     m = self.annotate_bn(m, layer_name + 'bn', 'mean',
    #                          z.shape[0], dim)
    #     s = self.annotate_bn(s, layer_name + 'bn', 'var',
    #                          z.shape[0], dim)
    z = (z - m) / T.sqrt(s + np.float32(1e-10))
    z_lat = z + self.rstream.normal(size=z.shape).astype(floatX) * noise_std
    z = z_lat
    # Add bias
    if act_f != 'linear':
        z += self.shared(0.0 * np.ones(dim), layer_name + 'b', role=BIAS)
    # Add gamma parameter if necessary (not needed for all act_f).
    if act_f in ['sigmoid', 'tanh', 'softmax']:
        c = self.shared(1.0 * np.ones(dim), layer_name + 'c', role=WEIGHT)
        z *= c
    h = apply_act(z, act_f)
    return z_lat, m, s, h
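# A minimal NumPy sketch of what f does to one batch of pre-activations,
# assuming a tanh layer; gamma and beta here play the roles of the shared
# 'c' and 'b' parameters above, and rng stands in for self.rstream.
import numpy as np

def f_sketch(z, noise_std, gamma, beta, rng=np.random):
    m = z.mean(0, keepdims=True)
    s = z.var(0, keepdims=True)
    z = (z - m) / np.sqrt(s + 1e-10)                   # batch normalization
    z_lat = z + rng.normal(size=z.shape) * noise_std   # corrupted latent
    h = np.tanh(gamma * (z_lat + beta))                # shift, scale, activate
    return z_lat, m, s, h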
def decoder(self, clean, corr, batch_size):
    # Labeled examples occupy the first batch_size rows; the rest are
    # unlabeled and used for the denoising costs.
    get_unlabeled = lambda x: x[batch_size:] if x is not None else x
    est = self.new_activation_dict()
    costs = AttributeDict()
    costs.denois = AttributeDict()
    for i, ((_, spec), act_f) in self.layers[::-1]:
        z_corr = get_unlabeled(corr.z[i])
        z_clean = get_unlabeled(clean.z[i])
        z_clean_s = get_unlabeled(clean.s.get(i))
        z_clean_m = get_unlabeled(clean.m.get(i))
        # It's the last layer
        if i == len(self.layers) - 1:
            fspec = (None, None)
            ver = get_unlabeled(corr.h[i])
            ver_dim = self.layer_dims[i]
            top_g = True
        else:
            fspec = self.layers[i + 1][1][0]
            ver = est.z.get(i + 1)
            ver_dim = self.layer_dims.get(i + 1)
            top_g = False
        z_est = self.g(z_lat=z_corr, z_ver=ver,
                       in_dims=ver_dim, out_dims=self.layer_dims[i],
                       num=i, fspec=fspec, top_g=top_g)
        # For the semi-supervised version: compare in the normalized space
        # when the clean-path statistics are available.
        if z_clean_s:
            z_est_norm = (z_est - z_clean_m) / z_clean_s
        else:
            z_est_norm = z_est
        se = SquaredError('denois' + str(i))
        costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                   z_clean.flatten(2)) \
            / np.prod(self.layer_dims[i], dtype=floatX)
        costs.denois[i].name = 'denois' + str(i)
        # Store references for later use
        est.z[i] = z_est
        est.h[i] = apply_act(z_est, act_f)
        est.s[i] = None
        est.m[i] = None
    return est, costs
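# Usage sketch (hypothetical shapes and names): the decoder above assumes
# the minibatch concatenates labeled examples first, then unlabeled ones,
# so x[batch_size:] slices out the unlabeled part. For instance, with 100
# labeled and 156 unlabeled examples:
#   x = T.concatenate([x_labeled, x_unlabeled], axis=0)   # (256, D)
#   est, costs = self.decoder(clean, corr, batch_size=100)
#   total_denois = sum(costs.denois.values())  # weighting omitted here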
def f(self, h, in_dim, spec, num, act_f, path_name, noise_std=0):
    layer_name = 'f_' + str(num)
    layer_type, dim = spec
    z = self.apply_layer(layer_type, h, in_dim, dim, layer_name)
    z_l = self.labeled(z)
    z_u = self.unlabeled(z)
    m = z_u.mean(0, keepdims=True)
    s = z_u.var(0, keepdims=True)
    m_l = z_l.mean(0, keepdims=True)
    s_l = z_l.var(0, keepdims=True)
    if path_name == 'clean':
        # Batch normalization estimates the mean and variance of
        # validation and test sets based on the training set
        # statistics. The following annotates the computation of
        # the running average to the graph.
        m_l = self.annotate_bn(m_l, layer_name + 'bn', 'mean',
                               z_l.shape[0], dim)
        s_l = self.annotate_bn(s_l, layer_name + 'bn', 'var',
                               z_l.shape[0], dim)
    z = self.join(
        (z_l - m_l) / T.sqrt(s_l + np.float32(1e-10)),
        (z_u - m) / T.sqrt(s + np.float32(1e-10)))
    if noise_std > 0:
        z += self.rstream.normal(size=z.shape).astype(floatX) * noise_std
    # z for the lateral connection
    z_lat = z
    b_init, c_init = 0.0, 1.0
    b_c_size = dim
    # Add bias
    if act_f != 'linear':
        z += self.shared(b_init * np.ones(b_c_size), layer_name + 'b',
                         role=BIAS)
    # Add gamma parameter if necessary (not needed for all act_f).
    if act_f in ['sigmoid', 'tanh', 'softmax']:
        c = self.shared(c_init * np.ones(b_c_size), layer_name + 'c',
                        role=WEIGHT)
        z *= c
    h = apply_act(z, act_f)
    return dim, z_lat, m, s, h
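# A rough sketch of the bookkeeping annotate_bn stands for: during training,
# keep running averages of the batch statistics so the clean path can
# normalize validation/test data with fixed estimates. This class and its
# decay constant are illustrative assumptions, not taken from the code above.
import numpy as np

class RunningBNStats(object):
    def __init__(self, dim, decay=0.99):
        self.mean = np.zeros((1, dim), dtype='float32')
        self.var = np.ones((1, dim), dtype='float32')
        self.decay = decay

    def update(self, m_batch, s_batch):
        # Exponential moving average of per-batch mean and variance.
        self.mean = self.decay * self.mean + (1 - self.decay) * m_batch
        self.var = self.decay * self.var + (1 - self.decay) * s_batch

    def normalize(self, z):
        return (z - self.mean) / np.sqrt(self.var + 1e-10)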
def decoder(self, clean, corr):
    est = self.new_activation_dict()
    costs = AttributeDict()
    costs.denois = AttributeDict()
    for i, ((_, spec), act_f) in self.layers[::-1]:
        z_corr = corr.unlabeled.z[i]
        z_clean = clean.unlabeled.z[i]
        z_clean_s = clean.unlabeled.s.get(i)
        z_clean_m = clean.unlabeled.m.get(i)
        # It's the last layer
        if i == len(self.layers) - 1:
            fspec = (None, None)
            ver = corr.unlabeled.h[i]
            ver_dim = self.layer_dims[i]
            top_g = True
        else:
            fspec = self.layers[i + 1][1][0]
            ver = est.z.get(i + 1)
            ver_dim = self.layer_dims.get(i + 1)
            top_g = False
        z_est = self.g(z_lat=z_corr, z_ver=ver,
                       in_dims=ver_dim, out_dims=self.layer_dims[i],
                       num=i, fspec=fspec, top_g=top_g)
        # Compare in the normalized space when the clean-path
        # statistics are available.
        if z_clean_s:
            z_est_norm = (z_est - z_clean_m) / z_clean_s
        else:
            z_est_norm = z_est
        se = SquaredError('denois' + str(i))
        costs.denois[i] = se.apply(z_est_norm.flatten(2),
                                   z_clean.flatten(2)) \
            / np.prod(self.layer_dims[i], dtype=floatX)
        costs.denois[i].name = 'denois' + str(i)
        # Store references for later use
        est.z[i] = z_est
        est.h[i] = apply_act(z_est, act_f)
        est.s[i] = None
        est.m[i] = None
    return est, costs
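# A minimal NumPy sketch of one per-layer denoising cost as computed above,
# assuming SquaredError sums over units and averages over the batch (as in
# Blocks' CostMatrix.apply); all names here are illustrative.
import numpy as np

def denois_cost_sketch(z_est, z_clean, z_clean_m=None, z_clean_s=None):
    # Normalize the reconstruction with the clean-path statistics when given.
    if z_clean_s is not None:
        z_est = (z_est - z_clean_m) / z_clean_s
    d = (z_est.reshape(len(z_est), -1)
         - z_clean.reshape(len(z_clean), -1))
    # Sum of squares over units, mean over the batch, scaled by layer width,
    # mirroring se.apply(...) / np.prod(self.layer_dims[i]) above.
    return (d ** 2).sum(axis=1).mean() / d.shape[1]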