def load_energy_model(model_params_dict):
    # FEATURE LAYER 0 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 0'
    conv_w0 = sharedX(model_params_dict['feat_conv_w0'], name='feat_conv_w0')
    conv_b0 = sharedX(model_params_dict['feat_conv_b0'], name='feat_conv_b0')
    # FEATURE LAYER 1 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 1'
    conv_w1 = sharedX(model_params_dict['feat_conv_w1'], name='feat_conv_w1')
    conv_b1 = sharedX(model_params_dict['feat_conv_b1'], name='feat_conv_b1')
    # FEATURE LAYER 2 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 2'
    conv_w2 = sharedX(model_params_dict['feat_conv_w2'], name='feat_conv_w2')
    conv_b2 = sharedX(model_params_dict['feat_conv_b2'], name='feat_conv_b2')

    print 'SET ENERGY FEATURE EXTRACTOR'

    def energy_feature_function(input_data, is_train=True):
        # layer 0 (conv)
        h0 = relu(dnn_conv(input_data, conv_w0, subsample=(2, 2), border_mode=(2, 2)) +
                  conv_b0.dimshuffle('x', 0, 'x', 'x'))
        # layer 1 (conv)
        h1 = relu(dnn_conv(h0, conv_w1, subsample=(2, 2), border_mode=(2, 2)) +
                  conv_b1.dimshuffle('x', 0, 'x', 'x'))
        # layer 2 (conv)
        h2 = tanh(dnn_conv(h1, conv_w2, subsample=(2, 2), border_mode=(2, 2)) +
                  conv_b2.dimshuffle('x', 0, 'x', 'x'))
        feature = T.flatten(h2, 2)
        return feature

    # ENERGY LAYER (LINEAR)
    print 'SET ENERGY FUNCTION LINEAR LAYER 3'
    norm_w = sharedX(model_params_dict['gen_norm_w'], name='gen_norm_w')
    norm_b = sharedX(model_params_dict['gen_norm_b'], name='gen_norm_b')

    def energy_normalize_function(input_data, is_train=True):
        input_data = T.flatten(input_data, 2)
        return batchnorm(input_data, g=norm_w, b=norm_b, a=0.0)

    expert_w = sharedX(model_params_dict['eng_expert_w'], name='eng_expert_w')
    expert_b = sharedX(model_params_dict['eng_expert_b'], name='eng_expert_b')

    def energy_expert_function(feature_data, is_train=True):
        e = softplus(T.dot(feature_data, expert_w) + expert_b)
        e = T.sum(-e, axis=1, keepdims=True)
        return e

    def energy_prior_function(input_data, is_train=True):
        e = T.sum(T.sqr(input_data), axis=1, keepdims=True)
        return e

    energy_params = [conv_w0, conv_b0,
                     conv_w1, conv_b1,
                     conv_w2, conv_b2,
                     norm_w, norm_b,
                     expert_w, expert_b]
    return [energy_feature_function,
            energy_normalize_function,
            energy_expert_function,
            energy_prior_function,
            energy_params]
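# --- Hypothetical usage sketch (not part of the original file) ----------------
# Shows one way the callables returned by load_energy_model might be composed
# into a per-sample energy. `model_params_dict` is assumed to be the loaded
# parameter dict; combining the expert and prior terms like this is an
# assumption, not the original training script.
feature_fn, normalize_fn, expert_fn, prior_fn, energy_params = \
    load_energy_model(model_params_dict)

x_in = T.tensor4()                              # (batch, channels, height, width)
feat = feature_fn(x_in, is_train=False)         # flattened conv features
feat = normalize_fn(feat, is_train=False)       # batch-normalized features
energy = expert_fn(feat, is_train=False) + prior_fn(feat, is_train=False)
_energy = theano.function([x_in], energy)       # returns a (batch, 1) energy column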
def get_params(model_file, n_layers, n_f, nz=100, nc=3): print('LOADING...') t = time() disc_params = init_disc_params(n_f=n_f, n_layers=n_layers, nc=nc) gen_params = init_gen_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc) predict_params = init_predict_params(nz=nz, n_f=n_f, n_layers=n_layers) # load the model model = utils.PickleLoad(model_file) print('load model from %s' % model_file) set_model(disc_params, model['disc_params']) set_model(gen_params, model['gen_params']) if 'predict_params' in model: set_model(predict_params, model['predict_params']) disc_batchnorm = model['disc_batchnorm'] gen_batchnorm = model['gen_batchnorm'] if 'predict_batchnorm' in model: predict_batchnorm = model['predict_batchnorm'] predict_batchnorm = [sharedX(d) for d in predict_batchnorm] else: predict_batchnorm = None disc_batchnorm = [sharedX(d) for d in disc_batchnorm] gen_batchnorm = [sharedX(d) for d in gen_batchnorm] print('%.2f seconds to load theano models' % (time() - t)) return disc_params, gen_params, predict_params, disc_batchnorm, gen_batchnorm, predict_batchnorm
def __init__(self,model, dis_updater = updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5)), gen_updater = updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5))): X = model.X Z = model.Z targets = T.matrix() genX = model.genX disX = model.disX disgenX = model.disgenX disX_loss = bce(disX, T.ones(disX.shape)).mean() disgenX_loss = bce(disgenX, T.zeros(disgenX.shape)).mean() genX_loss = bce(disgenX, T.ones(disgenX.shape)).mean() dis_loss = disX_loss + disgenX_loss gen_loss = genX_loss trainable_discrim_params = model.trainable_discrim_params trainable_gen_params = model.trainable_gen_params dis_updates = dis_updater(trainable_discrim_params, dis_loss) + model.other_discrim_updates gen_updates = gen_updater(trainable_gen_params, gen_loss) + model.other_gen_updates print 'COMPILING' t = time() self._train_gen = theano.function([Z], gen_loss, updates=gen_updates) self._train_dis = theano.function([X, Z], dis_loss, updates=dis_updates) self._gen = theano.function([Z], genX) print '%.2f seconds to compile theano functions'%(time()-t)
def __init__(self, td_modules, bu_modules_gen, im_modules_gen, bu_modules_inf, im_modules_inf, merge_info): # grab the bottom-up, top-down, and info merging modules self.td_modules = [m for m in td_modules] self.bu_modules_gen = [m for m in bu_modules_gen] self.im_modules_gen = [m for m in im_modules_gen] self.bu_modules_inf = [m for m in bu_modules_inf] self.im_modules_inf = [m for m in im_modules_inf] # get dicts for referencing modules by name self.td_modules_dict = {m.mod_name: m for m in td_modules} self.td_modules_dict[None] = None self.bu_modules_gen_dict = {m.mod_name: m for m in bu_modules_gen} self.bu_modules_gen_dict[None] = None self.bu_modules_inf_dict = {m.mod_name: m for m in bu_modules_inf} self.bu_modules_inf_dict[None] = None self.im_modules_gen_dict = {m.mod_name: m for m in im_modules_gen} self.im_modules_gen_dict[None] = None self.im_modules_inf_dict = {m.mod_name: m for m in im_modules_inf} self.im_modules_inf_dict[None] = None # grab the full set of trainable parameters in these modules self.gen_params = [] # modules that aren't just for inference self.inf_params = [] # modules that are just for inference # get generator params (these only get to adapt to the training set) self.generator_modules = self.td_modules + self.bu_modules_gen + \ self.im_modules_gen for mod in self.generator_modules: self.gen_params.extend(mod.params) # get inferencer params (these can be fine-tuned at test time) self.inferencer_modules = self.bu_modules_inf + self.im_modules_inf for mod in self.inferencer_modules: self.inf_params.extend(mod.params) # filter redundant parameters, to allow parameter sharing p_dict = {} for p in self.gen_params: p_dict[p.name] = p self.gen_params = p_dict.values() p_dict = {} for p in self.inf_params: p_dict[p.name] = p self.inf_params = p_dict.values() # add a distribution scaling parameter to the generator self.dist_scale = sharedX(floatX([0.2])) self.gen_params.append(self.dist_scale) # gather a list of all parameters in this network self.all_params = self.inf_params + self.gen_params # get instructions for how to merge bottom-up and top-down info self.merge_info = merge_info # make a switch for alternating between generator and inferencer # conditionals over the latent variables self.sample_switch = sharedX(floatX([1.0])) return
def transform_im(x, npx=64, nc=3):
    if nc == 3:
        x1 = (x + sharedX(1.0)) * sharedX(127.5)
    else:
        x1 = T.tile(x, [1, 1, 1, 3]) * sharedX(255.0)  # [hack] to-be-tested
    mean_channel = np.load('../lib/ilsvrc_2012_mean.npy').mean(1).mean(1)
    mean_im = mean_channel[np.newaxis, :, np.newaxis, np.newaxis]
    mean_im = floatX(np.tile(mean_im, [1, 1, npx, npx]))
    x2 = x1[:, [2, 1, 0], :, :]
    y = x2 - mean_im
    return y
def transform_im(x, npx=64, nc=3):
    if nc == 3:  # default option
        # (-1, 1) => (0, 255)
        x1 = (x + sharedX(1.0)) * sharedX(127.5)
    else:
        x1 = T.tile(x, [1, 1, 1, 3]) * sharedX(255.0)  # [hack] to-be-tested
    mean_channel = np.load(os.path.join(
        pkg_dir, 'ilsvrc_2012_mean.npy')).mean(1).mean(1)
    mean_im = mean_channel[np.newaxis, :, np.newaxis, np.newaxis]
    mean_im = floatX(np.tile(mean_im, [1, 1, npx, npx]))
    x2 = x1[:, [2, 1, 0], :, :]
    y = x2 - mean_im
    return y
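# Hypothetical usage sketch: transform_im converts a tanh generator output in
# [-1, 1] into the input an AlexNet-style Caffe model expects, i.e. it rescales
# to [0, 255], reorders channels RGB -> BGR, and subtracts the ILSVRC-2012
# channel means. `model_G` and `z` are assumed to exist, as in the inversion
# snippets below.
# gx = model_G(z)                        # (batch, 3, 64, 64), values in [-1, 1]
# gx_t = transform_im(gx, npx=64, nc=3)  # ready for AlexNet.build_model(gx_t, ...)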
def def_invert(self, model, batch_size=1, beta=0.5, lr=0.1, b1=0.9, nz=100, use_bin=True): beta_r = sharedX(beta) x_c = T.tensor4() m_c = T.tensor4() x_e = T.tensor4() m_e = T.tensor4() z0 = T.matrix() z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))) gx = model.model_G(z) mm_c = T.tile(m_c, (1, gx.shape[1], 1, 1)) color_all = T.mean(T.sqr(gx - x_c) * mm_c, axis=(1, 2, 3)) / ( T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5)) gx_edge = HOGNet.get_hog(gx, use_bin) x_edge = HOGNet.get_hog(x_e, use_bin) mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1)) sum_e = T.sum(T.abs_(mm_e)) sum_x_edge = T.sum(T.abs_(x_edge)) edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / ( T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5)) rec_all = color_all + edge_all * sharedX(0.2) z_const = sharedX(10.0) init_all = T.mean(T.sqr(z0 - z)) * z_const if beta > 0: print('using D') p_gen = model.model_D(gx) real_all = T.nnet.binary_crossentropy(p_gen, T.ones( p_gen.shape)).T # costs.bce(p_gen, T.ones(p_gen.shape)) cost_all = rec_all + beta_r * real_all[0] + init_all else: print('without D') cost_all = rec_all + init_all real_all = T.zeros(cost_all.shape) cost = T.sum(cost_all) d_updater = updates.Adam( lr=sharedX(lr), b1=sharedX(b1)) # ,regularizer=updates.Regularizer(l2=l2)) output = [ gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge ] print 'COMPILING...' t = time() z_updates = d_updater([z], cost) _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates) print '%.2f seconds to compile _invert function' % (time() - t) return [_invert, z_updates, z, beta_r, z_const]
def def_bfgs(model_G, layer='conv4', npx=64, alpha=0.002):
    print('COMPILING...')
    t = time()
    # symbolic variables
    x_f = T.tensor4()
    x = T.tensor4()
    z = T.matrix()  # latent code (random seed)
    tanh = activations.Tanh()
    gx = model_G(tanh(z))  # generated image
    if layer == 'hog':
        gx_f = HOGNet.get_hog(gx, use_bin=True, BS=4)
    else:
        # convert the generated image to AlexNet's input format
        gx_t = AlexNet.transform_im(gx)
        gx_net = AlexNet.build_model(gx_t, layer=layer, shape=(None, 3, npx, npx))
        AlexNet.load_model(gx_net, layer=layer)
        # AlexNet features, truncated at `layer`
        gx_f = lasagne.layers.get_output(gx_net[layer], deterministic=True)
    f_rec = T.mean(T.sqr(x_f - gx_f), axis=(1, 2, 3)) * sharedX(alpha)
    x_rec = T.mean(T.sqr(x - gx), axis=(1, 2, 3))
    cost = T.sum(f_rec) + T.sum(x_rec)
    grad = T.grad(cost, z)
    output = [cost, grad, gx]
    _invert = theano.function(inputs=[z, x, x_f], outputs=output)
    print('%.2f seconds to compile _bfgs function' % (time() - t))
    return _invert, z
def get_params(model_file, n_layers, n_f, nz=100, nc=3): print 'LOADING...' t = time() disc_params = init_disc_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc) gen_params = init_gen_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc) # load the model model = utils.PickleLoad(model_file) print 'load model from %s' % model_file set_model(disc_params, model['disc_params']) set_model(gen_params, model['gen_params']) [disc_pl, gen_pl] = model['postlearn_params'] disc_pl = [sharedX(d) for d in disc_pl] gen_pl = [sharedX(d) for d in gen_pl] print '%.2f seconds to load theano models' % (time() - t) return disc_params, gen_params, disc_pl, gen_pl
def __call__(self, shape, name=None):
    print('called orthogonal init with shape', shape)
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np_rng.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    q = u if u.shape == flat_shape else v  # pick the one with the correct shape
    q = q.reshape(shape)
    return sharedX(self.scale * q[:shape[0], :shape[1]], name=name)
def __call__(self, shape, name=None):
    if len(shape) == 2:
        scale = np.sqrt(2. / shape[0])
    elif len(shape) == 4:
        scale = np.sqrt(2. / np.prod(shape[1:]))
    else:
        raise NotImplementedError
    return sharedX(np_rng.normal(size=shape, scale=scale), name=name)
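# Hypothetical usage sketch: this __call__ draws weights with He-style scaling,
# std = sqrt(2 / fan_in), where fan_in is shape[0] for dense weights and
# prod(shape[1:]) for conv filters. Assuming the initializer class is
# instantiated elsewhere as `init_fn`:
# w_dense = init_fn((256, 512), name='w_dense')    # fan_in = 256
# w_conv = init_fn((64, 3, 5, 5), name='w_conv')   # fan_in = 3 * 5 * 5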
def get_hog(self, x_o): use_bin = self.use_bin NO = self.NO BS = self.BS nc = self.nc x = (x_o + sharedX(1)) / (sharedX(2)) Gx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) / 4.0 Gy = Gx.T f1_w = [] for i in range(NO): t = np.pi / NO * i g = np.cos(t) * Gx + np.sin(t) * Gy gg = np.tile(g[np.newaxis, np.newaxis, :, :], [1, 1, 1, 1]) f1_w.append(gg) f1_w = np.concatenate(f1_w, axis=0) G = np.concatenate([ Gx[np.newaxis, np.newaxis, :, :], Gy[np.newaxis, np.newaxis, :, :] ], axis=0) G_f = sharedX(floatX(G)) a = np.cos(np.pi / NO) l1 = sharedX(floatX(1 / (1 - a))) l2 = sharedX(floatX(a / (1 - a))) eps = sharedX(1e-3) if nc == 3: x_gray = T.mean(x, axis=1).dimshuffle(0, 'x', 1, 2) else: x_gray = x f1 = sharedX(floatX(f1_w)) h0 = T.abs_(dnn_conv(x_gray, f1, subsample=(1, 1), border_mode=(1, 1))) g = dnn_conv(x_gray, G_f, subsample=(1, 1), border_mode=(1, 1)) if use_bin: gx = g[:, [0], :, :] gy = g[:, [1], :, :] gg = T.sqrt(gx * gx + gy * gy + eps) hk = T.maximum(0, l1 * h0 - l2 * gg) bf_w = np.zeros((NO, NO, 2 * BS, 2 * BS)) b = 1 - np.abs( (np.arange(1, 2 * BS + 1) - (2 * BS + 1.0) / 2.0) / BS) b = b[np.newaxis, :] bb = b.T.dot(b) for n in range(NO): bf_w[n, n] = bb bf = sharedX(floatX(bf_w)) h_f = dnn_conv(hk, bf, subsample=(BS, BS), border_mode=(BS / 2, BS / 2)) return h_f else: return g
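# Usage sketch: get_hog expects x_o in [-1, 1] (it rescales to [0, 1] internally)
# and returns soft-binned, block-pooled HOG-like maps when use_bin=True, or the
# raw x/y gradient responses when use_bin=False. Assuming `hog` is an instance of
# the surrounding class configured with NO orientations, block size BS, and nc channels:
# x = T.tensor4()
# hog_feat = hog.get_hog(x)
# _hog = theano.function([x], hog_feat)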
def __init__(self, model,
             dis_updater=updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5)),
             gen_updater=updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5)),
             cls_updater=updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5))):
    X = model.X
    Z = model.Z
    y = T.matrix()  # zeros or ones depending on whether X is generated or real, respectively
    targets = T.matrix()
    genX = model.genX
    disX = model.disX
    classX = model.classX
    disgenX = model.disgenX
    classgenX = model.classgenX
    classXTest = model.classXTest

    disX_loss = bce(disX, y).mean()
    disgenX_loss = bce(disgenX, T.zeros(disgenX.shape)).mean()
    genX_loss = bce(disgenX, T.ones(disgenX.shape)).mean()
    cls_loss = cce(classX, targets).mean()
    cls_err = T.mean(T.neq(T.argmax(classXTest, axis=1), T.argmax(targets, axis=1)))

    dis_loss = disX_loss + disgenX_loss
    gen_loss = genX_loss

    trainable_discrim_params = model.trainable_discrim_params
    trainable_gen_params = model.trainable_gen_params
    trainable_classif_params = model.trainable_classif_params

    dis_updates = dis_updater(trainable_discrim_params, dis_loss) + model.other_discrim_updates
    gen_updates = gen_updater(trainable_gen_params, gen_loss) + model.other_gen_updates
    cls_updates = cls_updater(trainable_classif_params, cls_loss) + model.other_classif_updates

    print 'COMPILING'
    t = time()
    self._train_gen = theano.function([Z], gen_loss, updates=gen_updates)
    self._train_dis = theano.function([X, y, Z], dis_loss, updates=dis_updates)
    self._train_cls = theano.function([X, targets], cls_loss, updates=cls_updates)
    self._gen = theano.function([Z], genX)
    self._cls_predict = theano.function([X], classXTest)
    self._cls_error = theano.function([X, targets], cls_err)
    print '%.2f seconds to compile theano functions' % (time() - t)
def def_comp_mask(self):
    BS = self.BS
    print('COMPILING')
    t = time()
    m = T.tensor4()
    bf_w = np.ones((1, 1, 2 * BS, 2 * BS))
    bf = sharedX(floatX(bf_w))
    m_b = dnn_conv(m, bf, subsample=(BS, BS), border_mode=(BS / 2, BS / 2))
    _comp_mask = theano.function(inputs=[m], outputs=m_b)
    print('%.2f seconds to compile [compMask] functions' % (time() - t))
    return _comp_mask
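# Usage sketch: the compiled _comp_mask box-filters a mask with a (2*BS x 2*BS)
# window of ones and strides by BS, so each output cell measures how much of the
# mask falls inside its receptive field. Assuming a (1, 1, 64, 64) numpy mask `m_np`:
# _comp_mask = self.def_comp_mask()
# m_coarse = _comp_mask(m_np)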
def __call__(self, shape, name=None):
    if shape[0] != shape[1]:
        w = np.zeros(shape)
        o_idxs = np.arange(shape[0])
        i_idxs = np.random.permutation(
            np.tile(np.arange(shape[1]), shape[0] / shape[1] + 1))[:shape[0]]
        w[o_idxs, i_idxs] = self.scale
    else:
        w = np.identity(shape[0]) * self.scale
    return sharedX(w, name=name)
def get_params(model_file, n_layers, n_f, nz=100, nc=3): t = time() disc_params = init_disc_params(n_f=n_f, n_layers=n_layers, nc=nc) gen_params = init_gen_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc) predict_params = init_predict_params(nz=nz, n_f=n_f, n_layers=n_layers) # load the model model = utils.PickleLoad(model_file) set_model(disc_params, model['disc_params']) set_model(gen_params, model['gen_params']) set_model(predict_params, model['predict_params']) disc_batchnorm = model['disc_batchnorm'] gen_batchnorm = model['gen_batchnorm'] predict_batchnorm = model['predict_batchnorm'] disc_batchnorm = [sharedX(d) for d in disc_batchnorm] gen_batchnorm = [sharedX(d) for d in gen_batchnorm] predict_batchnorm = [sharedX(d) for d in predict_batchnorm] return disc_params, gen_params, predict_params, disc_batchnorm, gen_batchnorm, predict_batchnorm
def __call__(self, shape, name=None):
    w = np.zeros(shape)
    ycenter = shape[2] // 2
    xcenter = shape[3] // 2
    if shape[0] == shape[1]:
        o_idxs = np.arange(shape[0])
        i_idxs = np.arange(shape[1])
    elif shape[1] < shape[0]:
        o_idxs = np.arange(shape[0])
        i_idxs = np.random.permutation(
            np.tile(np.arange(shape[1]), shape[0] / shape[1] + 1))[:shape[0]]
    w[o_idxs, i_idxs, ycenter, xcenter] = self.scale
    return sharedX(w, name=name)
def _add_param(self, name, value, learnable=True, layer_name='', dtype=theano.config.floatX):
    if self.reuse:
        assert name in self.source_params, \
            'param "%s" does not exist and self.reuse==True' % name
        param = self.source_params[name][0]
        existing_shape = param.get_value().shape
        if value.shape != existing_shape:
            raise ValueError('Param "%s": incompatible shapes %s vs. %s' %
                             (name, existing_shape, value.shape))
        print '(%s) Reusing param "%s" with shape: %s' % \
            (layer_name, name, value.shape)
    else:
        print '(%s) Adding param "%s" with shape: %s' % \
            (layer_name, name, value.shape)
        param = sharedX(value, dtype=dtype, name=name)
    assert name not in self._params, 'param "%s" already exists' % name
    self._params[name] = (param, bool(learnable))
    return param
def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True): d_weight_r = sharedX(d_weight) x_c = T.tensor4() m_c = T.tensor4() x_e = T.tensor4() m_e = T.tensor4() z0 = T.matrix() z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))) gx = model.model_G(z) # input: im_c: 255: no edge; 0: edge; transform=> 1: no edge, 0: edge if nc == 1: # gx, range [0, 1] => edge, 1 gx3 = 1.0 - gx # T.tile(gx, (1, 3, 1, 1)) else: gx3 = gx mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1)) color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5)) gx_edge = self.hog.get_hog(gx3) x_edge = self.hog.get_hog(x_e) mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1)) sum_e = T.sum(T.abs_(mm_e)) sum_x_edge = T.sum(T.abs_(x_edge)) edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5)) rec_all = color_all + edge_all * sharedX(0.2) z_const = sharedX(5.0) init_all = T.mean(T.sqr(z0 - z)) * z_const if d_weight > 0: print('using D') p_gen = model.model_D(gx) real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T cost_all = rec_all + d_weight_r * real_all[0] + init_all else: print('without D') cost_all = rec_all + init_all real_all = T.zeros(cost_all.shape) cost = T.sum(cost_all) d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1)) output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge] print('COMPILING...') t = time() z_updates = d_updater([z], cost) _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates) print('%.2f seconds to compile _invert function' % (time() - t)) return [_invert, z_updates, z, d_weight_r, z_const]
def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True): d_weight_r = sharedX(d_weight) x_c = T.tensor4() m_c = T.tensor4() x_e = T.tensor4() m_e = T.tensor4() z0 = T.matrix() z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz)))) gx = model.model_G(z) # input: im_c: 255: no edge; 0: edge; transform=> 1: no edge, 0: edge if nc == 1: # gx, range [0, 1] => edge, 1 gx3 = 1.0-gx #T.tile(gx, (1, 3, 1, 1)) else: gx3 = gx mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1)) color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5)) gx_edge = self.hog.get_hog(gx3) x_edge = self.hog.get_hog(x_e) mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1)) sum_e = T.sum(T.abs_(mm_e)) sum_x_edge = T.sum(T.abs_(x_edge)) edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5)) rec_all = color_all + edge_all * sharedX(0.2) z_const = sharedX(5.0) init_all = T.mean(T.sqr(z0 - z)) * z_const if d_weight > 0: print('using D') p_gen = model.model_D(gx) real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T cost_all = rec_all + d_weight_r * real_all[0] + init_all else: print('without D') cost_all = rec_all + init_all real_all = T.zeros(cost_all.shape) cost = T.sum(cost_all) d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1)) output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge] print('COMPILING...') t = time() z_updates = d_updater([z], cost) _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates) print('%.2f seconds to compile _invert function' % (time() - t)) return [_invert, z_updates, z, d_weight_r, z_const]
def def_bfgs(net, layer='conv4', npx=64, alpha=0.002):
    print('COMPILING...')
    t = time()
    x_f = T.tensor4()
    x = T.tensor4()
    z = T.matrix()
    z = theano.printing.Print('this is z')(z)
    tanh = activations.Tanh()
    tz = tanh(z)
    # tz = printing_op(tz)
    # tz = z_scale * tz
    net.labels_var = T.TensorType('float32', [False] * 512)('labels_var')
    gx = net.G.eval(z, net.labels_var, ignore_unused_inputs=True)
    # gx = printing_op(gx)
    # gx = misc.adjust_dynamic_range(gx, [-1,1], [0,1])
    scale_factor = 16
    gx = theano.tensor.signal.pool.pool_2d(gx, ds=(scale_factor, scale_factor),
                                           mode='average_exc_pad', ignore_border=True)
    # gx = printing_op(gx)
    if layer == 'hog':
        gx_f = HOGNet.get_hog(gx, use_bin=True, BS=4)
    else:
        gx_t = AlexNet.transform_im(gx)
        gx_net = AlexNet.build_model(gx_t, layer=layer, shape=(None, 3, npx, npx))
        AlexNet.load_model(gx_net, layer=layer)
        gx_f = lasagne.layers.get_output(gx_net[layer], deterministic=True)
    f_rec = T.mean(T.sqr(x_f - gx_f), axis=(1, 2, 3)) * sharedX(alpha)
    x_rec = T.mean(T.sqr(x - gx), axis=(1, 2, 3))
    cost = T.sum(f_rec) + T.sum(x_rec)
    grad = T.grad(cost, z)
    output = [cost, grad, gx]
    _invert = theano.function(inputs=[z, x, x_f], outputs=output)
    print('%.2f seconds to compile _bfgs function' % (time() - t))
    return _invert, z
# define pixel loss pixel_loss = costs.L2Loss(gx, x) # define feature loss x_t = AlexNet.transform_im(x, npx=npx, nc=nc) x_net = AlexNet.build_model(x_t, layer=args.layer, shape=(None, 3, npx, npx)) AlexNet.load_model(x_net, layer=args.layer) x_f = lasagne.layers.get_output(x_net[args.layer], deterministic=True) gx_t = AlexNet.transform_im(gx, npx=npx, nc=nc) gx_net = AlexNet.build_model(gx_t, layer=args.layer, shape=(None, 3, npx, npx)) AlexNet.load_model(gx_net, layer=args.layer) gx_f = lasagne.layers.get_output(gx_net[args.layer], deterministic=True) ftr_loss = costs.L2Loss(gx_f, x_f) # add two losses together cost = pixel_loss + ftr_loss * sharedX(args.alpha) output = [cost, z] lrt = sharedX(args.lr) b1t = sharedX(args.b1) p_updater = updates.Adam(lr=lrt, b1=b1t, regularizer=updates.Regularizer(l2=args.weight_decay)) p_updates = p_updater(predict_params, cost) print('COMPILING') t = time() _train_p = theano.function([x], cost, updates=p_updates) _train_p_cost = theano.function([x], [cost, gx]) _predict_z = theano.function([x], z) _gen = theano.function([z], gx) print('%.2f seconds to compile theano functions' % (time() - t))
exec(tmp) # print conditional,type(batchsize),Channel[-1],kernal gifn = inits.Normal(scale=0.02) difn = inits.Normal(scale=0.02) ## filter_shape: (output channels, input channels, filter height, filter width, filter depth) ## load the parameters # gen_params = [gw1, gw2, gw3, gw4, gw5, gwx] # discrim_params = [dw1, dw2, dw3, dw4, dw5, dwy] temp = joblib.load('models%d/50_gen_params.jl' % objectNumber) gw1 = sharedX(temp[0]) gg1 = sharedX(temp[1]) gb1 = sharedX(temp[2]) gw2 = sharedX(temp[3]) gg2 = sharedX(temp[4]) gb2 = sharedX(temp[5]) gw3 = sharedX(temp[6]) gg3 = sharedX(temp[7]) gb3 = sharedX(temp[8]) gw4 = sharedX(temp[9]) gg4 = sharedX(temp[10]) gb4 = sharedX(temp[11]) gwx = sharedX(temp[12]) gen_params = [gw1, gg1, gb1, gw2, gg2, gb2, gw3, gg3, gb3, gw4, gg4, gb4, gwx]
    DlZ = sigmoid(T.dot(Dl4, wz))
    return DlZ

# def gen_Z(dist):
#     mu = dist[:Nz]
#     sigma = dist[Nz:]

X = T.tensor5()
encodeZ = encoder(X, *encode_params)
decodeX = decoder(encodeZ, *decode_params)

cost = bce(T.flatten(decodeX, 2), T.flatten(X, 2)).mean()

lrt = sharedX(lrate)
AutoEnc_parameter = encode_params + decode_params
updater = updates.Adam(lr=lrt, b1=0.8, regularizer=updates.Regularizer(l2=l2))
ae_updates = updater(AutoEnc_parameter, cost)  # renamed to avoid shadowing the `updates` module

print 'COMPILING'
t = time()
_train_ = theano.function([X], cost, updates=ae_updates)
print '%.2f seconds to compile theano functions' % (time() - t)

mat = scipy.io.loadmat('models_stats.mat')
mat = mat['models']
num = np.array(mat[0][0][1])
names = mat[0][0][0][0]
objname = []
def __init__(self, leak=0.2): self.leak = sharedX(leak)
X = T.fmatrix() y = T.fvector() theta = T.fmatrix() deltaX = T.fmatrix() # svgd gradient data_N = T.scalar('data_N') block = T.fmatrix() gX_1 = langevin_sampler(X, y, theta, data_N, *net_params) cost_1 = -1 * T.mean(T.sum(gX_1 * deltaX, axis=1)) lrt = sharedX(lr) g_updater_1 = updates.Adagrad(lr=lr, regularizer=updates.Regularizer(l2=l2)) g_updates_1 = g_updater_1(net_params, cost_1) print 'COMPILING' t = time() _gen_1 = theano.function([X, y, theta, data_N], gX_1) _train_g_1 = theano.function([X, y, theta, deltaX, data_N], cost_1, updates=g_updates_1) _svgd_gradient = theano.function([X, y, theta, data_N], svgd_gradient(X, y, theta, data_N)) _score_bayes_lr = theano.function([X, y, theta, data_N], score_bayes_lr(X, y, theta, data_N)) _evaluate = theano.function([X, y, theta], evaluate(X, y, theta)) print '%.2f seconds to compile theano functions'%(time()-t) n_iter = 10000
num_filters_list = [128] lr_list = [1e-3] lambda_eng_list = [1e-5] for lr in lr_list: for num_filters in num_filters_list: for hidden_size in hidden_size_list: for expert_size in expert_size_list: for lambda_eng in lambda_eng_list: model_config_dict["hidden_size"] = hidden_size model_config_dict["expert_size"] = expert_size model_config_dict["min_num_gen_filters"] = num_filters model_config_dict["min_num_eng_filters"] = num_filters # set updates energy_optimizer = Adagrad(lr=sharedX(lr), regularizer=Regularizer(l2=lambda_eng)) generator_optimizer = Adagrad(lr=sharedX(2.0 * lr)) model_test_name = ( model_name + "_f{}".format(int(num_filters)) + "_h{}".format(int(hidden_size)) + "_e{}".format(int(expert_size)) + "_re{}".format(int(-np.log10(lambda_eng))) + "_lr{}".format(int(-np.log10(lr))) ) train_model( data_stream=data_stream, energy_optimizer=energy_optimizer, generator_optimizer=generator_optimizer, model_config_dict=model_config_dict, model_test_name=model_test_name,
def load_energy_model(num_experts, model_params_dict): # FEATURE LAYER 0 (DECONV) print 'SET ENERGY FEATURE CONV LAYER 0' conv_w0 = sharedX(model_params_dict[ 'feat_conv_w0'], name='feat_conv_w0') conv_b0 = sharedX(model_params_dict[ 'feat_conv_b0'], name='feat_conv_b0') # FEATURE LAYER 1 (DECONV) print 'SET ENERGY FEATURE CONV LAYER 1' conv_w1 = sharedX(model_params_dict[ 'feat_conv_w1'], name='feat_conv_w1') conv_b1 = sharedX(model_params_dict[ 'feat_conv_b1'], name='feat_conv_b1') # FEATURE LAYER 2 (DECONV) print 'SET ENERGY FEATURE CONV LAYER 2' conv_w2 = sharedX(model_params_dict[ 'feat_conv_w2'], name='feat_conv_w2') conv_b2 = sharedX(model_params_dict[ 'feat_conv_b2'], name='feat_conv_b2') # FEATURE LAYER 3 (DECONV) print 'SET ENERGY FEATURE CONV LAYER 3' conv_w3 = sharedX(model_params_dict[ 'feat_conv_w3'], name='feat_conv_w3') conv_b3 = sharedX(model_params_dict[ 'feat_conv_b3'], name='feat_conv_b3') print 'SET ENERGY FEATURE EXTRACTOR' def energy_feature_function(input_data, is_train=True): # layer 0 (conv) h0 = relu(dnn_conv(input_data, conv_w0, subsample=(2, 2), border_mode=(2, 2))+conv_b0.dimshuffle('x', 0, 'x', 'x')) # layer 1 (conv) h1 = relu(dnn_conv( h0, conv_w1, subsample=(2, 2), border_mode=(2, 2))+conv_b1.dimshuffle('x', 0, 'x', 'x')) # layer 2 (conv) h2 = relu(dnn_conv( h1, conv_w2, subsample=(2, 2), border_mode=(2, 2))+conv_b2.dimshuffle('x', 0, 'x', 'x')) # layer 3 (conv) h3 = tanh(dnn_conv( h2, conv_w3, subsample=(2, 2), border_mode=(2, 2))+conv_b3.dimshuffle('x', 0, 'x', 'x')) # output feature feature = T.flatten(h3, 2) return feature # ENERGY FEATURE NORM LAYER (BN) # print 'SET ENERGY FUNCTION FEATURE NORM LAYER' # norm_w = sharedX(model_params_dict[ 'gen_norm_w'], name='gen_norm_w') # norm_b = sharedX(model_params_dict[ 'gen_norm_b'], name='gen_norm_b') # # def energy_normalize_function(feature_data, is_train=True): # return norm_layer(feature_data, g=norm_w, b=norm_b) # ENERGY EXPERT LAYER (LINEAR) print 'SET ENERGY FUNCTION EXPERT LAYER' expert_w = sharedX(model_params_dict[ 'eng_expert_w'], name='eng_expert_w') expert_b = sharedX(model_params_dict[ 'eng_expert_b'], name='eng_expert_b') def energy_expert_function(feature_data, is_train=True): e = softplus(T.dot(feature_data, expert_w)+expert_b) e = T.sum(-e, axis=1, keepdims=True) return e # def energy_prior_function(input_data, is_train=True): # e = num_experts*T.mean(T.sqr(input_data), axis=1, keepdims=True) # return e energy_params = [conv_w0, conv_b0, conv_w1, conv_b1, conv_w2, conv_b2, conv_w3, conv_b3, # norm_w, norm_b, expert_w, expert_b] return [energy_feature_function, # energy_normalize_function, energy_expert_function, # energy_prior_function, energy_params]
ngf = 64 # # of gen filters in first conv layer ndf = 64 # # of discrim filters in first conv layer nx = npx * npx * nc # # of dimensions in X niter = 3000 # # of iter at starting learning rate niter_decay = 3000 # # of iter to linearly decay learning rate to zero temp = npx / 4 relu = activations.Rectify() sigmoid = activations.Sigmoid() lrelu = activations.LeakyRectify() tanh = activations.Tanh() model_path = 'models/cond_dcgan/' gen_params = [ sharedX(p) for p in joblib.load(model_path + '5999_gen_params.jl') ] discrim_params = [ sharedX(p) for p in joblib.load(model_path + '5999_discrim_params.jl') ] def gen(Z, Y, w, w2, w3, wx): yb = Y.dimshuffle(0, 1, 'x', 'x') Z = T.concatenate([Z, Y], axis=1) h = relu(batchnorm(T.dot(Z, w))) h = T.concatenate([h, Y], axis=1) h2 = relu(batchnorm(T.dot(h, w2))) h2 = h2.reshape((h2.shape[0], ngf * 2, temp, temp)) h2 = conv_cond_concat(h2, yb) h3 = relu(batchnorm(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
relu = activations.Rectify() lrelu = activations.LeakyRectify(leak=0.2) sigmoid = activations.Sigmoid() trX, vaX, teX, trY, vaY, teY = svhn_with_valid_set(extra=False) vaX = floatX(vaX)/127.5-1. trX = floatX(trX)/127.5-1. teX = floatX(teX)/127.5-1. X = T.tensor4() desc = 'svhn_unsup_all_conv_dcgan_100z_gaussian_lr_0.0005_64mb' epoch = 200 params = [sharedX(p) for p in joblib.load('../models/%s/%d_discrim_params.jl'%(desc, epoch))] print desc.upper() print 'epoch %d'%epoch def mean_and_var(X): u = T.mean(X, axis=[0, 2, 3]) s = T.mean(T.sqr(X - u.dimshuffle('x', 0, 'x', 'x')), axis=[0, 2, 3]) return u, s def bnorm_statistics(X, w, w2, g2, b2, w3, g3, b3, wy): h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2))) h2 = dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2)) h2_u, h2_s = mean_and_var(h2) h2 = lrelu(batchnorm(h2, g=g2, b=b2))
lambda_eng_list = [1e-5] lambda_gen_list = [1e-5] for lr in lr_list: for num_filters in num_filters_list: for hidden_size in hidden_size_list: for expert_size in expert_size_list: for lambda_eng in lambda_eng_list: for lambda_gen in lambda_gen_list: model_config_dict['hidden_size'] = hidden_size model_config_dict['expert_size'] = expert_size model_config_dict['min_num_gen_filters'] = num_filters model_config_dict['min_num_eng_filters'] = num_filters # set updates energy_optimizer_reg_on = Adam(lr=sharedX(lr), regularizer=Regularizer(l2=lambda_eng)) energy_optimizer_reg_off = Adam(lr=sharedX(lr), regularizer=Regularizer(l2=0.0)) generator_optimizer_reg_on = Adam(lr=sharedX(lr), b1=0.1, b2=0.1, regularizer=Regularizer(l2=lambda_eng)) generator_optimizer_reg_off = Adam(lr=sharedX(lr), b1=0.1, b2=0.1, regularizer=Regularizer(l2=0.0)) model_test_name = model_name \ + '_f{}'.format(int(num_filters)) \ + '_h{}'.format(int(hidden_size)) \ + '_e{}'.format(int(expert_size)) \ + '_re{}'.format(int(-np.log10(lambda_eng))) \ + '_rg{}'.format(int(-np.log10(lambda_gen))) \
def run(hp, folder): trX, trY, nb_classes = load_data() k = 1 # # of discrim updates for each gen update l2 = 2.5e-5 # l2 weight decay b1 = 0.5 # momentum term of adam nc = 1 # # of channels in image ny = nb_classes # # of classes nbatch = 128 # # of examples in batch npx = 28 # # of pixels width/height of images nz = 100 # # of dim for Z ngfc = 512 # # of gen units for fully connected layers ndfc = 512 # # of discrim units for fully connected layers ngf = 64 # # of gen filters in first conv layer ndf = 64 # # of discrim filters in first conv layer nx = npx*npx*nc # # of dimensions in X niter = 200 # # of iter at starting learning rate niter_decay = 100 # # of iter to linearly decay learning rate to zero lr = 0.0002 # initial learning rate for adam scale = 0.02 k = hp['k'] l2 = hp['l2'] #b1 = hp['b1'] nc = 1 ny = nb_classes nbatch = hp['nbatch'] npx = 28 nz = hp['nz'] ngfc = hp['ngfc'] # # of gen units for fully connected layers ndfc = hp['ndfc'] # # of discrim units for fully connected layers ngf = hp['ngf'] # # of gen filters in first conv layer ndf = hp['ndf'] # # of discrim filters in first conv layer nx = npx*npx*nc # # of dimensions in X niter = hp['niter'] # # of iter at starting learning rate niter_decay = hp['niter_decay'] # # of iter to linearly decay learning rate to zero lr = hp['lr'] # initial learning rate for adam scale = hp['scale'] #k = 1 # # of discrim updates for each gen update #l2 = 2.5e-5 # l2 weight decay b1 = 0.5 # momentum term of adam #nc = 1 # # of channels in image #ny = nb_classes # # of classes budget_hours = hp.get('budget_hours', 2) budget_secs = budget_hours * 3600 ntrain = len(trX) def transform(X): return (floatX(X)).reshape(-1, nc, npx, npx) def inverse_transform(X): X = X.reshape(-1, npx, npx) return X model_dir = folder samples_dir = os.path.join(model_dir, 'samples') if not os.path.exists(model_dir): os.makedirs(model_dir) if not os.path.exists(samples_dir): os.makedirs(samples_dir) relu = activations.Rectify() sigmoid = activations.Sigmoid() lrelu = activations.LeakyRectify() bce = T.nnet.binary_crossentropy gifn = inits.Normal(scale=scale) difn = inits.Normal(scale=scale) gw = gifn((nz, ngfc), 'gw') gw2 = gifn((ngfc, ngf*2*7*7), 'gw2') gw3 = gifn((ngf*2, ngf, 5, 5), 'gw3') gwx = gifn((ngf, nc, 5, 5), 'gwx') dw = difn((ndf, nc, 5, 5), 'dw') dw2 = difn((ndf*2, ndf, 5, 5), 'dw2') dw3 = difn((ndf*2*7*7, ndfc), 'dw3') dwy = difn((ndfc, 1), 'dwy') gen_params = [gw, gw2, gw3, gwx] discrim_params = [dw, dw2, dw3, dwy] def gen(Z, w, w2, w3, wx, use_batchnorm=True): if use_batchnorm: batchnorm_ = batchnorm else: batchnorm_ = lambda x:x h = relu(batchnorm_(T.dot(Z, w))) h2 = relu(batchnorm_(T.dot(h, w2))) h2 = h2.reshape((h2.shape[0], ngf*2, 7, 7)) h3 = relu(batchnorm_(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2)))) x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2))) return x def discrim(X, w, w2, w3, wy): h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2))) h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2)))) h2 = T.flatten(h2, 2) h3 = lrelu(batchnorm(T.dot(h2, w3))) y = sigmoid(T.dot(h3, wy)) return y X = T.tensor4() Z = T.matrix() gX = gen(Z, *gen_params) p_real = discrim(X, *discrim_params) p_gen = discrim(gX, *discrim_params) d_cost_real = bce(p_real, T.ones(p_real.shape)).mean() d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean() g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean() d_cost = d_cost_real + d_cost_gen g_cost = g_cost_d cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen] lrt = 
sharedX(lr) d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) d_updates = d_updater(discrim_params, d_cost) g_updates = g_updater(gen_params, g_cost) #updates = d_updates + g_updates print 'COMPILING' t = time() _train_g = theano.function([X, Z], cost, updates=g_updates) _train_d = theano.function([X, Z], cost, updates=d_updates) _gen = theano.function([Z], gX) print '%.2f seconds to compile theano functions'%(time()-t) tr_idxs = np.arange(len(trX)) sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz))) def gen_samples(n, nbatch=128): samples = [] labels = [] n_gen = 0 for i in range(n/nbatch): zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz))) xmb = _gen(zmb) samples.append(xmb) n_gen += len(xmb) n_left = n-n_gen zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz))) xmb = _gen(zmb) samples.append(xmb) return np.concatenate(samples, axis=0) s = floatX(np_rng.uniform(-1., 1., size=(10000, nz))) n_updates = 0 n_check = 0 n_epochs = 0 n_updates = 0 n_examples = 0 t = time() begin = datetime.now() for epoch in range(1, niter+niter_decay+1): t = time() print("Epoch {}".format(epoch)) trX = shuffle(trX) for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch): imb = transform(imb) zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz))) if n_updates % (k+1) == 0: cost = _train_g(imb, zmb) else: cost = _train_d(imb, zmb) n_updates += 1 n_examples += len(imb) samples = np.asarray(_gen(sample_zmb)) grayscale_grid_vis(inverse_transform(samples), (10, 20), '{}/{:05d}.png'.format(samples_dir, n_epochs)) n_epochs += 1 if n_epochs > niter: lrt.set_value(floatX(lrt.get_value() - lr/niter_decay)) if n_epochs % 50 == 0 or epoch == niter + niter_decay or epoch == 1: imgs = [] for i in range(0, s.shape[0], nbatch): imgs.append(_gen(s[i:i+nbatch])) img = np.concatenate(imgs, axis=0) samples_filename = '{}/{:05d}_gen.npz'.format(model_dir, n_epochs) joblib.dump(img, samples_filename, compress=9) shutil.copy(samples_filename, '{}/gen.npz'.format(model_dir)) joblib.dump([p.get_value() for p in gen_params], '{}/d_gen_params.jl'.format(model_dir, n_epochs), compress=9) joblib.dump([p.get_value() for p in discrim_params], '{}/discrim_params.jl'.format(model_dir, n_epochs), compress=9) print('Elapsed : {}sec'.format(time() - t)) if (datetime.now() - begin).total_seconds() >= budget_secs: print("Budget finished.quit.") break
z = T.matrix() gx = train_dcgan_utils.gen(z, gen_params, n_layers=n_layers, n_f=n_f, nc=nc) p_real = train_dcgan_utils.discrim(x, disc_params, n_layers=n_layers) p_gen = train_dcgan_utils.discrim(gx, disc_params, n_layers=n_layers) d_cost_real = costs.bce(p_real, T.ones(p_real.shape)) d_cost_gen = costs.bce(p_gen, T.zeros(p_gen.shape)) g_cost_d = costs.bce(p_gen, T.ones(p_gen.shape)) d_cost = d_cost_real + d_cost_gen g_cost = g_cost_d cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen] lrt = sharedX(args.lr) d_updater = updates.Adam(lr=lrt, b1=args.b1, regularizer=updates.Regularizer(l2=args.weight_decay)) g_updater = updates.Adam(lr=lrt, b1=args.b1, regularizer=updates.Regularizer(l2=args.weight_decay)) d_updates = d_updater(disc_params, d_cost) g_updates = g_updater(gen_params, g_cost) updates = d_updates + g_updates print('COMPILING') t = time() _train_g = theano.function([x, z], cost, updates=g_updates) _train_d = theano.function([x, z], cost, updates=d_updates) _gen = theano.function([z], gx)
# compute costs based on discriminator output for real/generated data d_cost_real = sum([bce(p, T.ones(p.shape)).mean() for p in p_real]) d_cost_gen = sum([bce(p, T.zeros(p.shape)).mean() for p in p_gen]) g_cost_d = sum([bce(p, T.ones(p.shape)).mean() for p in p_gen]) #d_cost_real = bce(p_real[-1], T.ones(p_real[-1].shape)).mean() #d_cost_gen = bce(p_gen[-1], T.zeros(p_gen[-1].shape)).mean() #g_cost_d = bce(p_gen[-1], T.ones(p_gen[-1].shape)).mean() d_cost = d_cost_real + d_cost_gen + ( 1e-5 * sum([T.sum(p**2.0) for p in discrim_params])) g_cost = g_cost_d + (1e-5 * sum([T.sum(p**2.0) for p in gen_params])) cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen] lrt = sharedX(lr) d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) d_updates = d_updater(discrim_params, d_cost) g_updates = g_updater(gen_params, g_cost) updates = d_updates + g_updates print 'COMPILING' t = time() _train_g = theano.function([X, Z0], cost, updates=g_updates) _train_d = theano.function([X, Z0], cost, updates=d_updates) _gen = theano.function([Z0], gX) print '%.2f seconds to compile theano functions' % (time() - t) f_log = open("{}/{}.ndjson".format(log_dir, desc), 'wb') log_fields = [
num_filters_list = [128] lr_list = [1e-3] lambda_eng_list = [1e-5] for lr in lr_list: for num_filters in num_filters_list: for hidden_size in hidden_size_list: for expert_size in expert_size_list: for lambda_eng in lambda_eng_list: model_config_dict['hidden_size'] = hidden_size model_config_dict['expert_size'] = expert_size model_config_dict['min_num_gen_filters'] = num_filters model_config_dict['min_num_eng_filters'] = num_filters # set updates energy_optimizer = Adagrad(lr=sharedX(lr), regularizer=Regularizer(l2=lambda_eng)) generator_optimizer = Adagrad(lr=sharedX(2.*lr)) model_test_name = model_name \ + '_f{}'.format(int(num_filters)) \ + '_h{}'.format(int(hidden_size)) \ + '_e{}'.format(int(expert_size)) \ + '_re{}'.format(int(-np.log10(lambda_eng))) \ + '_lr{}'.format(int(-np.log10(lr))) if is_continue is True: continue_train_model(last_batch_idx=last_batch_idx, data_stream=data_stream, energy_optimizer=energy_optimizer, generator_optimizer=generator_optimizer, model_config_dict=model_config_dict,
def load_model(): [e_params, g_params, d_params] = pickle.load(open("faces_dcgan.pkl", "rb")) gwx = g_params[-1] dwy = d_params[-1] # inputs X = T.tensor4() ## encode layer e_layer_sizes = [128, 64, 32, 16, 8] e_filter_sizes = [3, 256, 256, 512, 1024] eX, e_params, e_layers = make_conv_set(X, e_layer_sizes, e_filter_sizes, "e", weights=e_params) ## generative layer g_layer_sizes = [8, 16, 32, 64, 128] g_num_filters = [1024, 512, 256, 256, 128] g_out, g_params, g_layers = make_conv_set(eX, g_layer_sizes, g_num_filters, "g", weights=g_params) g_params += [gwx] gX = tanh(deconv(g_out, gwx, subsample=(1, 1), border_mode=(2, 2))) ## discrim layer(s) df1 = 128 d_layer_sizes = [128, 64, 32, 16, 8] d_filter_sizes = [3, df1, 2 * df1, 4 * df1, 8 * df1] def discrim(input, name, weights=None): d_out, disc_params, d_layers = make_conv_set(input, d_layer_sizes, d_filter_sizes, name, weights=weights) d_flat = T.flatten(d_out, 2) disc_params += [dwy] y = sigmoid(T.dot(d_flat, dwy)) return y, disc_params, d_layers # target outputs target = T.tensor4() p_real, d_params, d_layers = discrim(target, "d", weights=d_params) # we need to make sure the p_gen params are the same as the p_real params p_gen, d_params2, d_layers = discrim(gX, "d", weights=d_params) ## GAN costs d_cost_real = bce(p_real, T.ones(p_real.shape)).mean() d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean() g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean() ## MSE encoding cost is done on an (averaged) downscaling of the image target_pool = max_pool_2d(target, (4, 4), mode="average_exc_pad", ignore_border=True) target_flat = T.flatten(target_pool, 2) gX_pool = max_pool_2d(gX, (4, 4), mode="average_exc_pad", ignore_border=True) gX_flat = T.flatten(gX_pool, 2) enc_cost = mse(gX_flat, target_flat).mean() ## generator cost is a linear combination of the discrim cost plus the MSE enocding cost d_cost = d_cost_real + d_cost_gen g_cost = g_cost_d + enc_cost / 10 ## if the enc_cost is weighted too highly it will take a long time to train ## N.B. e_cost and e_updates will only try and minimise MSE loss on the autoencoder (for debugging) e_cost = enc_cost cost = [g_cost_d, d_cost_real, enc_cost] elrt = sharedX(0.002) lrt = sharedX(lr) d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) e_updater = updates.Adam(lr=elrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) d_updates = d_updater(d_params, d_cost) g_updates = g_updater(e_params + g_params, g_cost) e_updates = e_updater(e_params, e_cost) print 'COMPILING' t = time() _train_g = theano.function([X, target], cost, updates=g_updates) _train_d = theano.function([X, target], cost, updates=d_updates) _train_e = theano.function([X, target], cost, updates=e_updates) _get_cost = theano.function([X, target], cost) print('%.2f seconds to compile theano functions' % (time() - t)) img_dir = "gen_images/" if not os.path.exists(img_dir): os.makedirs(img_dir) ae_encode = theano.function([X, target], [gX, target]) return ae_encode
#target_pool = max_pool_2d(target, (4,4), mode="average_exc_pad",ignore_border=True) target_flat = T.flatten(target, 2) #gX_pool = max_pool_2d(gX, (4,4), mode="average_exc_pad",ignore_border=True) gX_flat = T.flatten(gX,2) enc_cost = mse(gX_flat, target_flat).mean() ## generator cost is a linear combination of the discrim cost plus the MSE enocding cost d_cost = d_cost_real + d_cost_gen g_cost = g_cost_d + enc_cost / 100 ## if the enc_cost is weighted too highly it will take a long time to train ## N.B. e_cost and e_updates will only try and minimise MSE loss on the autoencoder (for debugging) e_cost = enc_cost cost = [g_cost_d, d_cost_real, enc_cost] elrt = sharedX(0.002) lrt = sharedX(lr) d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) e_updater = updates.Adam(lr=elrt, b1=b1, regularizer=updates.Regularizer(l2=l2)) d_updates = d_updater(d_params, d_cost) g_updates = g_updater(e_params + g_params, g_cost) e_updates = e_updater(e_params, e_cost) print 'COMPILING' t = time() _train_g = theano.function([X, target], cost, updates=g_updates) _train_d = theano.function([X, target], cost, updates=d_updates) _train_e = theano.function([X, target], cost, updates=e_updates) _get_cost = theano.function([X, target], cost)
td_modules = inf_gen_model.td_modules bu_modules = inf_gen_model.bu_modules im_modules = inf_gen_model.im_modules mix_module = inf_gen_model.mix_module # inf_gen_model.load_params(inf_gen_param_file) def clip_sigmoid(x): output = sigmoid(T.clip(x, -15.0, 15.0)) return output #################################### # Setup the optimization objective # #################################### lam_kld = sharedX(floatX([1.0])) gen_params = inf_gen_model.gen_params inf_params = inf_gen_model.inf_params g_params = gen_params + inf_params ###################################################### # BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS # ###################################################### # Setup symbolic vars for the model inputs, outputs, and costs Xg = T.tensor4() # symbolic var for inputs to bottom-up inference network Z0 = T.matrix() # symbolic var for "noise" inputs to the generative stuff ########################################################## # CONSTRUCT COST VARIABLES FOR THE VAE PART OF OBJECTIVE # ##########################################################
def load_vgg_feature_extractor(): vgg_param_dict = h5py.File(vgg_filepath, "r") # input_channel x output_channel x filter_size x filter_size # conv stage 0 (64x64=>32x32) # (3 x 64 x 3 x 3) conv_w0_0 = sharedX(vgg_param_dict["layer_1"]["param_0"], name="feat_conv_w0_0") conv_b0_0 = sharedX(vgg_param_dict["layer_1"]["param_1"], name="feat_conv_b0_0") # (64 x 64 x 3 x 3) conv_w0_1 = sharedX(vgg_param_dict["layer_3"]["param_0"], name="feat_conv_w0_1") conv_b0_1 = sharedX(vgg_param_dict["layer_3"]["param_1"], name="feat_conv_b0_1") # conv stage 1 (32x32=>16x16) # (64 x 128 x 3 x 3) conv_w1_0 = sharedX(vgg_param_dict["layer_6"]["param_0"], name="feat_conv_w1_0") conv_b1_0 = sharedX(vgg_param_dict["layer_6"]["param_1"], name="feat_conv_w1_0") # (128 x 128 x 3 x 3) conv_w1_1 = sharedX(vgg_param_dict["layer_8"]["param_0"], name="feat_conv_w1_1") conv_b1_1 = sharedX(vgg_param_dict["layer_8"]["param_1"], name="feat_conv_b1_1") # conv stage 2 (16x16=>8x8) # (128 x 256 x 3 x 3) conv_w2_0 = sharedX(vgg_param_dict["layer_11"]["param_0"], name="feat_conv_w2_0") conv_b2_0 = sharedX(vgg_param_dict["layer_11"]["param_1"], name="feat_conv_b2_0") # (256 x 256 x 3 x 3) conv_w2_1 = sharedX(vgg_param_dict["layer_13"]["param_0"], name="feat_conv_w2_1") conv_b2_1 = sharedX(vgg_param_dict["layer_13"]["param_1"], name="feat_conv_b2_1") # (256 x 256 x 3 x 3) conv_w2_2 = sharedX(vgg_param_dict["layer_15"]["param_0"], name="feat_conv_w2_2") conv_b2_2 = sharedX(vgg_param_dict["layer_15"]["param_1"], name="feat_conv_b2_2") # conv stage 3 (8x8=>4x4) # (256 x 512 x 3 x 3) conv_w3_0 = sharedX(vgg_param_dict["layer_18"]["param_0"], name="feat_conv_w3_0") conv_b3_0 = sharedX(vgg_param_dict["layer_18"]["param_1"], name="feat_conv_b3_0") # (512 x 512 x 3 x 3) conv_w3_1 = sharedX(vgg_param_dict["layer_20"]["param_0"], name="feat_conv_w3_1") conv_b3_1 = sharedX(vgg_param_dict["layer_20"]["param_1"], name="feat_conv_b3_1") # (512 x 512 x 3 x 3) conv_w3_2 = sharedX(vgg_param_dict["layer_22"]["param_0"], name="feat_conv_w3_2") conv_b3_2 = sharedX(vgg_param_dict["layer_22"]["param_1"], name="feat_conv_b3_2") # conv stage 4 (4x4=>2x2) # (512 x 512 x 3 x 3) conv_w4_0 = sharedX(vgg_param_dict["layer_25"]["param_0"], name="feat_conv_w4_0") conv_b4_0 = sharedX(vgg_param_dict["layer_25"]["param_1"], name="feat_conv_b4_0") # (512 x 512 x 3 x 3) conv_w4_1 = sharedX(vgg_param_dict["layer_27"]["param_0"], name="feat_conv_w4_1") conv_b4_1 = sharedX(vgg_param_dict["layer_27"]["param_1"], name="feat_conv_b4_1") # (512 x 512 x 3 x 3) conv_w4_2 = sharedX(vgg_param_dict["layer_29"]["param_0"], name="feat_conv_w4_2") conv_b4_2 = sharedX(vgg_param_dict["layer_29"]["param_1"], name="feat_conv_b4_2") parameter_set = [ conv_w0_0, conv_b0_0, conv_w0_1, conv_b0_1, conv_w1_0, conv_b1_0, conv_w1_1, conv_b1_1, conv_w2_0, conv_b2_0, conv_w2_1, conv_b2_1, conv_w2_2, conv_b2_2, conv_w3_0, conv_b3_0, conv_w3_1, conv_b3_1, conv_w3_2, conv_b3_2, conv_w4_0, conv_b4_0, conv_w4_1, conv_b4_1, conv_w4_2, conv_b4_2, ] def feature_extractor(input_data): # conv stage 0 (64x64=>32x32) h0_0 = dnn_conv(input_data, conv_w0_0, border_mode=(1, 1)) + conv_b0_0.dimshuffle("x", 0, "x", "x") h0_1 = dnn_conv(relu(h0_0), conv_w0_1, border_mode=(1, 1)) + conv_b0_1.dimshuffle("x", 0, "x", "x") h0 = dnn_pool(relu(h0_1), ws=(2, 2), stride=(2, 2)) # conv stage 1 (32x32=>16x16) h1_0 = dnn_conv(h0, conv_w1_0, border_mode=(1, 1)) + conv_b1_0.dimshuffle("x", 0, "x", "x") h1_1 = dnn_conv(relu(h1_0), conv_w1_1, border_mode=(1, 1)) + conv_b1_1.dimshuffle("x", 0, "x", "x") h1 = 
dnn_pool(relu(h1_1), ws=(2, 2), stride=(2, 2)) # conv stage 2 (16x16=>8x8) h2_0 = dnn_conv(h1, conv_w2_0, border_mode=(1, 1)) + conv_b2_0.dimshuffle("x", 0, "x", "x") h2_1 = dnn_conv(relu(h2_0), conv_w2_1, border_mode=(1, 1)) + conv_b2_1.dimshuffle("x", 0, "x", "x") h2_2 = dnn_conv(relu(h2_1), conv_w2_2, border_mode=(1, 1)) + conv_b2_2.dimshuffle("x", 0, "x", "x") h2 = dnn_pool(relu(h2_2), ws=(2, 2), stride=(2, 2)) # conv stage 3 (8x8=>4x4) h3_0 = dnn_conv(h2, conv_w3_0, border_mode=(1, 1)) + conv_b3_0.dimshuffle("x", 0, "x", "x") h3_1 = dnn_conv(relu(h3_0), conv_w3_1, border_mode=(1, 1)) + conv_b3_1.dimshuffle("x", 0, "x", "x") h3_2 = dnn_conv(relu(h3_1), conv_w3_2, border_mode=(1, 1)) + conv_b3_2.dimshuffle("x", 0, "x", "x") h3 = dnn_pool(relu(h3_2), ws=(2, 2), stride=(2, 2)) # conv stage 4 (4x4=>2x2) h4_0 = dnn_conv(h3, conv_w4_0, border_mode=(1, 1)) + conv_b4_0.dimshuffle("x", 0, "x", "x") h4_1 = dnn_conv(relu(h4_0), conv_w4_1, border_mode=(1, 1)) + conv_b4_1.dimshuffle("x", 0, "x", "x") h4_2 = dnn_conv(relu(h4_1), conv_w4_2, border_mode=(1, 1)) + conv_b4_2.dimshuffle("x", 0, "x", "x") h4 = dnn_pool(relu(h4_2), ws=(2, 2), stride=(2, 2)) return T.flatten(h4, 2) return feature_extractor, parameter_set
the file samples.png """ nz = 256 nc = 3 npx = 32 ngf = 128 ndf = 128 relu = activations.Rectify() sigmoid = activations.Sigmoid() lrelu = activations.LeakyRectify() tanh = activations.Tanh() #%% model_path = 'C:/Users/zhanq/OneDrive - Washington University in St. Louis/GitHub/dcgan_code/models/imagenet_gan_pretrain_128f_relu_lrelu_7l_3x3_256z/' gen_params = [sharedX(p) for p in joblib.load(model_path + '30_gen_params.jl')] discrim_params = [ sharedX(p) for p in joblib.load(model_path + '30_discrim_params.jl') ] #%% def gen(Z, w, g, b, w2, g2, b2, w3, g3, b3, w4, g4, b4, w5, g5, b5, w6, g6, b6, wx): h = relu(batchnorm(T.dot(Z, w), g=g, b=b)) h = h.reshape((h.shape[0], ngf * 4, 4, 4)) h2 = relu( batchnorm(deconv(h, w2, subsample=(2, 2), border_mode=(1, 1)), g=g2, b=b2))
# construct the "wrapper" object for managing all our modules inf_gen_model = CondInfGenModel( td_modules=td_modules, bu_modules_gen=bu_modules_gen, im_modules_gen=im_modules_gen, bu_modules_inf=bu_modules_inf, im_modules_inf=im_modules_inf, merge_info=merge_info, output_transform=output_noop) # inf_gen_model.load_params(inf_gen_param_file) #################################### # Setup the optimization objective # #################################### lam_kld = sharedX(floatX([1.0])) X_init = sharedX(floatX(np.zeros((1, nc, npx, npx)))) # default "initial state" noise = sharedX(floatX([noise_std])) gen_params = inf_gen_model.gen_params inf_params = inf_gen_model.inf_params all_params = inf_gen_model.all_params + [X_init] ###################################################### # BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS # ###################################################### # Setup symbolic vars for the model inputs, outputs, and costs Xg_gen = T.tensor4() # symbolic var for inputs to inference network Xm_gen = T.tensor4() Xg_inf = T.tensor4() # symbolic var for inputs to generator network Xm_inf = T.tensor4()
for lr in lr_list:
    for num_filters in num_filters_list:
        for hidden_size in hidden_size_list:
            for dropout in dropout_list:
                for lambda_eng in lambda_eng_list:
                    for lambda_gen in lambda_gen_list:
                        for init_noise in init_noise_list:
                            for noise_decay in noise_decay_list:
                                model_config_dict['hidden_size'] = hidden_size
                                model_config_dict['min_num_gen_filters'] = num_filters
                                model_config_dict['min_num_eng_filters'] = num_filters
                                model_config_dict['init_noise'] = init_noise
                                model_config_dict['noise_decay'] = noise_decay

                                # set updates
                                energy_optimizer = RMSprop(lr=sharedX(lr),
                                                           regularizer=Regularizer(l2=lambda_eng))
                                generator_optimizer = RMSprop(lr=sharedX(lr * 10),
                                                              regularizer=Regularizer(l2=lambda_gen))
                                model_test_name = model_name \
                                    + '_f{}'.format(int(num_filters)) \
                                    + '_h{}'.format(int(hidden_size)) \
                                    + '_d{}'.format(int(dropout)) \
                                    + '_re{}'.format(int(-np.log10(lambda_eng))) \
                                    + '_rg{}'.format(int(-np.log10(lambda_gen))) \
                                    + '_n{}'.format(int(-np.log10(init_noise))) \
                                    + '_d{}'.format(int(1 if noise_decay == 1.0 else 0)) \
                                    + '_lr{}'.format(int(-np.log10(lr)))
                                train_model(data_stream=data_stream,
                                            energy_optimizer=energy_optimizer,
from lib.rng import py_rng, np_rng from lib.vis import color_grid_vis from lib.img_utils import inverse_transform, transform from sklearn.externals import joblib import theano import theano.tensor as T dcgan_root = "/mnt/disk1/vittal/dcgan_code/visual_concepts/" desc = "vcgan_orig_multi" model_dir = dcgan_root + '/models/%s/'%desc model_number = "35_gen_params.jl" gen_params_np = joblib.load(model_dir + model_number) gen_params = [sharedX(element) for element in gen_params_np] vc_nums = [41, 35, 37, 10, 3, 57, 60] costs = np.zeros((len(vc_nums), 1)) for ii, vc_num in enumerate(vc_nums): Z = T.matrix() gX = models.gen(Z, *gen_params) X = T.tensor4() cost = T.mean(T.sqr(gX - X)) if 'vc_num' in locals(): from load import visual_concepts from lib.config import data_dir import os path = os.path.join(data_dir, "vc.hdf5") tr_data, tr_stream = visual_concepts(path, ntrain=None) tr_handle = tr_data.open()
lambda_gen_list = [1e-10] for lr in lr_list: for num_filters in num_filters_list: for hidden_size in hidden_size_list: for expert_size in expert_size_list: for dropout in dropout_list: for lambda_eng in lambda_eng_list: for lambda_gen in lambda_gen_list: model_config_dict['hidden_size'] = hidden_size model_config_dict['expert_size'] = expert_size model_config_dict['min_num_gen_filters'] = num_filters model_config_dict['min_num_eng_filters'] = num_filters # set updates energy_optimizer = Adagrad(lr=sharedX(lr), regularizer=Regularizer(l2=lambda_eng)) generator_optimizer = Adagrad(lr=sharedX(lr*2), regularizer=Regularizer(l2=lambda_gen)) generator_bn_optimizer = Adagrad(lr=sharedX(lr*2), regularizer=Regularizer(l2=0.0)) model_test_name = model_name \ + '_f{}'.format(int(num_filters)) \ + '_h{}'.format(int(hidden_size)) \ + '_e{}'.format(int(expert_size)) \ + '_d{}'.format(int(dropout)) \ + '_re{}'.format(int(-np.log10(lambda_eng))) \ + '_rg{}'.format(int(-np.log10(lambda_gen))) \ + '_lr{}'.format(int(-np.log10(lr))) \ train_model(data_stream=data_stream,
d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean() d_error_gen = T.mean(p_gen) g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean() d_cost = d_cost_real + d_cost_gen if args.onlyclassify: d_cost = d_classify elif args.classify: d_cost += d_classify g_cost = g_cost_d cost_target = [ g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen, d_error_real, d_error_gen, d_classify, d_classify_error ] lrg = sharedX(lr) lrd = sharedX(lr) l2t = sharedX(l2d) d_updater = updates.Adam(lr=lrd, b1=b1, regularizer=updates.Regularizer(l2=l2t)) g_updater = updates.Adam(lr=lrg, b1=b1, regularizer=updates.Regularizer(l2=l2)) """ #old model if args.onlyclassify: d_updates = d_updater(discrim_params[:-2]+discrim_params[-1:], d_cost) elif args.classify: d_updates = d_updater(discrim_params, d_cost) else: d_updates = d_updater(discrim_params[:-1], d_cost) """
lr_list = [1e-4]
dropout_list = [False]
lambda_eng_list = [1e-10]
lambda_gen_list = [1e-10]

for lr in lr_list:
    for num_filters in num_filters_list:
        for hidden_size in hidden_size_list:
            for lambda_eng in lambda_eng_list:
                for lambda_gen in lambda_gen_list:
                    model_config_dict["hidden_size"] = hidden_size
                    model_config_dict["min_num_gen_filters"] = num_filters
                    model_config_dict["min_num_eng_filters"] = num_filters

                    # set updates
                    model_optimizer = RMSprop(lr=sharedX(lr),
                                              regularizer=Regularizer(l2=lambda_eng))

                    model_test_name = (
                        model_name
                        + "_f{}".format(int(num_filters))
                        + "_h{}".format(int(hidden_size))
                        + "_re{}".format(int(-np.log10(lambda_eng)))
                        + "_rg{}".format(int(-np.log10(lambda_gen)))
                        + "_lr{}".format(int(-np.log10(lr)))
                    )
                    train_model(
                        data_stream=data_stream,
                        model_optimizer=model_optimizer,
                        model_config_dict=model_config_dict,
                        model_test_name=model_test_name,
                    )
e_real_n = discrim(X+N, *discrim_params).sum(axis=1, keepdims=True)
e_gen = discrim(gX, *discrim_params).sum(axis=1, keepdims=True)
e_gen_n = discrim(gX+N, *discrim_params).sum(axis=1, keepdims=True)

######################################
# SET DISCRIMINATOR & GENERATOR COST #
######################################
e_cost = e_real_n.mean() - e_gen_n.mean()
g_cost = e_gen_n.mean()
cost = [e_cost, g_cost, e_real, e_gen, annealing]

###############
# SET UPDATER #
###############
d_updater = updates.RMSprop(lr=sharedX(0.0001), rho=0.5, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.RMSprop(lr=sharedX(0.0001), rho=0.5, regularizer=updates.Regularizer(l2=l2))
d_updates = d_updater(discrim_params, e_cost)
g_updates = g_updater(gen_params, annealing*g_cost)
updates = d_updates + g_updates

######################################
# RANDOM SELECT INPUT DATA & DISPLAY #
######################################
vis_idxs = py_rng.sample(np.arange(len(vaX)), nvis)
vaX_vis = inverse_transform(vaX[vis_idxs])
color_grid_vis(vaX_vis.transpose([0, 2, 3, 1]), (14, 14), 'samples/%s_etl_test.png' % desc)

####################
# COMPILE FUNCTION #
deltaX = T.tensor4()
# random noise
Z = T.matrix()

f_real = discrim(X)   # data
f_gen = discrim(X0)   # vgd particles

cost_data = -1 * f_real.mean()
cost_vgd = -1 * f_gen.mean()

gX = gen(Z, *gen_params)
# update the generator model by minimizing the reconstruction mse
g_cost = -1 * T.sum(T.sum(T.flatten(gX, 2) * T.flatten(deltaX, 2), axis=1))

balance_weight = sharedX(1.)
d_cost = cost_data - balance_weight * cost_vgd  # for discriminative model, minimize cost

lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))

d_updates = d_updater(rbm_params, d_cost)
g_updates = g_updater(gen_params, g_cost)

print 'COMPILING'
t = time()
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_gen = theano.function([Z], gen(Z, *gen_params))
_logp_rbm = theano.function([X], logp_rbm(X))
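# Why g_cost has this form (hedged explanation, not taken from the original
# file): deltaX is treated as a constant target direction (the vgd / SVGD
# transport direction computed elsewhere), so
#     g_cost = -sum_i dot(gX_i, deltaX_i)
# has gradient -J^T * deltaX with respect to the generator parameters, where J
# is the Jacobian of gX. Gradient descent on g_cost therefore pushes the
# generator's outputs along deltaX. For this to hold, deltaX must be fed in as
# data (a T.tensor4 input) rather than built from a graph that depends on
# gen_params, which is exactly how _train_g is compiled above.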
# construct the "wrapper" object for managing all our modules
seq_cond_gen_model = \
    DeepSeqCondGenRNN(
        td_modules=td_modules,
        bu_modules_gen=bu_modules_gen,
        im_modules_gen=im_modules_gen,
        bu_modules_inf=bu_modules_inf,
        im_modules_inf=im_modules_inf,
        merge_info=merge_info)

# inf_gen_model.load_params(inf_gen_param_file)

####################################
# Setup the optimization objective #
####################################
lam_kld = sharedX(floatX([1.0]))
c0 = sharedX(floatX(np.zeros((1, nc, npx, npx))))
gen_params = seq_cond_gen_model.gen_params + [c0]
inf_params = seq_cond_gen_model.inf_params
all_params = seq_cond_gen_model.all_params + [c0]

######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

def clip_sigmoid(x):
    # clip pre-activations to [-15, 15] so the sigmoid never saturates to
    # exactly 0 or 1, which would blow up log-likelihood terms downstream
    output = sigmoid(T.clip(x, -15.0, 15.0))
    return output
    rX = dv1
    mse = T.sqrt(T.sum(T.flatten((X - rX)**2, 2), axis=1))
    return (T.flatten(cv6, 2), rX, mse)

X = T.tensor4()       # data
X0 = T.tensor4()      # vgd samples
X1 = T.tensor4()      # vgd samples
deltaX = T.tensor4()  # vgd gradient
Z = T.matrix()

### define discriminative cost ###
_, rX_data, mse_data = discrim(X)
_, rX_vgd, mse_vgd = discrim(X0)

balance_weight = sharedX(0.3)
d_cost = T.mean(mse_data - balance_weight * mse_vgd)

################################# VGD ################################

def vgd_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(
        T.eq((V.shape[0] % 2), 0),
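    # Hedged completion sketch (an assumption, not the author's file): SVGD
    # implementations usually finish the median heuristic started above as
    # follows, restated here in full so the snippet is self-contained. The
    # median of the pairwise squared distances in V is rescaled by log(n + 1)
    # to give the RBF bandwidth.
    h_med = T.switch(T.eq((V.shape[0] % 2), 0),
                     # even number of entries: average the two middle order statistics
                     T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                     # odd number of entries: take the middle element
                     T.sort(V)[V.shape[0] // 2])
    h_med = T.sqrt(0.5 * h_med / T.log(T.cast(X0.shape[0], 'float32') + 1.0))
    Kxy = T.exp(-H / (h_med ** 2) / 2.0)  # rbf kernel with the median-heuristic bandwidth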
# NOTE: assumed repo-level imports (paths follow the rest of the code base)
import os
import numpy as np
import models
from lib.theano_utils import sharedX
from lib.config import data_dir
from lib.img_utils import transform
from sklearn.externals import joblib
import theano
import theano.tensor as T
from tqdm import tqdm
from load import visual_concepts

dcgan_root = "/mnt/disk1/vittal/dcgan_code/visual_concepts/"
desc = "vcgan_orig_multi"
model_dir = dcgan_root + '/models/%s/' % desc
model_number = "25_discrim_params.jl"
discrim_params_np = joblib.load(model_dir + model_number)
discrim_params = [sharedX(element) for element in discrim_params_np]

X = T.tensor4()
Y = T.matrix()
YMULTI = T.matrix()
YHAT = T.matrix()
YHAT_MULTI = T.matrix()
dX = models.discrim(X, *discrim_params)

print 'COMPILING...'
_dis = theano.function([X], dX)
print 'Done!'

# Data processing
path = os.path.join(data_dir, "vc.hdf5")
tr_data, tr_stream = visual_concepts(path, ntrain=None)
patches_idx = tr_stream.dataset.provides_sources.index('patches')
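# Hedged usage sketch (assumption, not from the original script): pull one
# minibatch from the Fuel stream, pick out the 'patches' source, and run the
# compiled discriminator on it after the usual transform() preprocessing.
batch = next(tr_stream.get_epoch_iterator())
patches = batch[patches_idx]
scores = _dis(transform(patches))
print 'discriminator output shape:', scores.shape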
def load_generator_model(min_num_gen_filters, model_params_dict):
    # initial square image size
    init_image_size = 4
    # set num of filters for each layer
    num_gen_filters0 = min_num_gen_filters*8

    # LAYER 0 (LINEAR W/ BN)
    print 'LOAD GENERATOR LINEAR LAYER 0'
    linear_w0 = sharedX(model_params_dict['gen_linear_w0'], name='gen_linear_w0')
    linear_bn_w0 = sharedX(model_params_dict['gen_linear_bn_w0'], name='gen_linear_bn_w0')
    linear_bn_b0 = sharedX(model_params_dict['gen_linear_bn_b0'], name='gen_linear_bn_b0')

    # LAYER 1 (DECONV)
    print 'SET GENERATOR CONV LAYER 1'
    conv_w1 = sharedX(model_params_dict['gen_conv_w1'], name='gen_conv_w1')
    conv_bn_w1 = sharedX(model_params_dict['gen_conv_bn_w1'], name='gen_conv_bn_w1')
    conv_bn_b1 = sharedX(model_params_dict['gen_conv_bn_b1'], name='gen_conv_bn_b1')

    # LAYER 2 (DECONV)
    print 'SET GENERATOR CONV LAYER 2'
    conv_w2 = sharedX(model_params_dict['gen_conv_w2'], name='gen_conv_w2')
    conv_bn_w2 = sharedX(model_params_dict['gen_conv_bn_w2'], name='gen_conv_bn_w2')
    conv_bn_b2 = sharedX(model_params_dict['gen_conv_bn_b2'], name='gen_conv_bn_b2')

    # LAYER 3 (DECONV)
    print 'SET GENERATOR CONV LAYER 3'
    conv_w3 = sharedX(model_params_dict['gen_conv_w3'], name='gen_conv_w3')
    conv_bn_w3 = sharedX(model_params_dict['gen_conv_bn_w3'], name='gen_conv_bn_w3')
    conv_bn_b3 = sharedX(model_params_dict['gen_conv_bn_b3'], name='gen_conv_bn_b3')

    # LAYER 4 (DECONV, OUTPUT)
    print 'SET GENERATOR CONV LAYER 4'
    conv_w4 = sharedX(model_params_dict['gen_conv_w4'], name='gen_conv_w4')
    conv_b4 = sharedX(model_params_dict['gen_conv_b4'], name='gen_conv_b4')

    generator_params = [[linear_w0, linear_bn_b0,
                         conv_w1, conv_bn_b1,
                         conv_w2, conv_bn_b2,
                         conv_w3, conv_bn_b3,
                         conv_w4, conv_b4],
                        [linear_bn_w0, conv_bn_w1, conv_bn_w2, conv_bn_w3]]

    print 'SET GENERATOR FUNCTION'
    def generator_function(hidden_data, is_train=True):
        # layer 0 (linear)
        h0 = T.dot(hidden_data, linear_w0)
        h0 = h0 + t_rng.normal(size=h0.shape, std=0.01, dtype=t_floatX)
        h0 = relu(batchnorm(X=h0, g=linear_bn_w0, b=linear_bn_b0))
        h0 = h0.reshape((h0.shape[0], num_gen_filters0, init_image_size, init_image_size))
        # layer 1 (deconv)
        h1 = deconv(h0, conv_w1, subsample=(2, 2), border_mode=(2, 2))
        h1 = h1 + t_rng.normal(size=h1.shape, std=0.01, dtype=t_floatX)
        h1 = relu(batchnorm(h1, g=conv_bn_w1, b=conv_bn_b1))
        # layer 2 (deconv)
        h2 = deconv(h1, conv_w2, subsample=(2, 2), border_mode=(2, 2))
        h2 = h2 + t_rng.normal(size=h2.shape, std=0.01, dtype=t_floatX)
        h2 = relu(batchnorm(h2, g=conv_bn_w2, b=conv_bn_b2))
        # layer 3 (deconv)
        h3 = deconv(h2, conv_w3, subsample=(2, 2), border_mode=(2, 2))
        h3 = h3 + t_rng.normal(size=h3.shape, std=0.01, dtype=t_floatX)
        h3 = relu(batchnorm(h3, g=conv_bn_w3, b=conv_bn_b3))
        # layer 4 (deconv, tanh output)
        output = tanh(deconv(h3, conv_w4, subsample=(2, 2), border_mode=(2, 2))
                      + conv_b4.dimshuffle('x', 0, 'x', 'x'))
        return output

    return [generator_function, generator_params]
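# Hedged usage sketch (assumptions: the pickled dict uses the key names above,
# the path is hypothetical, and the noise dimension 100 matches the input size
# of gen_linear_w0).
model_params_dict = utils.PickleLoad('models/gen_params.pkl')
generator_function, generator_params = load_generator_model(
    min_num_gen_filters=128, model_params_dict=model_params_dict)
Z = T.matrix()
_sample = theano.function([Z], generator_function(Z, is_train=False))
samples = _sample(np_rng.uniform(-1., 1., size=(100, 100)).astype('float32'))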
inf_gen_model = InfGenModel(
    bu_modules=bu_modules,
    td_modules=td_modules,
    im_modules=im_modules,
    sc_modules=[],
    merge_info=merge_info,
    output_transform=output_transform,
    use_sc=False
)
# inf_gen_model.load_params(inf_gen_param_file)

####################################
# Setup the optimization objective #
####################################
lam_vae = sharedX(floatX([1.0]))
lam_kld = sharedX(floatX([1.0]))
noise = sharedX(floatX([noise_std]))
gen_params = inf_gen_model.gen_params
inf_params = inf_gen_model.inf_params
g_params = gen_params + inf_params

######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

# Setup symbolic vars for the model inputs, outputs, and costs
Xg = T.tensor4()  # symbolic var for inputs to bottom-up inference network
Z0 = T.matrix()   # symbolic var for "noise" inputs to the generative stuff

##########################################################
def load_batchnorm(model_path):
    # load pickled batchnorm statistics and wrap them as shared variables
    bn = utils.PickleLoad(model_path)
    bn_params = [sharedX(b) for b in bn]
    return bn_params
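# Hedged usage sketch: mirrors how get_params() restores batchnorm statistics
# elsewhere in this code base; the file name here is hypothetical.
disc_batchnorm = load_batchnorm('models/disc_batchnorm.pkl')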
    np.save(file=samples_dir + "/" + model_name + "_MOMENT_COST",
            arr=np.asarray(moment_cost_list))
    np.save(file=samples_dir + "/" + model_name + "_VAE_COST",
            arr=np.asarray(vae_cost_list))


if __name__ == "__main__":
    batch_size = 128
    num_epochs = 100
    _, data_stream = faces(batch_size=batch_size)

    num_hiddens = 1024
    learning_rate = 1e-4
    l2_weight = 1e-5

    optimizer = Adagrad(lr=sharedX(learning_rate),
                        regularizer=Regularizer(l2=l2_weight))

    model_test_name = (
        model_name
        + "_HIDDEN{}".format(int(num_hiddens))
        + "_REG{}".format(int(-np.log10(l2_weight)))
        + "_LR{}".format(int(-np.log10(learning_rate)))
    )
    train_model(
        model_name=model_test_name,
        data_stream=data_stream,
        num_hiddens=num_hiddens,
        num_epochs=num_epochs,
        optimizer=optimizer,
    )
# compute costs based on discriminator output for real/generated data
d_cost_real = sum([bce(p, T.ones(p.shape)).mean() for p in p_real])
d_cost_gen = sum([bce(p, T.zeros(p.shape)).mean() for p in p_gen])
g_cost_d = sum([bce(p, T.ones(p.shape)).mean() for p in p_gen])
# d_cost_real = bce(p_real[-1], T.ones(p_real[-1].shape)).mean()
# d_cost_gen = bce(p_gen[-1], T.zeros(p_gen[-1].shape)).mean()
# g_cost_d = bce(p_gen[-1], T.ones(p_gen[-1].shape)).mean()

d_cost = d_cost_real + d_cost_gen + (1e-5 * sum([T.sum(p**2.0) for p in discrim_params]))
g_cost = g_cost_d + (1e-5 * sum([T.sum(p**2.0) for p in gen_params]))
cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
d_updates = d_updater(discrim_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print 'COMPILING'
t = time()
_train_g = theano.function([X, Z0], cost, updates=g_updates)
_train_d = theano.function([X, Z0], cost, updates=d_updates)
_gen = theano.function([Z0], gX)
print "{0:.2f} seconds to compile theano functions".format(time()-t)

f_log = open("{}/{}.ndjson".format(log_dir, desc), 'wb')
        get_masked_data(x_in, im_shape=(nc, npx, npx), drop_prob=0.,
                        occ_shape=(16, 16), occ_count=3, data_mean=Xmu)
    # reshape and process data for use as model input
    xm_gen = 1. - xm_gen  # mask is 1 for unobserved pixels
    xm_inf = xm_gen       # mask is 1 for pixels to predict
    xg_gen = train_transform(xg_gen)
    xm_gen = train_transform(xm_gen, add_fuzz=False)
    xg_inf = train_transform(xg_inf)
    xm_inf = train_transform(xm_inf, add_fuzz=False)
    return xg_gen, xm_gen, xg_inf, xm_inf

####################################
# Setup the optimization objective #
####################################
lam_kld = sharedX(floatX([1.0]))
log_var = sharedX(floatX([1.0]))
X_init = sharedX(floatX(np.zeros((1, nc, npx, npx))))
gen_params = inf_gen_model.gen_params
inf_params = inf_gen_model.inf_params
all_params = inf_gen_model.all_params + [log_var, X_init]

######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

# Setup symbolic vars for the model inputs, outputs, and costs
Xg_gen = T.tensor4()  # input to generator, with some parts masked out
Xm_gen = T.tensor4()  # mask indicating parts that are masked out
Xg_inf = T.tensor4()  # complete observation, for input to inference net
Xm_inf = T.tensor4()  # mask for which bits to predict