def construct_nets(self):
    """Define batch-norm ops and static weight shapes for a fixed 5-layer
    DCGAN-style generator and discriminator (spatial size halved 4 times)."""
    self.num_disc_layers = 5
    self.num_gen_layers = 5
    # One batch_norm op per discriminator layer (d_bn0..d_bn4).
    self.d_batch_norm = AttributeDict([
        ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
        for dbn_i in range(self.num_disc_layers)
    ])
    # Separate batch norms for the supervised discriminator path
    # (dict keys "sd_bn*", TF variable names "sup_d_bn*").
    self.sup_d_batch_norm = AttributeDict([
        ("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i))
        for dbn_i in range(self.num_disc_layers)
    ])
    self.g_batch_norm = AttributeDict([
        ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
        for gbn_i in range(self.num_gen_layers)
    ])
    # Spatial sizes after each stride-2 conv, down to 1/16 of the input.
    s_h, s_w = self.x_dim[0], self.x_dim[1]
    s_h2, s_w2 = conv_out_size(s_h, 2), conv_out_size(s_w, 2)
    s_h4, s_w4 = conv_out_size(s_h2, 2), conv_out_size(s_w2, 2)
    s_h8, s_w8 = conv_out_size(s_h4, 2), conv_out_size(s_w4, 2)
    s_h16, s_w16 = conv_out_size(s_h8, 2), conv_out_size(s_w8, 2)
    # Output resolution of each generator layer, coarsest first.
    self.gen_output_dims = OrderedDict([("g_h0_out", (s_h16, s_w16)),
                                        ("g_h1_out", (s_h8, s_w8)),
                                        ("g_h2_out", (s_h4, s_w4)),
                                        ("g_h3_out", (s_h2, s_w2)),
                                        ("g_h4_out", (s_h, s_w))])
    # Generator weights: a linear projection of z to the gf_dim*8 feature
    # volume, then four 5x5 deconv layers halving the channel count.
    self.gen_weight_dims = OrderedDict([
        ("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)),
        ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16, )),
        ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)),
        ("g_h1_b", (self.gf_dim * 4, )),
        ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)),
        ("g_h2_b", (self.gf_dim * 2, )),
        ("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)),
        ("g_h3_b", (self.gf_dim * 1, )),
        ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)),
        ("g_h4_b", (self.c_dim, ))
    ])
    # Discriminator weights: four 5x5 conv layers doubling the channel
    # count, then two linear layers down to K class logits.
    self.disc_weight_dims = OrderedDict([
        ("d_h0_W", (5, 5, self.c_dim, self.df_dim)),
        ("d_h0_b", (self.df_dim, )),
        ("d_h1_W", (5, 5, self.df_dim, self.df_dim * 2)),
        ("d_h1_b", (self.df_dim * 2, )),
        ("d_h2_W", (5, 5, self.df_dim * 2, self.df_dim * 4)),
        ("d_h2_b", (self.df_dim * 4, )),
        ("d_h3_W", (5, 5, self.df_dim * 4, self.df_dim * 8)),
        ("d_h3_b", (self.df_dim * 8, )),
        ("d_h_end_lin_W", (self.df_dim * 8 * s_h16 * s_w16,
                           self.df_dim * 4)),
        ("d_h_end_lin_b", (self.df_dim * 4, )),
        ("d_h_out_lin_W", (self.df_dim * 4, self.K)),
        ("d_h_out_lin_b", (self.K, ))
    ])
def build_bgan_graph(self):
    """Build input placeholders, one set of explicitly-managed generator
    weights, and the generator / sampler / prior ops for this variant."""
    self.inputs = tf.placeholder(tf.float32,
                                 [self.batch_size] + self.x_dim,
                                 name='real_images')
    self.labeled_inputs = tf.placeholder(tf.float32,
                                         [self.batch_size] + self.x_dim,
                                         name='real_images_w_labels')
    # K+1 classes: class 0 is reserved for "fake".
    self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K+1],
                                 name='real_targets')
    self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
    #self.z_sum = histogram_summary("z", self.z) TODO looks cool
    ### Generator
    self.gen_param_list = []
    with tf.variable_scope("generator") as scope:
        # A single AttributeDict of variables, one per entry of weight_dims,
        # all initialized from N(0, 0.02^2).
        gen_params = AttributeDict()
        for name, shape in self.weight_dims.items():
            gen_params[name] = tf.get_variable(
                "%s" % (name), shape,
                initializer=tf.random_normal_initializer(stddev=0.02))
        self.gen_param_list.append(gen_params)
    self.generation = {}
    # NOTE(review): plain assignment overwrites these keys on every loop
    # pass, so only the last gen_params' ops survive; other variants of this
    # graph append to lists instead. With a single entry in gen_param_list
    # the result is the same — confirm before adding more generators.
    for gen_params in self.gen_param_list:
        self.generation["g_prior"] = self.gen_prior(gen_params)
        self.generation["generators"] = self.generator(self.z, gen_params)
        self.generation["gen_samplers"] = self.sampler(self.z, gen_params)
def initialize_wgts(self, scope_str):
    """Create TF variables for every (z-sample, MCMC-step) weight group.

    scope_str selects which network's shapes to instantiate ("generator"
    or "discriminator"); any other value raises RuntimeError. Each group
    is an AttributeDict of variables named "<weight>_<zi>_<m>" under the
    given variable scope, initialized from N(0, 0.02^2). Returns the flat
    list of groups, z-sample-major.
    """
    if scope_str == "generator":
        weight_dims, numz = self.gen_weight_dims, self.num_gen
    elif scope_str == "discriminator":
        weight_dims, numz = self.disc_weight_dims, self.num_disc
    else:
        raise RuntimeError("invalid scope!")
    param_list = []
    with tf.variable_scope(scope_str):
        for z_idx in range(numz):
            for mcmc_idx in range(self.num_mcmc):
                group = AttributeDict()
                for w_name, w_shape in weight_dims.items():
                    group[w_name] = tf.get_variable(
                        "%s_%04d_%04d" % (w_name, z_idx, mcmc_idx),
                        w_shape,
                        initializer=tf.random_normal_initializer(
                            stddev=0.02))
                param_list.append(group)
    return param_list
def initialize_dist_wgts(self, scope_str):
    """Create mean/variance variables for the per-generator output
    distributions.

    Only the "distrib_classifier" scope is supported; anything else raises
    RuntimeError. Returns a single-element list holding one AttributeDict
    with keys "dist_<i>_mu" / "dist_<i>_var" for every generator i, shaped
    per self.distrib_weight_dims.
    """
    if scope_str == "distrib_classifier":
        weight_dims = self.distrib_weight_dims
    else:
        raise RuntimeError("invalid scope!")
    param_list = []
    with tf.variable_scope(scope_str):
        wgts_ = AttributeDict()
        for zi in range(self.num_gen):
            mu_name = "dist_%i_mu" % (zi)
            mu_shape = weight_dims[mu_name]
            var_name = "dist_%i_var" % (zi)
            var_shape = weight_dims[var_name]
            # Means start near the uniform mixture weight 1/num_gen.
            # BUG FIX: was `mean=1 / self.num_gen`, which is integer
            # division under Python 2 (this file still uses iteritems/
            # xrange elsewhere) and so collapsed to 0 for num_gen > 1;
            # force float division.
            wgts_[mu_name] = tf.get_variable(
                "%s" % mu_name,
                mu_shape,
                initializer=tf.random_normal_initializer(
                    mean=1.0 / self.num_gen, stddev=0.02))
            wgts_[var_name] = tf.get_variable(
                "%s" % var_name,
                var_shape,
                initializer=tf.random_normal_initializer(stddev=0.02))
        param_list.append(wgts_)
    return param_list
def initialize_wgts(self, scope_str):
    """Instantiate Glorot-uniform TF variables for one network's weights.

    scope_str must be one of the module constants GEN, DISC, or ENC; any
    other value raises RuntimeError. Builds num_<net> * num_mcmc groups of
    variables named "<weight>_<zi>_<m>" inside the matching variable
    scope, and returns them as a flat list of AttributeDicts.
    """
    if scope_str == GEN:
        weight_dims, numz = self.gen_weight_dims, self.num_gen
    elif scope_str == DISC:
        weight_dims, numz = self.disc_weight_dims, self.num_disc
    elif scope_str == ENC:
        weight_dims, numz = self.enc_weight_dims, self.num_enc
    else:
        raise RuntimeError("invalid scope!")
    param_list = []
    with tf.variable_scope(scope_str):
        for z_idx in range(numz):
            for mcmc_idx in range(self.num_mcmc):
                group = AttributeDict()
                for w_name, w_shape in weight_dims.items():
                    group[w_name] = tf.get_variable(
                        "%s_%04d_%04d" % (w_name, z_idx, mcmc_idx),
                        w_shape,
                        initializer=tf.glorot_uniform_initializer())
                param_list.append(group)
    return param_list
def construct_gen_from_hypers(self,
                              gen_kernel_size=5,
                              gen_strides=None,
                              num_gfs=None):
    """Derive generator batch norms, layer output sizes, and weight shapes
    from the given hyperparameters.

    Walks the layers from the output backwards: kernel size and spatial
    extent shrink according to each stride, and the g_h0 linear layer maps
    z to the coarsest feature volume. Populates self.gen_output_dims,
    self.gen_weight_dims, and self.gen_kernel_sizes, printing both dicts.

    FIX: the defaults for gen_strides were a mutable list literal; use a
    None sentinel instead (same values, no shared-state anti-pattern).
    """
    if gen_strides is None:
        gen_strides = [2, 2, 2, 2]
    self.g_batch_norm = AttributeDict(
        [("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
         for gbn_i in range(len(gen_strides))])
    if num_gfs is None:
        num_gfs = [
            self.gf_dim * 8, self.gf_dim * 4, self.gf_dim * 2, self.gf_dim
        ]
    assert len(gen_strides) == len(num_gfs), "invalid hypers!"
    s_h, s_w = self.x_dim[0], self.x_dim[1]
    ks = gen_kernel_size
    self.gen_output_dims = OrderedDict()
    self.gen_weight_dims = OrderedDict()
    # Final layer emits c_dim image channels.
    num_gfs = num_gfs + [self.c_dim]
    self.gen_kernel_sizes = [ks]
    # Walk from the output layer back to the input: each step records the
    # current resolution/kernel, then shrinks both by the layer's stride.
    for layer in reversed(range(len(gen_strides))):
        self.gen_output_dims["g_h%i_out" % (layer + 1)] = (s_h, s_w)
        assert gen_strides[layer] <= 2, "invalid stride"
        assert ks % 2 == 1, "invalid kernel size"
        self.gen_weight_dims["g_h%i_W" % (layer + 1)] = \
            (ks, ks, num_gfs[layer + 1], num_gfs[layer])
        self.gen_weight_dims["g_h%i_b" % (layer + 1)] = (num_gfs[layer + 1], )
        s_h = conv_out_size(s_h, gen_strides[layer])
        s_w = conv_out_size(s_w, gen_strides[layer])
        ks = kernel_sizer(ks, gen_strides[layer])
        self.gen_kernel_sizes.append(ks)
    # Linear projection of z to the coarsest feature volume.
    self.gen_weight_dims.update(
        OrderedDict([("g_h0_lin_W", (self.z_dim, num_gfs[0] * s_h * s_w)),
                     ("g_h0_lin_b", (num_gfs[0] * s_h * s_w, ))]))
    self.gen_output_dims["g_h0_out"] = (s_h, s_w)
    for k, v in self.gen_output_dims.items():
        print("%s: %s" % (k, v))
    print('****')
    for k, v in self.gen_weight_dims.items():
        print("%s: %s" % (k, v))
    print('****')
def construct_disc_from_hypers(self,
                               disc_kernel_size=5,
                               disc_strides=None,
                               num_dfs=None):
    """Derive discriminator batch norms and weight shapes from the given
    hyperparameters.

    Builds one strided conv layer per entry of disc_strides (kernel size
    and spatial extent shrinking per stride), then appends the encoder
    head (z -> features) and the classification head (features -> K
    logits). Populates self.disc_weight_dims and self.disc_kernel_sizes,
    printing the result.

    FIX 1: "d_h0_enc_lin_b" was declared as (num_dfs[-1]) — parentheses
    around a bare int, not a tuple — unlike every other bias shape here;
    tf.get_variable expects a shape sequence, so make it a 1-tuple.
    FIX 2: disc_strides defaulted to a mutable list literal; use a None
    sentinel instead (same values).
    """
    if disc_strides is None:
        disc_strides = [2, 2, 2, 2]
    self.d_batch_norm = AttributeDict(
        [("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
         for dbn_i in range(len(disc_strides))])
    if num_dfs is None:
        num_dfs = [
            self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8
        ]
    assert len(disc_strides) == len(num_dfs), "invalid hypers!"
    self.disc_weight_dims = OrderedDict()
    s_h, s_w = self.x_dim[0], self.x_dim[1]
    # Prepend the image channel count so layer i maps
    # num_dfs[i] -> num_dfs[i+1].
    num_dfs = [self.c_dim] + num_dfs
    ks = disc_kernel_size
    self.disc_kernel_sizes = [ks]
    for layer in range(len(disc_strides)):
        assert disc_strides[layer] <= 2, "invalid stride"
        assert ks % 2 == 1, "invalid kernel size"
        self.disc_weight_dims["d_h%i_W" % layer] = \
            (ks, ks, num_dfs[layer], num_dfs[layer + 1])
        self.disc_weight_dims["d_h%i_b" % layer] = (num_dfs[layer + 1], )
        s_h = conv_out_size(s_h, disc_strides[layer])
        s_w = conv_out_size(s_w, disc_strides[layer])
        ks = kernel_sizer(ks, disc_strides[layer])
        self.disc_kernel_sizes.append(ks)
    # Encoder head (three linear layers from z) plus the flattened-conv
    # classification head ending in K logits.
    self.disc_weight_dims.update(
        OrderedDict([("d_h0_enc_lin_W", (self.z_dim, num_dfs[-1])),
                     ("d_h0_enc_lin_b", (num_dfs[-1], )),
                     ("d_h1_enc_lin_W", (num_dfs[-1], num_dfs[-1])),
                     ("d_h1_enc_lin_b", (num_dfs[-1], )),
                     ("d_h2_enc_lin_W", (num_dfs[-1], num_dfs[-1])),
                     ("d_h2_enc_lin_b", (num_dfs[-1], )),
                     ("d_h_lin_W", (num_dfs[-1] * s_h * s_w, num_dfs[-1])),
                     ("d_h_lin_b", (num_dfs[-1], )),
                     ("d_h_out_lin_W", (num_dfs[-1], self.K)),
                     ("d_h_out_lin_b", (self.K, ))]))
    for k, v in self.disc_weight_dims.items():
        print("%s: %s" % (k, v))
    print("*****")
def construct_enc_from_hypers(self,
                              enc_kernel_size=5,
                              enc_strides=[2, 2, 2, 2],
                              num_efs=None):
    """Derive encoder batch norms and weight shapes from the given
    hyperparameters.

    Builds one strided conv layer per entry of enc_strides, then appends
    two linear layers mapping the flattened features down to z_dim.
    Populates self.enc_weight_dims and self.enc_kernel_sizes, printing the
    result.

    NOTE: enc_strides keeps its list default for signature compatibility;
    it is only read, never mutated, so the mutable default is harmless —
    but do not mutate it in future edits.
    """
    self.e_batch_norm = AttributeDict(
        [("e_bn%i" % ebn_i, batch_norm(name="e_bn%i" % ebn_i))
         for ebn_i in range(len(enc_strides))])
    if num_efs is None:
        # Channel widths double each layer: ef_dim, 2*ef_dim, 4*, 8*.
        num_efs = [self.ef_dim * (2**i) for i in range(4)]
    assert len(enc_strides) == len(num_efs), "invalid hypers!"
    self.enc_weight_dims = OrderedDict()
    s_h, s_w = self.x_dim[0], self.x_dim[1]
    # Prepend image channels so layer i maps num_efs[i] -> num_efs[i+1].
    num_efs = [self.c_dim] + num_efs
    ks = enc_kernel_size
    self.enc_kernel_sizes = [ks]
    for layer in range(len(enc_strides)):
        assert enc_strides[layer] <= 2, "invalid strides"
        assert ks % 2 == 1, "invalid kernel size"
        self.enc_weight_dims["e_h%i_W" % layer] = \
            (ks, ks, num_efs[layer], num_efs[layer + 1])
        self.enc_weight_dims["e_h%i_b" % layer] = (num_efs[layer + 1], )
        s_h = conv_out_size(s_h, enc_strides[layer])
        s_w = conv_out_size(s_w, enc_strides[layer])
        ks = kernel_sizer(ks, enc_strides[layer])
        self.enc_kernel_sizes.append(ks)
    # Flatten, then project down to the latent dimension.
    self.enc_weight_dims.update(
        OrderedDict([("e_h_end_lin_W",
                      (num_efs[-1] * s_h * s_w, num_efs[-1])),
                     ("e_h_end_lin_b", (num_efs[-1], )),
                     ("e_h_out_lin_W", (num_efs[-1], self.z_dim)),
                     ("e_h_out_lin_b", (self.z_dim, ))]))
    for k, v in self.enc_weight_dims.items():
        print("%s: %s" % (k, v))
    print('****')
def build_bgan_graph(self):
    """Build the single-generator training graph: placeholders, explicitly
    managed generator weights, GAN losses with L2 regularization, WGAN-style
    discriminator weight clipping, and Adam optimizers for each side."""
    self.inputs = tf.placeholder(tf.float32,
                                 [self.batch_size] + self.x_dim,
                                 name='real_images')
    self.labeled_inputs = tf.placeholder(tf.float32,
                                         [self.batch_size] + self.x_dim,
                                         name='real_images_w_labels')
    self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K + 1],
                                 name='real_targets')
    self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
    # NOTE(review): attribute says drop_prob but the placeholder is named
    # 'keep_prob' — confirm which convention callers feed.
    self.drop_prob = tf.placeholder(tf.float32, name='keep_prob')
    #self.z_sum = histogram_summary("z", self.z) TODO looks cool
    self.gen_param_list = []
    #Creating a generator parameter list under generator scope
    with tf.variable_scope("generator") as scope:
        self.gen_params = AttributeDict(
        )  #A dictionary to dump the generator parameters
        for name, shape in self.weight_dims.items():
            if ('_W' in name):
                # He-style scaling for weight matrices:
                # stddev = sqrt(2 / fan_in).
                self.gen_params[name] = tf.get_variable(
                    "%s" % (name),
                    shape,
                    initializer=tf.random_normal_initializer(
                        stddev=1. / tf.sqrt(shape[0] / 2.)))
            else:
                self.gen_params[name] = tf.get_variable(
                    "%s" % (name),
                    shape,
                    initializer=tf.random_normal_initializer(stddev=0.02))
    self.D, self.D_logits = self.discriminator(self.inputs, self.K)
    # Discriminator loss on real images: -log D(x); 1e-8 guards log(0).
    self.d_loss_real = -tf.reduce_mean(tf.log(self.D + 1e-8))
    self.generation = defaultdict(list)
    self.generation["generators"].append(
        self.generator(self.z, self.gen_params, self.drop_prob))
    # NOTE(review): generator() is invoked a second time here, so the tensor
    # scored by the discriminator is a separate subgraph from the one stored
    # in generation["generators"] above — confirm this is intentional.
    self.D_, D_logits_ = self.discriminator(self.generator(
        self.z, self.gen_params, self.drop_prob), self.K, reuse=True)
    self.generation["d_logits"].append(D_logits_)
    self.generation["d_probs"].append(self.D_)
    # Discriminator loss on fakes: -log(1 - D(G(z))).
    self.d_loss_fake = -tf.reduce_mean(
        tf.log(1 - self.generation["d_probs"][0] + 1e-8))
    #print(d_loss_fake)
    # Non-saturating generator loss: -log D(G(z)).
    g_loss_ = -tf.reduce_mean(tf.log(self.generation["d_probs"][0] + 1e-8))
    t_vars = tf.trainable_variables()
    reg_loss_g = 0
    reg_loss_d = 0
    # L2 penalty over generator weight matrices only (biases skipped).
    with tf.variable_scope("generator") as scope:
        for name, value in self.gen_params.items():
            if ('_W' in name):
                reg_loss_g += tf.nn.l2_loss(value)
    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    # WGAN-style weight clipping ops for the discriminator variables;
    # callers run self.clip_d after each discriminator step.
    clip_d = [
        w.assign(tf.clip_by_value(w, -0.01, 0.01)) for w in self.d_vars
    ]
    self.clip_d = clip_d
    # L2 penalty over discriminator weight matrices; cnt counts them.
    self.cnt = 0
    for var in t_vars:
        if ('_W' in var.name and 'd_' in var.name):
            reg_loss_d += tf.nn.l2_loss(var)
            self.cnt += 1
    self.d_loss = tf.reduce_mean(self.d_loss_real + self.d_loss_fake +
                                 self.alpha * reg_loss_d)
    # NOTE(review): g_vars ends up as a list containing one inner list;
    # TF1's compute_gradients flattens var_list so minimize still works,
    # but confirm no caller indexes self.g_vars expecting flat Variables.
    self.g_vars = []
    self.g_vars.append([var for var in t_vars if 'g_' in var.name])
    self.g_loss = tf.reduce_mean(g_loss_ + self.alpha * reg_loss_g)
    self.d_learning_rate = tf.placeholder(tf.float32, shape=[])
    d_opt_adam = tf.train.AdamOptimizer(learning_rate=self.d_learning_rate)
    self.d_optim_adam = d_opt_adam.minimize(self.d_loss,
                                            var_list=self.d_vars)
    self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
    g_opt_adam = tf.train.AdamOptimizer(learning_rate=self.g_learning_rate)
    self.g_optims_adam = g_opt_adam.minimize(self.g_loss,
                                             var_list=self.g_vars)
class BGAN(object):
    """Minimal fully-connected GAN for vector data.

    The generator is a 2-hidden-layer MLP with dropout whose weights are
    created explicitly (self.gen_params / self.weight_dims) so they can be
    addressed by name; the discriminator is a small MLP with
    library-managed variables. K == 1 means unsupervised training, with
    label 0 reserved for fake samples.

    BUG FIX vs. original: the "g_lin_b" entry of weight_dims was
    (self.x_dim[0]) — a parenthesized int, not a tuple — unlike every other
    bias shape; tf.get_variable expects a shape sequence, so it is now a
    1-tuple. The dead '''generator3''' string literal was also removed.
    """

    def __init__(self, x_dim, z_dim, dataset_size, batch_size=64,
                 alpha=0.001, lr=0.0002, optimizer='adam'):
        self.batch_size = batch_size
        self.dataset_size = dataset_size
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.optimizer = optimizer.lower()
        self.alpha = alpha  # L2 regularization weight
        self.lr = lr
        # Shapes of every generator variable, in creation order:
        # z -> 100 -> 100 -> x_dim[0].
        self.weight_dims = OrderedDict([("g_h0_lin_W", (self.z_dim, 100)),
                                        ("g_h0_lin_b", (100, )),
                                        ("g_h1_lin_W", (100, 100)),
                                        ("g_h1_lin_b", (100, )),
                                        ("g_lin_W", (100, self.x_dim[0])),
                                        ("g_lin_b", (self.x_dim[0], ))])
        self.K = 1  # 1 means unsupervised, label == 0 always reserved for fake
        self.build_bgan_graph()

    def _get_optimizer(self, lr):
        """Return the optimizer chosen at construction ('adam' or 'sgd')."""
        if self.optimizer == 'adam':
            return tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
        elif self.optimizer == 'sgd':
            return tf.train.MomentumOptimizer(learning_rate=lr,
                                              momentum=0.5)
        else:
            raise ValueError("Optimizer must be either 'adam' or 'sgd'")

    def build_bgan_graph(self):
        """Build placeholders, generator weights, GAN losses with L2
        regularization, discriminator weight clipping, and Adam optimizers."""
        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + self.x_dim,
                                     name='real_images')
        self.labeled_inputs = tf.placeholder(tf.float32,
                                             [self.batch_size] + self.x_dim,
                                             name='real_images_w_labels')
        self.labels = tf.placeholder(tf.float32,
                                     [self.batch_size, self.K + 1],
                                     name='real_targets')
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        # NOTE(review): attribute says drop_prob but the placeholder is
        # named 'keep_prob' — confirm which convention callers feed.
        self.drop_prob = tf.placeholder(tf.float32, name='keep_prob')
        #self.z_sum = histogram_summary("z", self.z) TODO looks cool
        self.gen_param_list = []
        #Creating a generator parameter list under generator scope
        with tf.variable_scope("generator") as scope:
            self.gen_params = AttributeDict(
            )  #A dictionary to dump the generator parameters
            for name, shape in self.weight_dims.items():
                if ('_W' in name):
                    # He-style scaling: stddev = sqrt(2 / fan_in).
                    self.gen_params[name] = tf.get_variable(
                        "%s" % (name),
                        shape,
                        initializer=tf.random_normal_initializer(
                            stddev=1. / tf.sqrt(shape[0] / 2.)))
                else:
                    self.gen_params[name] = tf.get_variable(
                        "%s" % (name),
                        shape,
                        initializer=tf.random_normal_initializer(
                            stddev=0.02))
        self.D, self.D_logits = self.discriminator(self.inputs, self.K)
        # -log D(x) on real inputs; 1e-8 guards log(0).
        self.d_loss_real = -tf.reduce_mean(tf.log(self.D + 1e-8))
        self.generation = defaultdict(list)
        self.generation["generators"].append(
            self.generator(self.z, self.gen_params, self.drop_prob))
        # NOTE(review): generator() is invoked a second time, so the tensor
        # scored here is a separate subgraph from generation["generators"].
        self.D_, D_logits_ = self.discriminator(self.generator(
            self.z, self.gen_params, self.drop_prob), self.K, reuse=True)
        self.generation["d_logits"].append(D_logits_)
        self.generation["d_probs"].append(self.D_)
        # -log(1 - D(G(z))): discriminator loss on fakes.
        self.d_loss_fake = -tf.reduce_mean(
            tf.log(1 - self.generation["d_probs"][0] + 1e-8))
        #print(d_loss_fake)
        # Non-saturating generator loss: -log D(G(z)).
        g_loss_ = -tf.reduce_mean(
            tf.log(self.generation["d_probs"][0] + 1e-8))
        t_vars = tf.trainable_variables()
        reg_loss_g = 0
        reg_loss_d = 0
        # L2 penalty over generator weight matrices only.
        with tf.variable_scope("generator") as scope:
            for name, value in self.gen_params.items():
                if ('_W' in name):
                    reg_loss_g += tf.nn.l2_loss(value)
        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        # WGAN-style weight clipping ops for the discriminator.
        clip_d = [
            w.assign(tf.clip_by_value(w, -0.01, 0.01)) for w in self.d_vars
        ]
        self.clip_d = clip_d
        self.cnt = 0
        for var in t_vars:
            if ('_W' in var.name and 'd_' in var.name):
                reg_loss_d += tf.nn.l2_loss(var)
                self.cnt += 1
        self.d_loss = tf.reduce_mean(self.d_loss_real + self.d_loss_fake +
                                     self.alpha * reg_loss_d)
        # NOTE(review): g_vars is a list containing one inner list; TF1's
        # compute_gradients flattens var_list so minimize still works.
        self.g_vars = []
        self.g_vars.append([var for var in t_vars if 'g_' in var.name])
        self.g_loss = tf.reduce_mean(g_loss_ + self.alpha * reg_loss_g)
        self.d_learning_rate = tf.placeholder(tf.float32, shape=[])
        d_opt_adam = tf.train.AdamOptimizer(
            learning_rate=self.d_learning_rate)
        self.d_optim_adam = d_opt_adam.minimize(self.d_loss,
                                                var_list=self.d_vars)
        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
        g_opt_adam = tf.train.AdamOptimizer(
            learning_rate=self.g_learning_rate)
        self.g_optims_adam = g_opt_adam.minimize(self.g_loss,
                                                 var_list=self.g_vars)

    def discriminator(self, x, K, reuse=False):
        """Two-layer MLP discriminator; returns (sigmoid probs, logits).

        NOTE(review): dropout is built with rate 0.01 and training=False,
        i.e. it is inactive — confirm whether it was meant to be on.
        """
        with tf.variable_scope("discriminator") as scope:
            if reuse:
                scope.reuse_variables()
            h0 = lrelu(linear(x, 100, 'd_lin_0'))
            drop = dropout(h0, dropout_rate=0.01, name='dropout_layer',
                           training=False)
            h1 = linear(drop, K, 'd_lin_1')
            return tf.nn.sigmoid(h1), h1

    def generator(self, z, gen_params, drop_prob):
        """MLP generator z -> x using the externally supplied weights in
        gen_params; drop_prob is a placeholder feeding both dropout layers.
        Intermediate tensors are stored on self for inspection."""
        with tf.variable_scope("generator") as scope:
            self.h0 = lrelu(
                linear(z, 100, 'g_h0_lin',
                       matrix=gen_params.g_h0_lin_W,
                       bias=gen_params.g_h0_lin_b))
            self.drop = dropout(self.h0, dropout_rate=drop_prob,
                                name='dropout_layer', training=True)
            self.h1 = lrelu(
                linear(self.drop, 100, 'g_h1_lin',
                       matrix=gen_params.g_h1_lin_W,
                       bias=gen_params.g_h1_lin_b))
            self.drop = dropout(self.h1, dropout_rate=drop_prob,
                                name='dropout_layer', training=True)
            # No output nonlinearity (tanh was left disabled upstream).
            self.x = linear(self.drop, self.x_dim[0], 'g_lin',
                            matrix=gen_params.g_lin_W,
                            bias=gen_params.g_lin_b)
            #self.x_ = tanh(h2)
            return self.x
def construct_from_hypers(self,
                          gen_kernel_size=3,
                          num_dfs=None,
                          num_gfs=None):
    """Define batch norms and weight shapes for a 3-D U-Net generator
    (encoder/decoder with skip concatenations) and a 5-layer 3-D conv
    discriminator, plus per-generator output-distribution parameters."""
    self.num_disc_layers = 5
    # 15 generator conv layers (19 minus the disabled middle block below).
    self.num_gen_layers = 19 - 4
    self.d_batch_norm = AttributeDict([
        ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
        for dbn_i in range(self.num_disc_layers)
    ])
    self.g_batch_norm = AttributeDict([
        ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
        for gbn_i in range(self.num_gen_layers)
    ])
    if num_dfs is None:
        num_dfs = [
            self.df_dim, self.df_dim * 2, self.df_dim * 4,
            self.df_dim * 8, self.df_dim
        ]
    if num_gfs is None:
        # Encoder widths up to gf_dim*8, then the decoder back down,
        # ending in num_classes channels for the output logits.
        num_gfs = [
            self.gf_dim, self.gf_dim,
            self.gf_dim * 2, self.gf_dim * 2,
            self.gf_dim * 4, self.gf_dim * 4,
            self.gf_dim * 8, self.gf_dim * 8,
            # middle layer
            # self.gf_dim * 16, self.gf_dim * 16,
            # self.gf_dim * 8, self.gf_dim * 8,
            self.gf_dim * 4, self.gf_dim * 4,
            self.gf_dim * 2, self.gf_dim * 2,
            self.gf_dim, self.gf_dim,
            self.num_classes, self.num_classes
        ]  ## output logits
    # Volume extents after successive stride-2 convs.
    # NOTE(review): s_h2..s_d16 are computed but not referenced below —
    # possibly leftovers from a strided variant.
    s_h, s_w, s_d = self.x_dim[0], self.x_dim[1], self.x_dim[2]
    s_h2, s_w2, s_d2 = conv_out_size(s_h, 2), conv_out_size(
        s_w, 2), conv_out_size(s_d, 2)
    s_h4, s_w4, s_d4 = conv_out_size(s_h2, 2), conv_out_size(
        s_w2, 2), conv_out_size(s_d2, 2)
    s_h8, s_w8, s_d8 = conv_out_size(s_h4, 2), conv_out_size(
        s_w4, 2), conv_out_size(s_d4, 2)
    s_h16, s_w16, s_d16 = conv_out_size(s_h8, 2), conv_out_size(
        s_w8, 2), conv_out_size(s_d8, 2)
    ks = gen_kernel_size
    # self.gen_output_dims = OrderedDict()
    self.gen_weight_dims = OrderedDict()
    # Final entry is the output channel count.
    num_gfs = num_gfs + [self.channel]
    self.gen_kernel_sizes = [ks]
    ################ build unet_generator from the one-by-one ####################
    # Encoder path: pairs of 3x3x3 convs (g_h0..g_h5).
    self.gen_weight_dims["g_h%i_W" % 0] = (3, 3, 3, self.channel,
                                           num_gfs[0])  # from the image
    self.gen_weight_dims["g_h%i_b" % 0] = (num_gfs[0], )
    self.gen_weight_dims["g_h%i_W" % 1] = (3, 3, 3, num_gfs[1],
                                           num_gfs[1])  # conv1
    self.gen_weight_dims["g_h%i_b" % 1] = (num_gfs[1], )
    self.gen_weight_dims["g_h%i_W" % 2] = (3, 3, 3, num_gfs[1], num_gfs[2])
    self.gen_weight_dims["g_h%i_b" % 2] = (num_gfs[2], )
    self.gen_weight_dims["g_h%i_W" % 3] = (3, 3, 3, num_gfs[3],
                                           num_gfs[3])  # conv2
    self.gen_weight_dims["g_h%i_b" % 3] = (num_gfs[3], )
    self.gen_weight_dims["g_h%i_W" % 4] = (3, 3, 3, num_gfs[3], num_gfs[4])
    self.gen_weight_dims["g_h%i_b" % 4] = (num_gfs[4], )
    self.gen_weight_dims["g_h%i_W" % 5] = (3, 3, 3, num_gfs[5],
                                           num_gfs[5])  # conv3
    self.gen_weight_dims["g_h%i_b" % 5] = (num_gfs[5], )
    #############################################################################################
    # Bottleneck convs (g_h6, g_h7).
    self.gen_weight_dims["g_h%i_W" % 6] = (3, 3, 3, num_gfs[5], num_gfs[6])
    self.gen_weight_dims["g_h%i_b" % 6] = (num_gfs[6], )
    self.gen_weight_dims["g_h%i_W" % 7] = (3, 3, 3, num_gfs[7],
                                           num_gfs[7])  # conv4
    self.gen_weight_dims["g_h%i_b" % 7] = (num_gfs[7], )
    ##############################################################################################
    # Decoder path: input channels widened by the skip concatenations.
    self.gen_weight_dims["g_h%i_W" % 8] = (
        3, 3, 3, num_gfs[5] + num_gfs[7], num_gfs[8])  # conv6 concat conv4
    self.gen_weight_dims["g_h%i_b" % 8] = (num_gfs[8], )
    self.gen_weight_dims["g_h%i_W" % 9] = (3, 3, 3, num_gfs[9], num_gfs[9])
    self.gen_weight_dims["g_h%i_b" % 9] = (num_gfs[9], )
    self.gen_weight_dims["g_h%i_W" % 10] = (3, 3, 3,
                                            num_gfs[9] + num_gfs[3],
                                            num_gfs[10])  # conv7 concat conv3
    self.gen_weight_dims["g_h%i_b" % 10] = (num_gfs[10], )
    self.gen_weight_dims["g_h%i_W" % 11] = (3, 3, 3, num_gfs[11],
                                            num_gfs[11])
    self.gen_weight_dims["g_h%i_b" % 11] = (num_gfs[11], )
    self.gen_weight_dims["g_h%i_W" % 12] = (3, 3, 3,
                                            num_gfs[11] + num_gfs[1],
                                            num_gfs[12])  # conv8 concat conv2
    self.gen_weight_dims["g_h%i_b" % 12] = (num_gfs[12], )
    self.gen_weight_dims["g_h%i_W" % 13] = (3, 3, 3, num_gfs[13],
                                            num_gfs[13])
    self.gen_weight_dims["g_h%i_b" % 13] = (num_gfs[13], )
    ################### output layer #########################
    # 1x1x1 conv down to num_classes output channels.
    self.gen_weight_dims["g_h%i_W" % 14] = (1, 1, 1, num_gfs[13],
                                            num_gfs[14])
    self.gen_weight_dims["g_h%i_b" % 14] = (num_gfs[14], )
    #########################################################################################################
    # Discriminator: 5x5x5 convs over the segmentation concatenated with
    # the image (num_classes + channel input channels), then a 1x1x1 conv
    # to a single score channel.
    self.disc_weight_dims = OrderedDict()
    self.disc_weight_dims["d_h%i_W" % 0] = (
        5, 5, 5, self.num_classes + self.channel, num_dfs[0]
    )  # output = ( s_h / 2, s_w / 2 )
    self.disc_weight_dims["d_h%i_b" % 0] = (num_dfs[0], )
    self.disc_weight_dims["d_h%i_W" % 1] = (
        5, 5, 5, num_dfs[0], num_dfs[1])  # output = ( s_h / 4, s_w / 4 )
    self.disc_weight_dims["d_h%i_b" % 1] = (num_dfs[1], )
    self.disc_weight_dims["d_h%i_W" % 2] = (
        5, 5, 5, num_dfs[1], num_dfs[2])  # output = ( s_h / 8, s_w / 8 )
    self.disc_weight_dims["d_h%i_b" % 2] = (num_dfs[2], )
    self.disc_weight_dims["d_h%i_W" % 3] = (
        5, 5, 5, num_dfs[2], num_dfs[3]
    )  # output = ( s_h / 16, s_w / 16 ) # pre: 1, 1,
    self.disc_weight_dims["d_h%i_b" % 3] = (num_dfs[3], )
    # self.disc_weight_dims["d_h%i_W" % 3] = (1, 1, 1, num_dfs[2], 1)  # output = ( s_h / 16, s_w / 16 ) # pre: 1, 1,
    # self.disc_weight_dims["d_h%i_b" % 3] = (1,)
    self.disc_weight_dims["d_h%i_W" % 4] = (1, 1, 1, num_dfs[3], 1)
    self.disc_weight_dims["d_h%i_b" % 4] = (1, )
    # self.disc_weight_dims.update(OrderedDict([("d_h_out_lin_W", (num_dfs[3] * s_h8 * s_w8, 1)),
    #                                           ("d_h_out_lin_b", (1,))]))
    #####################################################################################################################
    # Per-generator output-distribution parameters: one full-volume mean
    # and variance tensor per generator.
    self.distrib_weight_dims = OrderedDict()
    for zi in range(self.num_gen):
        self.distrib_weight_dims["dist_%i_mu" % zi] = (
            1, self.x_dim[0], self.x_dim[1], self.x_dim[2], 1
        )  # self.num_classes
        self.distrib_weight_dims["dist_%i_var" % zi] = (1, self.x_dim[0],
                                                        self.x_dim[1],
                                                        self.x_dim[2], 1)
def build_bgan_graph(self):
    """Build the full Bayesian GAN training graph: one weight group per
    (generator, MCMC-sample) pair, unsupervised / semi-supervised /
    Wasserstein discriminator losses, and per-generator optimizers."""
    self.inputs = tf.placeholder(tf.float32,
                                 [self.batch_size] + self.x_dim,
                                 name='real_images')
    self.labeled_inputs = tf.placeholder(tf.float32,
                                         [self.batch_size] + self.x_dim,
                                         name='real_images_w_labels')
    # K+1 classes: class 0 reserved for "fake".
    self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K + 1],
                                 name='real_targets')
    self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
    #self.z_sum = histogram_summary("z", self.z) TODO looks cool
    # One AttributeDict of variables per (generator gi, MCMC step m),
    # variables named "<weight>_<gi>_<m>".
    self.gen_param_list = []
    with tf.variable_scope("generator") as scope:
        for gi in range(self.num_gen):
            for m in range(self.num_mcmc):
                gen_params = AttributeDict()
                for name, shape in self.weight_dims.items():
                    gen_params[name] = tf.get_variable(
                        "%s_%04d_%04d" % (name, gi, m),
                        shape,
                        initializer=tf.random_normal_initializer(
                            stddev=0.02))
                self.gen_param_list.append(gen_params)
    self.D, self.D_logits = self.discriminator(self.inputs, self.K + 1)
    self.Dsup, self.Dsup_logits = self.discriminator(self.labeled_inputs,
                                                     self.K + 1,
                                                     reuse=True)
    if self.K == 1:
        # Unsupervised: binary real/fake discriminator.
        if self.wasserstein:
            # NOTE(review): sign convention is inverted relative to the
            # usual WGAN write-up but is consistent with d_loss_fake below.
            self.d_loss_real = tf.reduce_mean(self.D_logits)
        else:
            # regular GAN
            # Real samples get the one-hot label for class 1 ("real").
            constant_labels = np.zeros((self.batch_size, 2))
            constant_labels[:, 1] = 1.0
            self.d_loss_real = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.D_logits,
                    labels=tf.constant(constant_labels)))
    else:
        # Semi-supervised: cross-entropy on labeled data plus
        # -log P(not fake) on unlabeled real data (column 0 = fake class).
        self.d_loss_sup = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.Dsup_logits, labels=self.labels))
        self.d_loss_real = -tf.reduce_mean(
            tf.log((1.0 - self.D[:, 0]) + 1e-8))
    # Per-generator ops collected in parallel lists.
    self.generation = defaultdict(list)
    for gen_params in self.gen_param_list:
        self.generation["g_prior"].append(self.gen_prior(gen_params))
        self.generation["g_noise"].append(self.gen_noise(gen_params))
        self.generation["generators"].append(
            self.generator(self.z, gen_params))
        self.generation["gen_samplers"].append(
            self.sampler(self.z, gen_params))
        # NOTE(review): generator() is invoked a second time here, so the
        # scored tensor is a separate subgraph from the one stored above.
        D_, D_logits_ = self.discriminator(self.generator(
            self.z, gen_params), self.K + 1, reuse=True)
        self.generation["d_logits"].append(D_logits_)
        self.generation["d_probs"].append(D_)
    # Score all generators' fakes jointly along the batch axis.
    all_d_logits = tf.concat(self.generation["d_logits"], 0)
    if self.wasserstein:
        self.d_loss_fake = -tf.reduce_mean(all_d_logits)
    else:
        constant_labels = np.zeros(
            (self.batch_size * self.num_gen * self.num_mcmc, self.K + 1))
        constant_labels[:, 0] = 1.0  # class label indicating it came from generator, aka fake
        self.d_loss_fake = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=all_d_logits, labels=tf.constant(constant_labels)))
    t_vars = tf.trainable_variables()
    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    self.d_loss = self.d_loss_real + self.d_loss_fake
    # In Bayesian (non-ML) mode, add the SGHMC prior and noise terms.
    if not self.ml:
        self.d_loss += self.disc_prior() + self.disc_noise()
    if self.K > 1:
        self.d_loss_semi = self.d_loss_sup + self.d_loss_real + \
            self.d_loss_fake
        if not self.ml:
            self.d_loss_semi += self.disc_prior() + self.disc_noise()
    # One variable list per (gi, m) pair, selected by the name suffix.
    self.g_vars = []
    for gi in range(self.num_gen):
        for m in range(self.num_mcmc):
            self.g_vars.append([
                var for var in t_vars
                if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name
            ])
    # Discriminator optimizers: the configured one plus an Adam variant.
    self.d_learning_rate = tf.placeholder(tf.float32, shape=[])
    d_opt = self._get_optimizer(self.d_learning_rate)
    self.d_optim = d_opt.minimize(self.d_loss, var_list=self.d_vars)
    d_opt_adam = tf.train.AdamOptimizer(
        learning_rate=self.d_learning_rate, beta1=0.5)
    self.d_optim_adam = d_opt_adam.minimize(self.d_loss,
                                            var_list=self.d_vars)
    # WGAN-style weight clipping ops for the discriminator variables.
    clip_d = [
        w.assign(tf.clip_by_value(w, -0.01, 0.01)) for w in self.d_vars
    ]
    self.clip_d = clip_d
    if self.K > 1:
        self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
        d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)
        self.d_optim_semi = d_opt_semi.minimize(self.d_loss_semi,
                                                var_list=self.d_vars)
        d_opt_semi_adam = tf.train.AdamOptimizer(
            learning_rate=self.d_semi_learning_rate, beta1=0.5)
        self.d_optim_semi_adam = d_opt_semi_adam.minimize(
            self.d_loss_semi, var_list=self.d_vars)
    # One generator loss + optimizer pair per (gi, m) weight group.
    self.g_optims, self.g_optims_adam = [], []
    self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
    for gi in range(self.num_gen * self.num_mcmc):
        if self.wasserstein:
            g_loss = tf.reduce_mean(self.generation["d_logits"][gi])
        else:
            # -log P(not fake) for this generator's samples.
            g_loss = -tf.reduce_mean(
                tf.log((1.0 - self.generation["d_probs"][gi][:, 0]) +
                       1e-8))
        if not self.ml:
            g_loss += self.generation["g_prior"][gi] + self.generation[
                "g_noise"][gi]
        self.generation["g_losses"].append(g_loss)
        g_opt = self._get_optimizer(self.g_learning_rate)
        self.g_optims.append(
            g_opt.minimize(g_loss, var_list=self.g_vars[gi]))
        g_opt_adam = tf.train.AdamOptimizer(
            learning_rate=self.g_learning_rate, beta1=0.5)
        self.g_optims_adam.append(
            g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))
def __init__(self, x_dim, z_dim, dataset_size, batch_size=64, gf_dim=64,
             df_dim=64, prior_std=1.0, J=1, M=1, num_classes=1, eta=2e-4,
             alpha=0.01, lr=0.0002, optimizer='adam', wasserstein=False,
             ml=False, gen_observed=1000):
    """Configure a two-chain (parallel-tempering) Bayesian DCGAN.

    J generators x M SGHMC samples; prior_std / eta / alpha parameterize
    the SGHMC prior and injected noise; num_classes == 1 selects
    unsupervised mode (class 0 reserved for fakes); ml=True collapses to a
    single maximum-likelihood generator. x_dim is (H, W, C).

    FIX vs. original: dict.iteritems() (Python-2-only) replaced with
    .items(), matching the print()-style code elsewhere in this file; a
    duplicate `self.wasserstein = wasserstein` assignment was dropped.
    """
    assert len(x_dim) == 3, "invalid image dims"
    c_dim = x_dim[2]
    self.is_grayscale = (c_dim == 1)
    self.optimizer = optimizer.lower()
    self.dataset_size = dataset_size
    self.batch_size = batch_size
    self.gen_observed = gen_observed
    self.x_dim = x_dim
    self.z_dim = z_dim
    self.gf_dim = gf_dim
    self.df_dim = df_dim
    self.c_dim = c_dim
    self.lr = lr
    self.d = None
    self.d1 = None
    # Parallel Tempering
    self.invT = 1
    self.TGap = 1
    self.LRGap = 1
    self.EGap = 1
    self.anneal = 1
    self.lr_anneal = 1
    self.wasserstein = wasserstein
    # Batch norms: discriminator chain 0 (d_bn*), chain 1 (d1_bn*), the
    # supervised discriminator path (sd_bn*), and the generator (g_bn*).
    self.d_bn1 = batch_norm(name='d_bn1')
    self.d_bn2 = batch_norm(name='d_bn2')
    self.d_bn3 = batch_norm(name='d_bn3')
    self.d1_bn1 = batch_norm(name='d1_bn1')
    self.d1_bn2 = batch_norm(name='d1_bn2')
    self.d1_bn3 = batch_norm(name='d1_bn3')
    self.sd_bn1 = batch_norm(name='sd_bn1')
    self.sd_bn2 = batch_norm(name='sd_bn2')
    self.sd_bn3 = batch_norm(name='sd_bn3')
    self.g_bn0 = batch_norm(name='g_bn0')
    self.g_bn1 = batch_norm(name='g_bn1')
    self.g_bn2 = batch_norm(name='g_bn2')
    self.g_bn3 = batch_norm(name='g_bn3')
    self.chain0_params = []
    self.chain1_params = []
    # Bayes
    self.prior_std = prior_std
    self.num_gen = J
    self.num_mcmc = M
    self.eta = eta
    self.alpha = alpha
    # ML
    self.ml = ml
    if self.ml:
        assert self.num_gen == 1, "cannot have >1 generator for ml"
    self.output_height = x_dim[0]
    self.output_width = x_dim[1]
    # Spatial sizes at each generator layer (output halved 4 times).
    s_h, s_w = self.output_height, self.output_width
    s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
    s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
    s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
    s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2)
    self.gen_params = AttributeDict()
    self.bgen_params = AttributeDict()
    # DCGAN generator weight shapes: linear projection of z to the
    # gf_dim*8 feature volume, then four 5x5 deconvs down to c_dim.
    self.weight_dims = OrderedDict([
        ("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)),
        ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16, )),
        ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)),
        ("g_h1_b", (self.gf_dim * 4, )),
        ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)),
        ("g_h2_b", (self.gf_dim * 2, )),
        ("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)),
        ("g_h3_b", (self.gf_dim * 1, )),
        ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)),
        ("g_h4_b", (self.c_dim, ))
    ])
    # SGHMC injection noise: N(0, sqrt(2*alpha*eta)) per weight tensor.
    self.sghmc_noise = {}
    self.noise_std = np.sqrt(2 * self.alpha * self.eta)
    for name, dim in self.weight_dims.items():
        self.sghmc_noise[name] = tf.distributions.Normal(
            0., self.noise_std * tf.ones(dim))
    self.K = num_classes  # 1 means unsupervised, label == 0 always reserved for fake
    self.build_bgan_graph()
    if self.K > 1:
        print("self.K")
        print(self.K)
        self.build_test_graph()
def build_bgan_graph(self):
    """Build the full two-chain semi-supervised BGAN training graph.

    Creates placeholders, J*M sets of generator parameters, and for EACH of
    the two discriminator chains (chain 0: D/d_*; chain 1: D1/d1_*) the
    supervised, real and fake losses, the combined semi-supervised loss
    (with prior + SGHMC noise terms) and its optimizers, plus per-generator
    losses/optimizers against both chains.
    """
    # Placeholders. Labels have K+1 columns: column 0 is the "fake" class.
    self.inputs = tf.placeholder(tf.float32,
                                 [self.batch_size] + self.x_dim,
                                 name='real_images')
    self.labeled_inputs = tf.placeholder(tf.float32,
                                         [self.batch_size] + self.x_dim,
                                         name='real_images_w_labels')
    self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K + 1],
                                 name='real_targets')
    self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
    #self.z_sum = histogram_summary("z", self.z) TODO looks cool

    # One parameter set per (generator, MCMC sample) pair; variable names
    # carry the "_%04d_%04d" suffix so they can be matched back later.
    self.gen_param_list = []
    with tf.variable_scope("generator") as scope:
        for gi in xrange(self.num_gen):
            for m in xrange(self.num_mcmc):
                gen_params = AttributeDict()
                for name, shape in self.weight_dims.iteritems():
                    gen_params[name] = tf.get_variable(
                        "%s_%04d_%04d" % (name, gi, m), shape,
                        initializer=tf.random_normal_initializer(
                            stddev=0.02))
                self.gen_param_list.append(gen_params)

    # Liyao: 11/Nov/2019, adding another markov chain (discriminator).
    # Chain 0 (D/d_*) is the original Bayesian-GAN discriminator; chain 1
    # (D1/d1_*) mirrors it at a different temperature. For each chain we
    # keep: probs/logits on real images, on labeled images (sup), the
    # sup/real/fake losses, the trainable vars, the combined semi-
    # supervised loss and its learning-rate placeholder + optimizers.
    self.D, self.D_logits = self.discriminator(self.inputs, self.K + 1)
    self.D1, self.D1_logits = self.discriminator1(self.inputs, self.K + 1)
    self.Dsup, self.Dsup_logits = self.discriminator(self.labeled_inputs,
                                                     self.K + 1,
                                                     reuse=True)
    self.Dsup1, self.Dsup1_logits = self.discriminator1(
        self.labeled_inputs, self.K + 1, reuse=True)

    # Supervised cross-entropy on the labeled minibatch, per chain.
    self.d_loss_sup = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.Dsup_logits,
                                                   labels=self.labels))
    self.d1_loss_sup = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.Dsup1_logits, labels=self.labels))

    # Real loss: -log P(not fake); 1e-8 guards against log(0).
    self.d_loss_real = -tf.reduce_mean(tf.log((1.0 - self.D[:, 0]) + 1e-8))
    self.d1_loss_real = -tf.reduce_mean(
        tf.log((1.0 - self.D1[:, 0]) + 1e-8))

    # Per-generator terms; self.generator(...) reuses the same weights so
    # repeated calls with identical gen_params build identical samples.
    self.generation = defaultdict(list)
    for gen_params in self.gen_param_list:
        self.generation["g_prior"].append(self.gen_prior(gen_params))
        self.generation["g_noise"].append(self.gen_noise(gen_params))
        self.generation["generators"].append(
            self.generator(self.z, gen_params))
        self.generation["gen_samplers"].append(
            self.sampler(self.z, gen_params))
        D_, D_logits_ = self.discriminator(self.generator(
            self.z, gen_params), self.K + 1, reuse=True)
        D_1, D_logits_1 = self.discriminator1(self.generator(
            self.z, gen_params), self.K + 1, reuse=True)
        self.generation["d_logits"].append(D_logits_)
        self.generation["d_probs"].append(D_)
        self.generation["d1_logits"].append(D_logits_1)
        self.generation["d1_probs"].append(D_1)

    # Fake loss: all generated batches concatenated, targets = class 0.
    all_d_logits = tf.concat(self.generation["d_logits"], 0)
    all_d1_logits = tf.concat(self.generation["d1_logits"], 0)
    constant_labels = np.zeros(
        (self.batch_size * self.num_gen * self.num_mcmc, self.K + 1))
    constant_labels[:, 0] = 1.0  # class label indicating it came from generator, aka fake
    self.d_loss_fake = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=all_d_logits, labels=tf.constant(constant_labels)))
    self.d1_loss_fake = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=all_d1_logits, labels=tf.constant(constant_labels)))

    # NOTE(review): the 'd_' substring filter also matches any variable
    # whose name merely contains "d_" — relies on the naming convention.
    t_vars = tf.trainable_variables()
    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    self.d1_vars = [var1 for var1 in t_vars if 'd1_' in var1.name]

    self.d_loss_semi = self.d_loss_sup + self.d_loss_real + self.d_loss_fake
    self.d1_loss_semi = self.d1_loss_sup + self.d1_loss_real + self.d1_loss_fake
    self.d_loss_semi += self.disc_prior() + self.disc_noise()
    # inverse temperature for chain member
    self.d1_loss_semi += self.disc1_prior() + self.disc1_noise(self.TGap)

    # Group generator vars per (gi, m) pair via the name suffix.
    self.g_vars = []
    for gi in xrange(self.num_gen):
        for m in xrange(self.num_mcmc):
            self.g_vars.append([
                var for var in t_vars
                if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name
            ])

    # Discriminator optimizers: configured optimizer + an explicit Adam
    # variant, for each chain.
    self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
    d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)
    self.d_optim_semi = d_opt_semi.minimize(self.d_loss_semi,
                                            var_list=self.d_vars)
    d_opt_semi_adam = tf.train.AdamOptimizer(
        learning_rate=self.d_semi_learning_rate, beta1=0.5)
    self.d_optim_semi_adam = d_opt_semi_adam.minimize(self.d_loss_semi,
                                                      var_list=self.d_vars)
    self.d1_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
    d1_opt_semi = self._get_optimizer(self.d1_semi_learning_rate)
    self.d1_optim_semi = d1_opt_semi.minimize(self.d1_loss_semi,
                                              var_list=self.d1_vars)
    d1_opt_semi_adam = tf.train.AdamOptimizer(
        learning_rate=self.d1_semi_learning_rate, beta1=0.5)
    self.d1_optim_semi_adam = d1_opt_semi_adam.minimize(
        self.d1_loss_semi, var_list=self.d1_vars)

    # Generator losses/optimizers against each chain. Note both g_loss and
    # g1_loss reuse the same prior+noise terms; only the discriminator
    # feedback differs.
    self.g_optims, self.g_optims_adam, self.g1_optims, self.g1_optims_adam = [], [], [], []
    self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
    for gi in xrange(self.num_gen * self.num_mcmc):
        g_loss = -tf.reduce_mean(
            tf.log((1.0 - self.generation["d_probs"][gi][:, 0]) + 1e-8))
        g1_loss = -tf.reduce_mean(
            tf.log((1.0 - self.generation["d1_probs"][gi][:, 0]) + 1e-8))
        g_loss += self.generation["g_prior"][gi] + self.generation[
            "g_noise"][gi]
        g1_loss += self.generation["g_prior"][gi] + self.generation[
            "g_noise"][gi]
        self.generation["g_losses"].append(g_loss)
        self.generation["g1_losses"].append(g1_loss)
        g_opt = self._get_optimizer(self.g_learning_rate)
        self.g_optims.append(
            g_opt.minimize(g_loss, var_list=self.g_vars[gi]))
        g_opt_adam = tf.train.AdamOptimizer(
            learning_rate=self.g_learning_rate, beta1=0.5)
        self.g_optims_adam.append(
            g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))
        g1_opt = self._get_optimizer(self.g_learning_rate)
        self.g1_optims.append(
            g1_opt.minimize(g1_loss, var_list=self.g_vars[gi]))
        g1_opt_adam = tf.train.AdamOptimizer(
            learning_rate=self.g_learning_rate, beta1=0.5)
        self.g1_optims_adam.append(
            g1_opt_adam.minimize(g1_loss, var_list=self.g_vars[gi]))
def construct_from_hypers(self, gen_kernel_size=3, num_dfs=None,
                          num_gfs=None):
    """Set up weight shapes for a 3-D U-Net generator + 3-D discriminator.

    Populates `gen_weight_dims` (19 conv3d layers: an encoder/decoder with
    skip-concatenations and a final 1x1x1 output layer),
    `disc_weight_dims` (5 conv3d layers) and `distrib_weight_dims`
    (per-generator mean/variance maps), plus the matching batch-norm
    layers, then prints all shapes for inspection.

    Args:
        gen_kernel_size: kernel size recorded in `gen_kernel_sizes`
            (the layer shapes below hard-code 3x3x3 / 1x1x1 kernels).
        num_dfs: per-layer discriminator filter counts (default derived
            from `df_dim`).
        num_gfs: per-layer generator filter counts (default derived from
            `gf_dim`; last entries are the single-channel output logits).
    """
    self.num_disc_layers = 5
    self.num_gen_layers = 19
    self.d_batch_norm = AttributeDict([
        ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
        for dbn_i in range(self.num_disc_layers)
    ])
    self.sup_g_batch_norm = AttributeDict([
        ("sup_g_bn%i" % gbn_i, batch_norm(name='sup_g_bn%i' % gbn_i))
        for gbn_i in range(self.num_gen_layers)
    ])
    self.g_batch_norm = AttributeDict([
        ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
        for gbn_i in range(self.num_gen_layers)
    ])
    if num_dfs is None:
        num_dfs = [
            self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8,
            self.df_dim
        ]
    if num_gfs is None:
        num_gfs = [
            self.gf_dim, self.gf_dim, self.gf_dim * 2, self.gf_dim * 2,
            self.gf_dim * 4, self.gf_dim * 4, self.gf_dim * 8,
            self.gf_dim * 8, self.gf_dim * 16, self.gf_dim * 16,
            self.gf_dim * 8, self.gf_dim * 8, self.gf_dim * 4,
            self.gf_dim * 4, self.gf_dim * 2, self.gf_dim * 2, self.gf_dim,
            self.gf_dim, 1, 1
        ]  ## output logits
    # Downsampled spatial sizes (currently unused by the shapes below;
    # kept for parity with the 2-D variant — TODO confirm removable).
    s_h, s_w, s_d = self.x_dim[0], self.x_dim[1], self.x_dim[2]
    s_h2, s_w2, s_d2 = conv_out_size(s_h, 2), conv_out_size(
        s_w, 2), conv_out_size(s_d, 2)
    s_h4, s_w4, s_d4 = conv_out_size(s_h2, 2), conv_out_size(
        s_w2, 2), conv_out_size(s_d2, 2)
    s_h8, s_w8, s_d8 = conv_out_size(s_h4, 2), conv_out_size(
        s_w4, 2), conv_out_size(s_d4, 2)
    s_h16, s_w16, s_d16 = conv_out_size(s_h8, 2), conv_out_size(
        s_w8, 2), conv_out_size(s_d8, 2)
    ks = gen_kernel_size
    # self.gen_output_dims = OrderedDict()
    self.gen_weight_dims = OrderedDict()
    num_gfs = num_gfs + [0]  # sentinel so index 18 of a custom list is safe
    channel = 1
    self.gen_kernel_sizes = [ks]
    #### build unet_generator from the one-by-one
    # Weight tuples are (k_d, k_h, k_w, in_channels, out_channels).
    self.gen_weight_dims["g_h%i_W" % 0] = (3, 3, 3, 1, num_gfs[0]
                                           )  # from the image
    self.gen_weight_dims["g_h%i_b" % 0] = (num_gfs[0], )
    self.gen_weight_dims["g_h%i_W" % 1] = (3, 3, 3, num_gfs[1], num_gfs[1]
                                           )  # conv1
    self.gen_weight_dims["g_h%i_b" % 1] = (num_gfs[1], )
    self.gen_weight_dims["g_h%i_W" % 2] = (3, 3, 3, num_gfs[1], num_gfs[2])
    self.gen_weight_dims["g_h%i_b" % 2] = (num_gfs[2], )
    self.gen_weight_dims["g_h%i_W" % 3] = (3, 3, 3, num_gfs[3], num_gfs[3]
                                           )  # conv2
    self.gen_weight_dims["g_h%i_b" % 3] = (num_gfs[3], )
    self.gen_weight_dims["g_h%i_W" % 4] = (3, 3, 3, num_gfs[3], num_gfs[4])
    self.gen_weight_dims["g_h%i_b" % 4] = (num_gfs[4], )
    self.gen_weight_dims["g_h%i_W" % 5] = (3, 3, 3, num_gfs[5], num_gfs[5]
                                           )  # conv3
    self.gen_weight_dims["g_h%i_b" % 5] = (num_gfs[5], )
    self.gen_weight_dims["g_h%i_W" % 6] = (3, 3, 3, num_gfs[5], num_gfs[6])
    self.gen_weight_dims["g_h%i_b" % 6] = (num_gfs[6], )
    self.gen_weight_dims["g_h%i_W" % 7] = (3, 3, 3, num_gfs[7], num_gfs[7]
                                           )  # conv4
    self.gen_weight_dims["g_h%i_b" % 7] = (num_gfs[7], )
    self.gen_weight_dims["g_h%i_W" % 8] = (3, 3, 3, num_gfs[7], num_gfs[8])
    self.gen_weight_dims["g_h%i_b" % 8] = (num_gfs[8], )
    self.gen_weight_dims["g_h%i_W" % 9] = (3, 3, 3, num_gfs[9], num_gfs[9]
                                           )  # conv5 (bottleneck)
    self.gen_weight_dims["g_h%i_b" % 9] = (num_gfs[9], )
    # Decoder: input channels grow by the concatenated skip connection.
    self.gen_weight_dims["g_h%i_W" % 10] = (3, 3, 3,
                                            num_gfs[9] + num_gfs[7],
                                            num_gfs[10])  # conv6 concat conv4
    self.gen_weight_dims["g_h%i_b" % 10] = (num_gfs[10], )
    self.gen_weight_dims["g_h%i_W" % 11] = (3, 3, 3, num_gfs[11],
                                            num_gfs[11])
    self.gen_weight_dims["g_h%i_b" % 11] = (num_gfs[11], )
    self.gen_weight_dims["g_h%i_W" % 12] = (3, 3, 3,
                                            num_gfs[11] + num_gfs[5],
                                            num_gfs[12])  # conv7 concat conv3
    self.gen_weight_dims["g_h%i_b" % 12] = (num_gfs[12], )
    self.gen_weight_dims["g_h%i_W" % 13] = (3, 3, 3, num_gfs[13],
                                            num_gfs[13])
    self.gen_weight_dims["g_h%i_b" % 13] = (num_gfs[13], )
    self.gen_weight_dims["g_h%i_W" % 14] = (3, 3, 3,
                                            num_gfs[13] + num_gfs[3],
                                            num_gfs[14])  # conv8 concat conv2
    self.gen_weight_dims["g_h%i_b" % 14] = (num_gfs[14], )
    self.gen_weight_dims["g_h%i_W" % 15] = (3, 3, 3, num_gfs[15],
                                            num_gfs[15])
    self.gen_weight_dims["g_h%i_b" % 15] = (num_gfs[15], )
    self.gen_weight_dims["g_h%i_W" % 16] = (3, 3, 3,
                                            num_gfs[15] + num_gfs[1],
                                            num_gfs[16])  # conv9 concat conv1
    self.gen_weight_dims["g_h%i_b" % 16] = (num_gfs[16], )
    self.gen_weight_dims["g_h%i_W" % 17] = (3, 3, 3, num_gfs[17],
                                            num_gfs[17])
    self.gen_weight_dims["g_h%i_b" % 17] = (num_gfs[17], )
    ################### output layer #########################
    self.gen_weight_dims["g_h%i_W" % 18] = (1, 1, 1, num_gfs[17],
                                            num_gfs[18])
    self.gen_weight_dims["g_h%i_b" % 18] = (num_gfs[18], )
    #########################################################################################################
    # Discriminator: four strided 5x5x5 convs then a 1x1x1 to one logit.
    self.disc_weight_dims = OrderedDict()
    self.disc_weight_dims["d_h%i_W" % 0] = (
        5, 5, 5, 1, num_dfs[0])  # output = ( s_h / 2, s_w / 2 )
    self.disc_weight_dims["d_h%i_b" % 0] = (num_dfs[0], )
    self.disc_weight_dims["d_h%i_W" % 1] = (
        5, 5, 5, num_dfs[0], num_dfs[1])  # output = ( s_h / 4, s_w / 4 )
    self.disc_weight_dims["d_h%i_b" % 1] = (num_dfs[1], )
    self.disc_weight_dims["d_h%i_W" % 2] = (
        5, 5, 5, num_dfs[1], num_dfs[2])  # output = ( s_h / 8, s_w / 8 )
    self.disc_weight_dims["d_h%i_b" % 2] = (num_dfs[2], )
    self.disc_weight_dims["d_h%i_W" % 3] = (
        5, 5, 5, num_dfs[2], num_dfs[3]
    )  # output = ( s_h / 16, s_w / 16 ) # pre: 1, 1,
    self.disc_weight_dims["d_h%i_b" % 3] = (num_dfs[3], )
    self.disc_weight_dims["d_h%i_W" % 4] = (1, 1, 1, num_dfs[3], 1)
    self.disc_weight_dims["d_h%i_b" % 4] = (1, )
    #####################################################################################################################
    # Per-generator output-distribution parameters (mean/variance maps,
    # one full-size map per generator).
    self.distrib_weight_dims = OrderedDict()
    for zi in range(self.num_gen):
        self.distrib_weight_dims["dist_%i_mu" % zi] = (1, self.x_dim[0],
                                                       self.x_dim[1],
                                                       self.x_dim[2]
                                                       )  # self.num_classes
        self.distrib_weight_dims["dist_%i_var" % zi] = (1, self.x_dim[0],
                                                        self.x_dim[1],
                                                        self.x_dim[2])
    #####################################################################################################################
    # Diagnostic dump of all shapes.
    # FIX: labels corrected — the discriminator loop previously printed
    # "dics_weight_dims" (typo) and the distribution loop reused that same
    # wrong label, mislabeling its output.
    for k, v in self.gen_weight_dims.items(
    ):  # k is the name, v is the dim
        print("gen_weight_dims - %s: %s" % (k, v))
    print('****')
    for k, v in self.disc_weight_dims.items():
        print("disc_weight_dims - %s: %s" % (k, v))
    print('****')
    for k, v in self.distrib_weight_dims.items():
        print("distrib_weight_dims - %s: %s" % (k, v))
class BDCGAN_Semi(object):
    """Semi-supervised Bayesian DCGAN.

    Maintains J*M generator parameter sets and J_d*M discriminator
    parameter sets (SGHMC posterior samples), builds semi-supervised
    discriminator/generator losses over all pairs, plus a separate purely
    supervised discriminator (`sup_*`) as a baseline.
    """

    def __init__(self,
                 x_dim,
                 z_dim,
                 dataset_size,
                 batch_size=64,
                 gf_dim=64,
                 df_dim=64,
                 prior_std=1.0,
                 J=1,
                 M=1,
                 num_classes=1,
                 eta=2e-4,
                 num_layers=4,
                 alpha=0.01,
                 lr=0.0002,
                 optimizer='adam',
                 wasserstein=False,
                 ml=False,
                 J_d=None):
        # NOTE(review): `wasserstein` is accepted but never stored/used in
        # this class — confirm whether it should be wired through.
        assert len(x_dim) == 3, "invalid image dims"
        c_dim = x_dim[2]
        self.is_grayscale = (c_dim == 1)
        self.optimizer = optimizer.lower()
        self.dataset_size = dataset_size
        self.batch_size = batch_size
        self.K = num_classes
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.gf_dim = gf_dim
        self.df_dim = df_dim
        self.c_dim = c_dim
        self.lr = lr
        # Bayes
        self.prior_std = prior_std
        self.num_gen = J
        self.num_disc = J_d if J_d is not None else 1
        self.num_mcmc = M
        self.eta = eta
        self.alpha = alpha
        # ML
        self.ml = ml
        if self.ml:
            assert self.num_gen == 1 and self.num_disc == 1 and self.num_mcmc == 1, "invalid settings for ML training"
        # SGHMC injected-noise std-dev.
        self.noise_std = np.sqrt(2 * self.alpha * self.eta)

        def get_strides(num_layers, num_pool):
            # Distribute `num_pool` stride-2 layers evenly among
            # `num_layers` layers (the rest keep stride 1).
            interval = int(math.floor(num_layers / float(num_pool)))
            strides = np.array([1] * num_layers)
            strides[0:interval * num_pool:interval] = 2
            return strides

        self.num_pool = 4
        self.max_num_dfs = 512
        self.gen_strides = get_strides(num_layers, self.num_pool)
        self.disc_strides = self.gen_strides
        # Filter counts double at each stride-2 layer, capped at 512.
        num_dfs = np.cumprod(np.array([self.df_dim] +
                                      list(self.disc_strides)))[:-1]
        num_dfs[num_dfs >= self.max_num_dfs] = self.max_num_dfs  # memory
        self.num_dfs = list(num_dfs)
        self.num_gfs = self.num_dfs[::-1]
        self.construct_from_hypers(gen_strides=self.gen_strides,
                                   disc_strides=self.disc_strides,
                                   num_gfs=self.num_gfs,
                                   num_dfs=self.num_dfs)
        self.build_bgan_graph()
        self.build_test_graph()

    def construct_from_hypers(self,
                              gen_kernel_size=5,
                              gen_strides=[2, 2, 2, 2],
                              disc_kernel_size=5,
                              disc_strides=[2, 2, 2, 2],
                              num_dfs=None,
                              num_gfs=None):
        """Derive all generator/discriminator weight shapes from strides.

        Walks the generator layers back-to-front (output -> latent) and the
        discriminator front-to-back, shrinking spatial size and kernel size
        per stride, and records output dims, weight shapes and kernel
        sizes. NOTE(review): list default args are shared across calls —
        harmless here since they are never mutated.
        """
        self.d_batch_norm = AttributeDict([
            ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
            for dbn_i in range(len(disc_strides))
        ])
        self.sup_d_batch_norm = AttributeDict([
            ("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i))
            for dbn_i in range(5)
        ])
        self.g_batch_norm = AttributeDict([
            ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
            for gbn_i in range(len(gen_strides))
        ])
        if num_dfs is None:
            num_dfs = [
                self.df_dim, self.df_dim * 2, self.df_dim * 4,
                self.df_dim * 8
            ]
        if num_gfs is None:
            num_gfs = [
                self.gf_dim * 8, self.gf_dim * 4, self.gf_dim * 2,
                self.gf_dim
            ]
        assert len(gen_strides) == len(num_gfs), "invalid hypers!"
        assert len(disc_strides) == len(num_dfs), "invalid hypers!"
        s_h, s_w = self.x_dim[0], self.x_dim[1]
        ks = gen_kernel_size
        self.gen_output_dims = OrderedDict()
        self.gen_weight_dims = OrderedDict()
        num_gfs = num_gfs + [self.c_dim]  # last layer emits image channels
        self.gen_kernel_sizes = [ks]
        # Iterate from the output layer backwards so spatial sizes shrink
        # toward the g_h0 linear projection.
        for layer in range(len(gen_strides))[::-1]:
            self.gen_output_dims["g_h%i_out" % (layer + 1)] = (s_h, s_w)
            assert gen_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"
            self.gen_weight_dims["g_h%i_W" % (layer + 1)] = (
                ks, ks, num_gfs[layer + 1], num_gfs[layer])
            self.gen_weight_dims["g_h%i_b" % (layer + 1)] = (
                num_gfs[layer + 1], )
            s_h, s_w = conv_out_size(s_h,
                                     gen_strides[layer]), conv_out_size(
                                         s_w, gen_strides[layer])
            ks = kernel_sizer(ks, gen_strides[layer])
            self.gen_kernel_sizes.append(ks)
        self.gen_weight_dims.update(
            OrderedDict([("g_h0_lin_W", (self.z_dim,
                                         num_gfs[0] * s_h * s_w)),
                         ("g_h0_lin_b", (num_gfs[0] * s_h * s_w, ))]))
        self.gen_output_dims["g_h0_out"] = (s_h, s_w)
        # Discriminator: forward pass, ending in two linear layers.
        self.disc_weight_dims = OrderedDict()
        s_h, s_w = self.x_dim[0], self.x_dim[1]
        num_dfs = [self.c_dim] + num_dfs
        ks = disc_kernel_size
        self.disc_kernel_sizes = [ks]
        for layer in range(len(disc_strides)):
            assert disc_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"
            self.disc_weight_dims["d_h%i_W" % layer] = (ks, ks,
                                                        num_dfs[layer],
                                                        num_dfs[layer + 1])
            self.disc_weight_dims["d_h%i_b" % layer] = (
                num_dfs[layer + 1], )
            s_h, s_w = conv_out_size(s_h,
                                     disc_strides[layer]), conv_out_size(
                                         s_w, disc_strides[layer])
            ks = kernel_sizer(ks, disc_strides[layer])
            self.disc_kernel_sizes.append(ks)
        self.disc_weight_dims.update(
            OrderedDict([("d_h_end_lin_W", (num_dfs[-1] * s_h * s_w,
                                            num_dfs[-1])),
                         ("d_h_end_lin_b", (num_dfs[-1], )),
                         ("d_h_out_lin_W", (num_dfs[-1], self.K)),
                         ("d_h_out_lin_b", (self.K, ))]))
        # Diagnostic dump of derived shapes (Python 2 print statements).
        for k, v in self.gen_output_dims.items():
            print "%s: %s" % (k, v)
        print '****'
        for k, v in self.gen_weight_dims.items():
            print "%s: %s" % (k, v)
        print '****'
        for k, v in self.disc_weight_dims.items():
            print "%s: %s" % (k, v)

    def construct_nets(self):
        """Set up fixed 5-layer DCGAN weight shapes (hard-coded stride 2,
        5x5 kernels) — the non-parameterized alternative to
        `construct_from_hypers`."""
        self.num_disc_layers = 5
        self.num_gen_layers = 5
        self.d_batch_norm = AttributeDict([
            ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.sup_d_batch_norm = AttributeDict([
            ("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.g_batch_norm = AttributeDict([
            ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
            for gbn_i in range(self.num_gen_layers)
        ])
        # Spatial sizes after each of the four stride-2 stages.
        s_h, s_w = self.x_dim[0], self.x_dim[1]
        s_h2, s_w2 = conv_out_size(s_h, 2), conv_out_size(s_w, 2)
        s_h4, s_w4 = conv_out_size(s_h2, 2), conv_out_size(s_w2, 2)
        s_h8, s_w8 = conv_out_size(s_h4, 2), conv_out_size(s_w4, 2)
        s_h16, s_w16 = conv_out_size(s_h8, 2), conv_out_size(s_w8, 2)
        self.gen_output_dims = OrderedDict([("g_h0_out", (s_h16, s_w16)),
                                            ("g_h1_out", (s_h8, s_w8)),
                                            ("g_h2_out", (s_h4, s_w4)),
                                            ("g_h3_out", (s_h2, s_w2)),
                                            ("g_h4_out", (s_h, s_w))])
        self.gen_weight_dims = OrderedDict([
            ("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)),
            ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16, )),
            ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)),
            ("g_h1_b", (self.gf_dim * 4, )),
            ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)),
            ("g_h2_b", (self.gf_dim * 2, )),
            ("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)),
            ("g_h3_b", (self.gf_dim * 1, )),
            ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)),
            ("g_h4_b", (self.c_dim, ))
        ])
        self.disc_weight_dims = OrderedDict([
            ("d_h0_W", (5, 5, self.c_dim, self.df_dim)),
            ("d_h0_b", (self.df_dim, )),
            ("d_h1_W", (5, 5, self.df_dim, self.df_dim * 2)),
            ("d_h1_b", (self.df_dim * 2, )),
            ("d_h2_W", (5, 5, self.df_dim * 2, self.df_dim * 4)),
            ("d_h2_b", (self.df_dim * 4, )),
            ("d_h3_W", (5, 5, self.df_dim * 4, self.df_dim * 8)),
            ("d_h3_b", (self.df_dim * 8, )),
            ("d_h_end_lin_W", (self.df_dim * 8 * s_h16 * s_w16,
                               self.df_dim * 4)),
            ("d_h_end_lin_b", (self.df_dim * 4, )),
            ("d_h_out_lin_W", (self.df_dim * 4, self.K)),
            ("d_h_out_lin_b", (self.K, ))
        ])

    def _get_optimizer(self, lr):
        """Return the configured optimizer ('adam' or momentum 'sgd') for
        learning rate `lr` (a scalar or placeholder)."""
        if self.optimizer == 'adam':
            return tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
        elif self.optimizer == 'sgd':
            return tf.train.MomentumOptimizer(learning_rate=lr,
                                              momentum=0.5)
        else:
            raise ValueError("Optimizer must be either 'adam' or 'sgd'")

    def initialize_wgts(self, scope_str):
        """Create numz * num_mcmc parameter sets under `scope_str`.

        Each set is an AttributeDict of tf Variables named
        "<wgt>_<zi>_<m>" so they can be matched by name later.
        Raises RuntimeError for an unknown scope.
        """
        if scope_str == "generator":
            weight_dims = self.gen_weight_dims
            numz = self.num_gen
        elif scope_str == "discriminator":
            weight_dims = self.disc_weight_dims
            numz = self.num_disc
        else:
            raise RuntimeError("invalid scope!")
        param_list = []
        with tf.variable_scope(scope_str) as scope:
            for zi in xrange(numz):
                for m in xrange(self.num_mcmc):
                    wgts_ = AttributeDict()
                    for name, shape in weight_dims.iteritems():
                        wgts_[name] = tf.get_variable(
                            "%s_%04d_%04d" % (name, zi, m),
                            shape,
                            initializer=tf.random_normal_initializer(
                                stddev=0.02))
                    param_list.append(wgts_)
        return param_list

    def build_bgan_graph(self):
        """Build the semi-supervised training graph over all
        discriminator/generator posterior-sample pairs, plus samplers and
        a separate purely supervised discriminator."""
        # Placeholders; z carries one noise slab per generator.
        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + self.x_dim,
                                     name='real_images')
        self.labeled_inputs = tf.placeholder(tf.float32,
                                             [self.batch_size] + self.x_dim,
                                             name='real_images_w_labels')
        self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K],
                                     name='real_targets')
        self.z = tf.placeholder(tf.float32,
                                [self.batch_size, self.z_dim, self.num_gen],
                                name='z')
        self.z_sampler = tf.placeholder(tf.float32,
                                        [self.batch_size, self.z_dim],
                                        name='z_sampler')
        # initialize generator weights
        self.gen_param_list = self.initialize_wgts("generator")
        self.disc_param_list = self.initialize_wgts("discriminator")
        ### build discrimitive losses and optimizers
        # prep optimizer args
        self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
        # compile all disciminative weights, grouped per (di, m) sample.
        t_vars = tf.trainable_variables()
        self.d_vars = []
        for di in xrange(self.num_disc):
            for m in xrange(self.num_mcmc):
                self.d_vars.append([
                    var for var in t_vars
                    if 'd_' in var.name and "_%04d_%04d" %
                    (di, m) in var.name
                ])
        ### build disc losses and optimizers
        self.d_losses, self.d_optims_semi, self.d_optims_semi_adam = [], [], []
        for di, disc_params in enumerate(self.disc_param_list):
            d_probs, d_logits, _ = self.discriminator(
                self.inputs, self.K, disc_params)
            # Real term: -logsumexp(logits) + softplus(logsumexp(logits)),
            # the K-class GAN discriminator loss on real data.
            d_loss_real = -tf.reduce_mean(tf.reduce_logsumexp(d_logits, 1)) +\
                tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits, 1)))
            # Fake term for each generator sample against this disc sample.
            d_loss_fakes = []
            for gi, gen_params in enumerate(self.gen_param_list):
                d_probs_, d_logits_, _ = self.discriminator(
                    self.generator(self.z[:, :, gi % self.num_gen],
                                   gen_params), self.K, disc_params)
                d_loss_fake_ = tf.reduce_mean(
                    tf.nn.softplus(tf.reduce_logsumexp(d_logits_, 1)))
                d_loss_fakes.append(d_loss_fake_)
            d_sup_probs, d_sup_logits, _ = self.discriminator(
                self.labeled_inputs, self.K, disc_params)
            d_loss_sup = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=d_sup_logits, labels=self.labels))
            # Combine: one semi-supervised loss per fake term (with prior +
            # SGHMC noise unless pure ML), folded via logsumexp.
            d_losses_semi = []
            for d_loss_fake_ in d_loss_fakes:
                d_loss_semi_ = d_loss_sup + d_loss_real * float(
                    self.num_gen) + d_loss_fake_
                if not self.ml:
                    d_loss_semi_ += self.disc_prior(
                        disc_params) + self.disc_noise(disc_params)
                d_losses_semi.append(tf.reshape(d_loss_semi_, [1]))
            d_loss_semi = tf.reduce_logsumexp(tf.concat(d_losses_semi, 0))
            self.d_losses.append(d_loss_semi)
            d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)
            self.d_optims_semi.append(
                d_opt_semi.minimize(d_loss_semi, var_list=self.d_vars[di]))
            d_opt_semi_adam = tf.train.AdamOptimizer(
                learning_rate=self.d_semi_learning_rate, beta1=0.5)
            self.d_optims_semi_adam.append(
                d_opt_semi_adam.minimize(d_loss_semi,
                                         var_list=self.d_vars[di]))
        ### build generative losses and optimizers
        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
        self.g_vars = []
        for gi in xrange(self.num_gen):
            for m in xrange(self.num_mcmc):
                self.g_vars.append([
                    var for var in t_vars
                    if 'g_' in var.name and "_%04d_%04d" %
                    (gi, m) in var.name
                ])
        self.g_losses, self.g_optims_semi, self.g_optims_semi_adam = [], [], []
        for gi, gen_params in enumerate(self.gen_param_list):
            gi_losses = []
            for disc_params in self.disc_param_list:
                d_probs_, d_logits_, d_features_fake = self.discriminator(
                    self.generator(self.z[:, :, gi % self.num_gen],
                                   gen_params), self.K, disc_params)
                _, _, d_features_real = self.discriminator(
                    self.inputs, self.K, disc_params)
                g_loss_ = -tf.reduce_mean(tf.reduce_logsumexp(d_logits_, 1)) +\
                    tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits_, 1)))  # not needed?!
                # Feature-matching term on the last discriminator features.
                g_loss_ += tf.reduce_mean(
                    huber_loss(d_features_real[-1], d_features_fake[-1]))
                if not self.ml:
                    g_loss_ += self.gen_prior(gen_params) + self.gen_noise(
                        gen_params)
                gi_losses.append(tf.reshape(g_loss_, [1]))
            g_loss = tf.reduce_logsumexp(tf.concat(gi_losses, 0))
            self.g_losses.append(g_loss)
            g_opt = self._get_optimizer(self.g_learning_rate)
            self.g_optims_semi.append(
                g_opt.minimize(g_loss, var_list=self.g_vars[gi]))
            g_opt_adam = tf.train.AdamOptimizer(
                learning_rate=self.g_learning_rate, beta1=0.5)
            self.g_optims_semi_adam.append(
                g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))
        ### build samplers
        self.gen_samplers = []
        for gi, gen_params in enumerate(self.gen_param_list):
            self.gen_samplers.append(
                self.generator(self.z_sampler, gen_params))
        ### build vanilla supervised loss (baseline, trained at 0.05 * lr)
        self.lbls = tf.placeholder(tf.float32, [self.batch_size, self.K],
                                   name='real_sup_targets')
        self.S, self.S_logits = self.sup_discriminator(self.inputs, self.K)
        self.s_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.S_logits,
                                                    labels=self.lbls))
        t_vars = tf.trainable_variables()
        self.sup_vars = [var for var in t_vars if 'sup_' in var.name]
        supervised_lr = 0.05 * self.lr
        s_opt = self._get_optimizer(supervised_lr)
        self.s_optim = s_opt.minimize(self.s_loss, var_list=self.sup_vars)
        s_opt_adam = tf.train.AdamOptimizer(learning_rate=supervised_lr,
                                            beta1=0.5)
        self.s_optim_adam = s_opt_adam.minimize(self.s_loss,
                                                var_list=self.sup_vars)

    def build_test_graph(self):
        """Build inference-time (train=False) discriminator outputs for
        every posterior sample, plus the supervised baseline outputs."""
        self.test_inputs = tf.placeholder(tf.float32,
                                          [self.batch_size] + self.x_dim,
                                          name='real_test_images')
        self.test_d_probs, self.test_d_logits = [], []
        for disc_params in self.disc_param_list:
            test_d_probs_, test_d_logits_, _ = self.discriminator(
                self.test_inputs, self.K, disc_params, train=False)
            self.test_d_probs.append(test_d_probs_)
            self.test_d_logits.append(test_d_logits_)
        # build standard purely supervised losses and optimizers
        self.test_s_probs, self.test_s_logits = self.sup_discriminator(
            self.test_inputs, self.K, reuse=True)

    def sup_discriminator(self, image, K, reuse=False):
        """Purely supervised K-class CNN classifier (4 convs + linear).

        Returns (softmax probabilities, logits).
        """
        # TODO collapse this into disc
        with tf.variable_scope("sup_discriminator") as scope:
            if reuse:
                scope.reuse_variables()
            h0 = lrelu(conv2d(image, self.df_dim, name='sup_h0_conv'))
            h1 = lrelu(
                self.sup_d_batch_norm.sd_bn1(
                    conv2d(h0, self.df_dim * 2, name='sup_h1_conv')))
            h2 = lrelu(
                self.sup_d_batch_norm.sd_bn2(
                    conv2d(h1, self.df_dim * 4, name='sup_h2_conv')))
            h3 = lrelu(
                self.sup_d_batch_norm.sd_bn3(
                    conv2d(h2, self.df_dim * 8, name='sup_h3_conv')))
            # NOTE(review): final linear layer is named 'sup_h3_lin'
            # although it produces h4 — confirm intentional.
            h4 = linear(tf.reshape(h3, [self.batch_size, -1]), K,
                        'sup_h3_lin')
            return tf.nn.softmax(h4), h4

    def discriminator(self, image, K, disc_params, train=True):
        """K-class discriminator using the explicit weights in
        `disc_params` (so each posterior sample shares one graph scope).

        Returns (softmax probs, logits, [h_end]) where h_end is the
        penultimate feature layer used for feature matching.
        """
        with tf.variable_scope("discriminator") as scope:
            h = image
            for layer in range(len(self.disc_strides)):
                if layer == 0:
                    # First conv has no batch norm (standard DCGAN).
                    h = lrelu(
                        conv2d(h,
                               self.disc_weight_dims["d_h%i_W" %
                                                     layer][-1],
                               name='d_h%i_conv' % layer,
                               k_h=self.disc_kernel_sizes[layer],
                               k_w=self.disc_kernel_sizes[layer],
                               d_h=self.disc_strides[layer],
                               d_w=self.disc_strides[layer],
                               w=disc_params["d_h%i_W" % layer],
                               biases=disc_params["d_h%i_b" % layer]))
                else:
                    h = lrelu(self.d_batch_norm["d_bn%i" % layer](conv2d(
                        h,
                        self.disc_weight_dims["d_h%i_W" % layer][-1],
                        name='d_h%i_conv' % layer,
                        k_h=self.disc_kernel_sizes[layer],
                        k_w=self.disc_kernel_sizes[layer],
                        d_h=self.disc_strides[layer],
                        d_w=self.disc_strides[layer],
                        w=disc_params["d_h%i_W" % layer],
                        biases=disc_params["d_h%i_b" % layer]),
                                                                 train=train))
            h_end = lrelu(
                linear(tf.reshape(h, [self.batch_size, -1]),
                       self.df_dim * 4,
                       "d_h_end_lin",
                       matrix=disc_params.d_h_end_lin_W,
                       bias=disc_params.d_h_end_lin_b))  # for feature norm
            h_out = linear(h_end,
                           K,
                           'd_h_out_lin',
                           matrix=disc_params.d_h_out_lin_W,
                           bias=disc_params.d_h_out_lin_b)
            return tf.nn.softmax(h_out), h_out, [h_end]

    def generator(self, z, gen_params):
        """Map latent batch `z` to an image batch in [-1, 1] (tanh) using
        the explicit weights in `gen_params`: linear projection, reshape,
        then one deconv per stride (batch norm on all but the last)."""
        with tf.variable_scope("generator") as scope:
            h = linear(z,
                       self.gen_weight_dims["g_h0_lin_W"][-1],
                       'g_h0_lin',
                       matrix=gen_params.g_h0_lin_W,
                       bias=gen_params.g_h0_lin_b)
            h = tf.nn.relu(self.g_batch_norm.g_bn0(h))
            h = tf.reshape(h, [
                self.batch_size, self.gen_output_dims["g_h0_out"][0],
                self.gen_output_dims["g_h0_out"][1], -1
            ])
            for layer in range(1, len(self.gen_strides) + 1):
                out_shape = [
                    self.batch_size,
                    self.gen_output_dims["g_h%i_out" % layer][0],
                    self.gen_output_dims["g_h%i_out" % layer][1],
                    self.gen_weight_dims["g_h%i_W" % layer][-2]
                ]
                h = deconv2d(h,
                             out_shape,
                             k_h=self.gen_kernel_sizes[layer - 1],
                             k_w=self.gen_kernel_sizes[layer - 1],
                             d_h=self.gen_strides[layer - 1],
                             d_w=self.gen_strides[layer - 1],
                             name='g_h%i' % layer,
                             w=gen_params["g_h%i_W" % layer],
                             biases=gen_params["g_h%i_b" % layer])
                if layer < len(self.gen_strides):
                    h = tf.nn.relu(self.g_batch_norm["g_bn%i" % layer](h))
            return tf.nn.tanh(h)

    def gen_prior(self, gen_params):
        """Gaussian prior term over generator weights: mean of
        (w / prior_std)^2, scaled by 1/dataset_size."""
        with tf.variable_scope("generator") as scope:
            prior_loss = 0.0
            for var in gen_params.values():
                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))
        prior_loss /= self.dataset_size
        return prior_loss

    def gen_noise(self, gen_params):
        """SGHMC noise term for generator weights: sum of w * eps with
        eps ~ N(0, noise_std), scaled by 1/dataset_size."""
        with tf.variable_scope("generator") as scope:
            noise_loss = 0.0
            for name, var in gen_params.iteritems():
                noise_ = tf.contrib.distributions.Normal(
                    mu=0., sigma=self.noise_std * tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())
        noise_loss /= self.dataset_size
        return noise_loss

    def disc_prior(self, disc_params):
        """Gaussian prior term over discriminator weights (mirrors
        `gen_prior`)."""
        with tf.variable_scope("discriminator") as scope:
            prior_loss = 0.0
            for var in disc_params.values():
                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))
        prior_loss /= self.dataset_size
        return prior_loss

    def disc_noise(self, disc_params):
        """SGHMC noise term for discriminator weights (mirrors
        `gen_noise`)."""
        with tf.variable_scope("discriminator") as scope:
            noise_loss = 0.0
            for var in disc_params.values():
                noise_ = tf.contrib.distributions.Normal(
                    mu=0., sigma=self.noise_std * tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())
        noise_loss /= self.dataset_size
        return noise_loss
def construct_from_hypers(self, gen_kernel_size=5, gen_strides=[2, 2, 2, 2], disc_kernel_size=5, disc_strides=[2, 2, 2, 2], num_dfs=None, num_gfs=None): self.d_batch_norm = AttributeDict([ ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i)) for dbn_i in range(len(disc_strides)) ]) self.sup_d_batch_norm = AttributeDict([ ("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i)) for dbn_i in range(5) ]) self.g_batch_norm = AttributeDict([ ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i)) for gbn_i in range(len(gen_strides)) ]) if num_dfs is None: num_dfs = [ self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8 ] if num_gfs is None: num_gfs = [ self.gf_dim * 8, self.gf_dim * 4, self.gf_dim * 2, self.gf_dim ] assert len(gen_strides) == len(num_gfs), "invalid hypers!" assert len(disc_strides) == len(num_dfs), "invalid hypers!" s_h, s_w = self.x_dim[0], self.x_dim[1] ks = gen_kernel_size self.gen_output_dims = OrderedDict() self.gen_weight_dims = OrderedDict() num_gfs = num_gfs + [self.c_dim] self.gen_kernel_sizes = [ks] for layer in range(len(gen_strides))[::-1]: self.gen_output_dims["g_h%i_out" % (layer + 1)] = (s_h, s_w) assert gen_strides[layer] <= 2, "invalid stride" assert ks % 2 == 1, "invalid kernel size" self.gen_weight_dims["g_h%i_W" % (layer + 1)] = (ks, ks, num_gfs[layer + 1], num_gfs[layer]) self.gen_weight_dims["g_h%i_b" % (layer + 1)] = (num_gfs[layer + 1], ) s_h, s_w = conv_out_size(s_h, gen_strides[layer]), conv_out_size( s_w, gen_strides[layer]) ks = kernel_sizer(ks, gen_strides[layer]) self.gen_kernel_sizes.append(ks) self.gen_weight_dims.update( OrderedDict([("g_h0_lin_W", (self.z_dim, num_gfs[0] * s_h * s_w)), ("g_h0_lin_b", (num_gfs[0] * s_h * s_w, ))])) self.gen_output_dims["g_h0_out"] = (s_h, s_w) self.disc_weight_dims = OrderedDict() s_h, s_w = self.x_dim[0], self.x_dim[1] num_dfs = [self.c_dim] + num_dfs ks = disc_kernel_size self.disc_kernel_sizes = [ks] for layer in range(len(disc_strides)): assert disc_strides[layer] <= 2, 
"invalid stride" assert ks % 2 == 1, "invalid kernel size" self.disc_weight_dims["d_h%i_W" % layer] = (ks, ks, num_dfs[layer], num_dfs[layer + 1]) self.disc_weight_dims["d_h%i_b" % layer] = (num_dfs[layer + 1], ) s_h, s_w = conv_out_size(s_h, disc_strides[layer]), conv_out_size( s_w, disc_strides[layer]) ks = kernel_sizer(ks, disc_strides[layer]) self.disc_kernel_sizes.append(ks) self.disc_weight_dims.update( OrderedDict([("d_h_end_lin_W", (num_dfs[-1] * s_h * s_w, num_dfs[-1])), ("d_h_end_lin_b", (num_dfs[-1], )), ("d_h_out_lin_W", (num_dfs[-1], self.K)), ("d_h_out_lin_b", (self.K, ))])) for k, v in self.gen_output_dims.items(): print "%s: %s" % (k, v) print '****' for k, v in self.gen_weight_dims.items(): print "%s: %s" % (k, v) print '****' for k, v in self.disc_weight_dims.items(): print "%s: %s" % (k, v)
class BDCGAN_Semi_3d(object): def __init__(self, x_dim, z_dim, dataset_size, batch_size=64, gf_dim=64, df_dim=64, prior_std=1.0, J=1, M=1, num_classes=1, eta=1, num_layers=4, alpha=0.01, lr=0.0002, optimizer='adam', wasserstein=False, ml=False, J_d=None): # eta=2e-4, print("ml = ", ml) self.optimizer = optimizer.lower() self.dataset_size = dataset_size self.batch_size = batch_size self.K = num_classes self.x_dim = x_dim self.z_dim = z_dim # generated sample's dim self.gf_dim = gf_dim # ?? what is df_dim = 64 ? self.df_dim = df_dim self.c_dim = x_dim[3] # x_dim = [x, y, z, c] self.is_grayscale = (self.c_dim == 1) self.lr = lr # Bayes self.prior_std = prior_std self.num_gen = J # what is num_gen ?? self.num_disc = J_d if J_d is not None else 1 self.num_mcmc = M self.eta = eta # not required in variational inference and MC dropout self.alpha = alpha # not required in variational inference and MC dropout # ML self.ml = ml if self.ml: assert self.num_gen == 1 and self.num_disc == 1 and self.num_mcmc == 1, "invalid settings for ML training" self.noise_std = 10 # np.sqrt(2 * self.alpha * self.eta)\ def get_strides(num_layers, num_pool): interval = int(math.floor(num_layers / float(num_pool))) strides = np.array([1] * num_layers) strides[0:interval * num_pool:interval] = 2 return strides self.num_pool = 4 self.max_num_dfs = 1024 # default - 512 self.gen_strides = get_strides(num_layers, self.num_pool) self.disc_strides = self.gen_strides num_dfs = np.cumprod(np.array([self.df_dim] + list(self.disc_strides)))[:-1] num_dfs[num_dfs >= self.max_num_dfs] = self.max_num_dfs # memory self.num_dfs = list(num_dfs) self.num_gfs = self.num_dfs[::-1] self.construct_from_hypers(gen_strides=self.gen_strides, disc_strides=self.disc_strides, num_gfs=self.num_gfs, num_dfs=self.num_dfs) self.build_bgan_graph() self.build_test_graph() def construct_from_hypers(self, gen_kernel_size=5, gen_strides=[2, 2, 2, 2], disc_kernel_size=5, disc_strides=[2, 2, 2, 2], num_dfs=None, num_gfs=None): 
self.d_batch_norm = AttributeDict( [("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i)) for dbn_i in range(len(disc_strides))]) self.sup_d_batch_norm = AttributeDict( [("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i)) for dbn_i in range(5)]) self.g_batch_norm = AttributeDict( [("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i)) for gbn_i in range(len(gen_strides))]) if num_dfs is None: num_dfs = [self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8] if num_gfs is None: num_gfs = [self.gf_dim * 8, self.gf_dim * 4, self.gf_dim * 2, self.gf_dim] assert len(gen_strides) == len(num_gfs), "invalid hypers!" assert len(disc_strides) == len(num_dfs), "invalid hypers!" s_h, s_w = self.x_dim[0], self.x_dim[1] ks = gen_kernel_size self.gen_output_dims = OrderedDict() self.gen_weight_dims = OrderedDict() num_gfs = num_gfs + [self.c_dim] self.gen_kernel_sizes = [ks] for layer in range(len(gen_strides))[::-1]: self.gen_output_dims["g_h%i_out" % (layer + 1)] = (s_h, s_w) assert gen_strides[layer] <= 2, "invalid stride" assert ks % 2 == 1, "invalid kernel size" self.gen_weight_dims["g_h%i_W" % (layer + 1)] = (ks, ks, num_gfs[layer + 1], num_gfs[layer]) self.gen_weight_dims["g_h%i_b" % (layer + 1)] = (num_gfs[layer + 1],) s_h, s_w = conv_out_size(s_h, gen_strides[layer]), conv_out_size(s_w, gen_strides[layer]) ks = kernel_sizer(ks, gen_strides[layer]) self.gen_kernel_sizes.append(ks) self.gen_weight_dims.update(OrderedDict([("g_h0_lin_W", (self.z_dim, num_gfs[0] * s_h * s_w)), ("g_h0_lin_b", (num_gfs[0] * s_h * s_w,))])) self.gen_output_dims["g_h0_out"] = (s_h, s_w) self.disc_weight_dims = OrderedDict() s_h, s_w = self.x_dim[0], self.x_dim[1] num_dfs = [self.c_dim] + num_dfs ks = disc_kernel_size self.disc_kernel_sizes = [ks] for layer in range(len(disc_strides)): assert disc_strides[layer] <= 2, "invalid stride" assert ks % 2 == 1, "invalid kernel size" self.disc_weight_dims["d_h%i_W" % layer] = (ks, ks, num_dfs[layer], num_dfs[layer + 1]) 
self.disc_weight_dims["d_h%i_b" % layer] = (num_dfs[layer + 1],) s_h, s_w = conv_out_size(s_h, disc_strides[layer]), conv_out_size(s_w, disc_strides[layer]) ks = kernel_sizer(ks, disc_strides[layer]) self.disc_kernel_sizes.append(ks) self.disc_weight_dims.update(OrderedDict([("d_h_end_lin_W", (num_dfs[-1] * s_h * s_w, num_dfs[-1])), ("d_h_end_lin_b", (num_dfs[-1],)), ("d_h_out_lin_W", (num_dfs[-1], self.K)), ("d_h_out_lin_b", (self.K,))])) for k, v in self.gen_output_dims.items(): print("gen_output_dims - %s: %s" % (k, v)) print('****') for k, v in self.gen_weight_dims.items(): print("gen_weight_dims - %s: %s" % (k, v)) print('****') for k, v in self.disc_weight_dims.items(): print("dics_weight_dims - %s: %s" % (k, v)) def construct_nets(self): self.num_disc_layers = 5 self.num_gen_layers = 5 self.d_batch_norm = AttributeDict( [("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i)) for dbn_i in range(self.num_disc_layers)]) self.sup_d_batch_norm = AttributeDict( [("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i)) for dbn_i in range(self.num_disc_layers)]) self.g_batch_norm = AttributeDict( [("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i)) for gbn_i in range(self.num_gen_layers)]) s_h, s_w = self.x_dim[0], self.x_dim[1] s_h2, s_w2 = conv_out_size(s_h, 2), conv_out_size(s_w, 2) s_h4, s_w4 = conv_out_size(s_h2, 2), conv_out_size(s_w2, 2) s_h8, s_w8 = conv_out_size(s_h4, 2), conv_out_size(s_w4, 2) s_h16, s_w16 = conv_out_size(s_h8, 2), conv_out_size(s_w8, 2) self.gen_output_dims = OrderedDict([("g_h0_out", (s_h16, s_w16)), ("g_h1_out", (s_h8, s_w8)), ("g_h2_out", (s_h4, s_w4)), ("g_h3_out", (s_h2, s_w2)), ("g_h4_out", (s_h, s_w))]) self.gen_weight_dims = OrderedDict([("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)), ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16,)), ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)), ("g_h1_b", (self.gf_dim * 4,)), ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)), ("g_h2_b", (self.gf_dim * 2,)), 
("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)), ("g_h3_b", (self.gf_dim * 1,)), ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)), ("g_h4_b", (self.c_dim,))]) self.disc_weight_dims = OrderedDict([("d_h0_W", (5, 5, self.c_dim, self.df_dim)), ("d_h0_b", (self.df_dim,)), ("d_h1_W", (5, 5, self.df_dim, self.df_dim * 2)), ("d_h1_b", (self.df_dim * 2,)), ("d_h2_W", (5, 5, self.df_dim * 2, self.df_dim * 4)), ("d_h2_b", (self.df_dim * 4,)), ("d_h3_W", (5, 5, self.df_dim * 4, self.df_dim * 8)), ("d_h3_b", (self.df_dim * 8,)), ("d_h_end_lin_W", (self.df_dim * 8 * s_h16 * s_w16, self.df_dim * 4)), ("d_h_end_lin_b", (self.df_dim * 4,)), ("d_h_out_lin_W", (self.df_dim * 4, self.K)), ("d_h_out_lin_b", (self.K,))]) def _get_optimizer(self, lr): if self.optimizer == 'adam': return tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5) elif self.optimizer == 'sgd': return tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.5) else: raise ValueError("Optimizer must be either 'adam' or 'sgd'") def initialize_wgts(self, scope_str): if scope_str == "generator": weight_dims = self.gen_weight_dims numz = self.num_gen elif scope_str == "discriminator": weight_dims = self.disc_weight_dims numz = self.num_disc else: raise RuntimeError("invalid scope!") param_list = [] with tf.variable_scope(scope_str) as scope: # iterated J (numz / num_gen) x num_mcmc = 20 for zi in range(numz): # numz: num_gen / num_disc for m in range(self.num_mcmc): wgts_ = AttributeDict() for name, shape in weight_dims.items(): wgts_[name] = tf.get_variable("%s_%04d_%04d" % (name, zi, m), shape, initializer=tf.random_normal_initializer(stddev=0.02)) param_list.append(wgts_) return param_list def build_bgan_graph(self): # unsupervised images from data distribution self.inputs = tf.placeholder(tf.float32, [self.batch_size] + self.x_dim, name='real_images') # for discrinimator: from supervised batch images self.labeled_inputs = tf.placeholder(tf.float32, [self.batch_size] + self.x_dim, name='real_images_w_labels') 
        self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K],
                                     name='real_targets')
        # for the generator: one latent slice per ensemble generator
        self.z = tf.placeholder(tf.float32,
                                [self.batch_size, self.z_dim, self.num_gen],
                                name='z')  # e.g. [64, 100, 10]
        self.z_sampler = tf.placeholder(tf.float32,
                                        [self.batch_size, self.z_dim],
                                        name='z_sampler')
        # initialize generator / discriminator ensemble weights
        self.gen_param_list = self.initialize_wgts("generator")  # num_gen * num_mcmc entries
        self.disc_param_list = self.initialize_wgts("discriminator")  # num_disc * num_mcmc entries

        ############################ build discriminative losses and optimizers ##########################################
        self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
        t_vars = tf.trainable_variables()
        # group each discriminator copy's trainables via the "_%04d_%04d" name suffix
        self.d_vars = []
        for di in range(self.num_disc):
            for m in range(self.num_mcmc):
                self.d_vars.append([
                    var for var in t_vars
                    if 'd_' in var.name and "_%04d_%04d" % (di, m) in var.name
                ])
        self.d_losses, self.d_optims_semi, self.d_optims_semi_adam = [], [], []
        ### self.d_optims_semi is the user-specified optimizer
        for di, disc_params in enumerate(self.disc_param_list):
            # with len(disc_param_list) > 1, the first discriminator creates the
            # graph variables; subsequent copies rely on the scope's reuse flag
            # Part I: real ####################
            # d_probs = softmax(d_logits), d_logits = linear(pre-layer)
            d_probs_real, d_logits_real, _ = self.discriminator(
                self.inputs, self.K, disc_params, reuse=tf.AUTO_REUSE)
            # JT-0228: d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real, labels=tf.ones_like(d_probs_real)))
            d_loss_real = - tf.reduce_mean(tf.reduce_logsumexp(d_logits_real, 1)) \
                + tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits_real, 1)))
            # Part II: fake ####################
            d_loss_fakes = []
            for gi, gen_params in enumerate(self.gen_param_list):
                # iterate num_gen * num_mcmc times; gi % num_gen picks the z slice
                d_probs_fake, d_logits_fake, _ = self.discriminator(
                    self.generator(self.z[:, :, gi % self.num_gen], gen_params),
                    self.K, disc_params, reuse=True)
                # JT-0228: d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.zeros_like(d_probs_fake)))
                d_loss_fake = tf.reduce_mean(
                    tf.nn.softplus(tf.reduce_logsumexp(d_logits_fake, 1)))
                d_loss_fakes.append(d_loss_fake)
            # Part III: sup ####################
            d_sup_probs, d_sup_logits, _ = self.discriminator(
                self.labeled_inputs, self.K, disc_params, reuse=tf.AUTO_REUSE)
            d_loss_sup = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=d_sup_logits,
                                                           labels=self.labels))
            ################### total loss for semi-supervised discriminator ######################
            d_losses_semi = []
            for d_loss_fake_ in d_loss_fakes:
                d_loss_semi_ = d_loss_sup + d_loss_real * float(self.num_gen) + d_loss_fake_
                if not self.ml:
                    # bayes term: log( theta_d | alpha_d )
                    d_loss_semi_ += self.disc_prior(disc_params) + self.disc_noise(disc_params)  # 12
                d_losses_semi.append(tf.reshape(d_loss_semi_, [1]))
            d_loss_semi = tf.reduce_logsumexp(tf.concat(d_losses_semi, 0))
            self.d_losses.append(d_loss_semi)
            ################### total optimizer for semi-supervised discriminator ######################
            # after 5000 iterations
            d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)
            # NOTE(review): this branch uses the configured optimizer while the
            # *_adam branch below always uses Adam — confirm the intended
            # optimizer switch schedule.
            self.d_optims_semi.append(
                d_opt_semi.minimize(d_loss_semi, var_list=self.d_vars[di]))
            # default iterations
            d_opt_semi_adam = tf.train.AdamOptimizer(
                learning_rate=self.d_semi_learning_rate, beta1=0.5)
            self.d_optims_semi_adam.append(
                d_opt_semi_adam.minimize(d_loss_semi, var_list=self.d_vars[di]))

        ############################ build generator losses and optimizers ##########################################
        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
        self.g_vars = []
        for gi in range(self.num_gen):
            for m in range(self.num_mcmc):
                self.g_vars.append([
                    var for var in t_vars
                    if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name
                ])
        self.g_losses, self.g_optims_semi, self.g_optims_semi_adam = [], [], []
        for gi, gen_params in enumerate(self.gen_param_list):
            gi_losses = []
            for disc_params in self.disc_param_list:
                d_probs_fake, d_logits_fake, d_features_fake = self.discriminator(
                    self.generator(self.z[:, :, gi % self.num_gen], gen_params),
                    self.K, disc_params, reuse=tf.AUTO_REUSE)
                _, _, d_features_real = self.discriminator(
                    self.inputs, self.K, disc_params, reuse=tf.AUTO_REUSE)
                # JT-0228: g_loss_ = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.ones_like(d_probs_fake)))
                g_loss_ = -tf.reduce_mean(tf.reduce_logsumexp(d_logits_fake, 1)) + tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits_fake, 1)))
                # feature matching on the shared feature layer;
                # Huber loss is a variation of the squared loss that is more robust to noise
                g_loss_ += tf.reduce_mean(huber_loss(d_features_real[-1], d_features_fake[-1]))
                if not self.ml:
                    # bayes terms: prior_loss + noise_loss
                    g_loss_ += self.gen_prior(gen_params) + self.gen_noise(gen_params)  # 10
                gi_losses.append(tf.reshape(g_loss_, [1]))
            g_loss = tf.reduce_logsumexp(tf.concat(gi_losses, 0))
            self.g_losses.append(g_loss)
            ################### total optimizer for semi-supervised generator ######################
            g_opt = self._get_optimizer(self.g_learning_rate)
            self.g_optims_semi.append(
                g_opt.minimize(g_loss, var_list=self.g_vars[gi]))
            g_opt_adam = tf.train.AdamOptimizer(
                learning_rate=self.g_learning_rate, beta1=0.5)
            self.g_optims_semi_adam.append(
                g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))

        ### build samplers (one per generator copy, fed from z_sampler)
        self.gen_samplers = []
        for gi, gen_params in enumerate(self.gen_param_list):
            self.gen_samplers.append(self.generator(self.z_sampler, gen_params))

        ### build vanilla supervised loss
        self.lbls = tf.placeholder(tf.float32, [self.batch_size, self.K],
                                   name='real_sup_targets')  # placeholder; fed the real labels at run time
        self.S, self.S_logits = self.sup_discriminator(self.inputs, self.K)
        self.s_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.S_logits,
                                                       labels=self.lbls))
        ################### total optimizer for the purely supervised baseline ######################
        t_vars = tf.trainable_variables()
        self.sup_vars = [var for var in t_vars if 'sup_' in var.name]
        supervised_lr = 0.05 * self.lr
        s_opt = self._get_optimizer(supervised_lr)
        self.s_optim = s_opt.minimize(self.s_loss, var_list=self.sup_vars)
        # NOTE(review): plain Adam here rather than the SGHMC-style update —
        # confirm this is the intended optimizer for the supervised baseline.
        s_opt_adam = tf.train.AdamOptimizer(learning_rate=supervised_lr, beta1=0.5)
        self.s_optim_adam = s_opt_adam.minimize(self.s_loss, var_list=self.sup_vars)

    def build_test_graph(self):
        """Build the evaluation graph: per-copy discriminator outputs and the
        purely supervised baseline outputs on held-out inputs."""
        self.test_inputs = tf.placeholder(tf.float32,
                                          [self.batch_size] + self.x_dim,
                                          name='real_test_images')
        # self.test_d_probs : num_disc entries, each (batch_size, K), e.g. 2 x (64, 10)
        self.test_d_probs, self.test_d_logits = [], []
        for disc_params in self.disc_param_list:
            # no generator involved, just the discriminator in inference mode
            test_d_probs_, test_d_logits_, _ = self.discriminator(
                self.test_inputs, self.K, disc_params, train=False, reuse=True)
            self.test_d_probs.append(test_d_probs_)  # test_d_probs_.shape = (64, 10)
            self.test_d_logits.append(test_d_logits_)
        # outputs of the standard purely supervised classifier
        self.test_s_probs, self.test_s_logits = self.sup_discriminator(
            self.test_inputs, self.K)

    def sup_discriminator(self, image, K):
        """Plain supervised CNN classifier baseline (4 conv layers + linear).

        Returns (softmax probs, logits). TODO: collapse this into
        discriminator().
        """
        with tf.variable_scope("sup_discriminator", reuse=tf.AUTO_REUSE) as scope:
            h0 = lrelu(conv2d(image, self.df_dim, name='sup_h0_conv'))
            h1 = lrelu(self.sup_d_batch_norm.sd_bn1(
                conv2d(h0, self.df_dim * 2, name='sup_h1_conv')))
            h2 = lrelu(self.sup_d_batch_norm.sd_bn2(
                conv2d(h1, self.df_dim * 4, name='sup_h2_conv')))
            h3 = lrelu(self.sup_d_batch_norm.sd_bn3(
                conv2d(h2, self.df_dim * 8, name='sup_h3_conv')))
            h4 = linear(tf.reshape(h3, [self.batch_size, -1]), K, 'sup_h3_lin')
            return tf.nn.softmax(h4), h4

    def discriminator(self, image, K, disc_params, train=True, reuse=False):
        """K-way discriminator built from the explicit weights in disc_params.

        Returns (softmax probs, logits, [last shared feature layer]); the
        feature layer is used for feature matching in the generator loss.
        """
        with tf.variable_scope("discriminator", reuse=reuse) as scope:  # reuse=tf.AUTO_REUSE
            h = image
            for layer in range(len(self.disc_strides)):
                if layer == 0:
                    # first layer: conv - lrelu (no batch norm)
                    h = lrelu(conv2d(h,
                                     self.disc_weight_dims["d_h%i_W" % layer][-1],
                                     name='d_h%i_conv' % layer,
                                     k_h=self.disc_kernel_sizes[layer],
                                     k_w=self.disc_kernel_sizes[layer],
                                     d_h=self.disc_strides[layer],
                                     d_w=self.disc_strides[layer],
                                     w=disc_params["d_h%i_W" % layer],
                                     biases=disc_params["d_h%i_b" % layer]))
                else:
                    # conv - bn - lrelu
                    h = lrelu(self.d_batch_norm["d_bn%i" % layer](
                        conv2d(h,
                               self.disc_weight_dims["d_h%i_W" % layer][-1],
                               name='d_h%i_conv' % layer,
                               k_h=self.disc_kernel_sizes[layer],
                               k_w=self.disc_kernel_sizes[layer],
                               d_h=self.disc_strides[layer],
                               d_w=self.disc_strides[layer],
                               w=disc_params["d_h%i_W" % layer],
                               biases=disc_params["d_h%i_b" % layer]),
                        train=train))
            h_end = lrelu(linear(tf.reshape(h, [self.batch_size, -1]),
                                 self.df_dim * 4,
                                 "d_h_end_lin",
                                 matrix=disc_params.d_h_end_lin_W,
                                 bias=disc_params.d_h_end_lin_b))  # for feature norm
            h_out = linear(h_end,
                           K,
                           'd_h_out_lin',
                           matrix=disc_params.d_h_out_lin_W,
                           bias=disc_params.d_h_out_lin_b)
            return tf.nn.softmax(h_out), h_out, [h_end]

    def generator(self, z, gen_params):
        """Generator graph (reuse-enabled copy) driven by the shape tables."""
        with tf.variable_scope("generator", reuse=tf.AUTO_REUSE) as scope:
            h = linear(z,
                       self.gen_weight_dims["g_h0_lin_W"][-1],
                       'g_h0_lin',
                       matrix=gen_params.g_h0_lin_W,
                       bias=gen_params.g_h0_lin_b)
            h = tf.nn.relu(self.g_batch_norm.g_bn0(h))
            h = tf.reshape(h, [self.batch_size,
                               self.gen_output_dims["g_h0_out"][0],
                               self.gen_output_dims["g_h0_out"][1], -1])
            for layer in range(1, len(self.gen_strides) + 1):
                out_shape = [self.batch_size,
                             self.gen_output_dims["g_h%i_out" % layer][0],
                             self.gen_output_dims["g_h%i_out" % layer][1],
                             self.gen_weight_dims["g_h%i_W" % layer][-2]]
                h = deconv2d(h,
                             out_shape,
                             k_h=self.gen_kernel_sizes[layer - 1],
                             k_w=self.gen_kernel_sizes[layer - 1],
                             d_h=self.gen_strides[layer - 1],
                             d_w=self.gen_strides[layer - 1],
                             name='g_h%i' % layer,
                             w=gen_params["g_h%i_W" % layer],
                             biases=gen_params["g_h%i_b" % layer])
                if layer < len(self.gen_strides):
                    # no batch norm on the last deconv; tanh follows directly
                    h = tf.nn.relu(self.g_batch_norm["g_bn%i" % layer](h))
            return tf.nn.tanh(h)

    def gen_prior(self, gen_params):
        """Gaussian weight-prior term for one generator copy, scaled by dataset size."""
        with tf.variable_scope("generator") as scope:
            prior_loss = 0.0
            for var in gen_params.values():
                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))
            prior_loss /= self.dataset_size
            return prior_loss

    def gen_noise(self, gen_params):
        """Noise term: generator weights dotted with fresh Gaussian samples."""
        # noise_ : Gaussian with standard deviation self.noise_std
        with tf.variable_scope("generator") as scope:
            noise_loss = 0.0
            for name, var in gen_params.items():  # .iteritems():
                noise_ = tf.distributions.Normal(
                    loc=0., scale=self.noise_std * tf.ones(var.get_shape()))
                # tf.contrib.distributions.Normal(mu=0., sigma=self.noise_std*tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())
            noise_loss /= self.dataset_size
            return noise_loss

    def disc_prior(self, disc_params):
        """Gaussian weight-prior term for one discriminator copy."""
        with tf.variable_scope("discriminator") as scope:
            prior_loss = 0.0
            for var in disc_params.values():
                # print("var_disc_prior shape = ", var.get_shape(), var)
                # e.g. (5, 5, 3, 96) <tf.Variable 'discriminator/d_h0_W_0000_0000:0' shape=(5, 5, 3, 96) dtype=float32_ref>
                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))
            prior_loss /= self.dataset_size
            return prior_loss

    def disc_noise(self, disc_params):
        """Noise term for one discriminator copy (same form as gen_noise)."""
        with tf.variable_scope("discriminator") as scope:
            noise_loss = 0.0
            for var in disc_params.values():
                noise_ = tf.distributions.Normal(
                    loc=0., scale=self.noise_std * tf.ones(var.get_shape()))
                # tf.contrib.distributions.Normal(mu=0., sigma=self.noise_std*tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())
            noise_loss /= self.dataset_size
            return noise_loss