def create_loss():
    """Creates the loss to be optimized.

    Returns:
      bound: A float Tensor containing the value of the bound that is being
        optimized.
      loss: A float Tensor that when differentiated yields the gradients to
        apply to the model. Should be optimized via gradient descent.

    Raises:
      ValueError: If `config.bound` is not one of "elbo", "iwae" or "fivo".
    """
    inputs, targets, lengths, model = create_dataset_and_model(
        config, split="train", shuffle=True, repeat=True)
    # Compute lower bounds on the log likelihood.
    if config.bound == "elbo":
        # The ELBO is requested from the IWAE bound with a single sample.
        ll_per_seq, _, _, _ = bounds.iwae(
            model, (inputs, targets), lengths, num_samples=1)
    elif config.bound == "iwae":
        ll_per_seq, _, _, _ = bounds.iwae(
            model, (inputs, targets), lengths, num_samples=config.num_samples)
    elif config.bound == "fivo":
        ll_per_seq, _, _, _, _ = bounds.fivo(
            model, (inputs, targets), lengths,
            num_samples=config.num_samples,
            resampling_criterion=bounds.ess_criterion)
    else:
        # Fail here with a clear message instead of an unbound-variable error
        # a few lines further down.
        raise ValueError(
            "Unknown bound %r; expected 'elbo', 'iwae' or 'fivo'."
            % (config.bound,))
    # Compute loss scaled by number of timesteps.
    ll_per_t = tf.reduce_mean(ll_per_seq / tf.to_float(lengths))
    ll_per_seq = tf.reduce_mean(ll_per_seq)
    tf.summary.scalar("train_ll_per_seq", ll_per_seq)
    tf.summary.scalar("train_ll_per_t", ll_per_t)
    # The loss is the negated bound, so minimising it maximises the bound.
    if config.normalize_by_seq_len:
        return ll_per_t, -ll_per_t
    else:
        return ll_per_seq, -ll_per_seq
def __init__(self, sess, dataset_name='facades', checkpoint_dir=None):
    """Build the A<->B translation graph: generators, discriminators, losses.

    Args:
        sess: session object; stored on the instance.
        dataset_name: dataset identifier; stored on the instance.
        checkpoint_dir: checkpoint location; stored on the instance.
    """
    def _mean_bce(logits, labels):
        # Batch-mean sigmoid cross-entropy of `logits` against `labels`.
        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=labels))

    def _mean_l1(lhs, rhs):
        # Mean absolute difference between two tensors.
        return tf.reduce_mean(tf.abs(lhs - rhs))

    self.sess = sess
    self.dataset_name = dataset_name
    self.checkpoint_dir = checkpoint_dir

    # One placeholder carries both images stacked on the channel axis:
    # channels 0..2 are read as A, channels 3..5 as B.
    self.real_data = tf.placeholder(
        tf.float32,
        [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3 + 3],
        name='input_images')
    self.real_A = self.real_data[:, :, :, :3]
    self.real_B = self.real_data[:, :, :, 3:6]

    # Forward translations, then the round trips through the other generator
    # (reuse=True shares the variables created by the first call).
    self.fake_B = generator(self.real_A, name="generatorA2B")
    self.fake_A = generator(self.real_B, name="generatorB2A")
    self.fake_B_fake_A = generator(
        self.fake_B, reuse=True, name="generatorB2A")
    self.fake_A_fake_B = generator(
        self.fake_A, reuse=True, name="generatorA2B")

    # Each discriminator is applied to the real and the generated image of
    # its domain.
    self.DA_real = discriminator(
        self.real_A, reuse=False, name="descriminatorA")
    self.DB_real = discriminator(
        self.real_B, reuse=False, name="descriminatorB")
    self.DA_fake = discriminator(
        self.fake_A, reuse=True, name="descriminatorA")
    self.DB_fake = discriminator(
        self.fake_B, reuse=True, name="descriminatorB")

    # Generator loss per direction: adversarial term + 100 * round-trip L1
    # + 100 * L1 between the translation and the real target-domain image.
    self.g_loss_a2b = (
        _mean_bce(self.DB_fake, tf.ones_like(self.DB_fake))
        + 100 * _mean_l1(self.real_A, self.fake_B_fake_A)
        + 100 * _mean_l1(self.real_B, self.fake_B))
    self.g_loss_b2a = (
        _mean_bce(self.DA_fake, tf.ones_like(self.DA_fake))
        + 100 * _mean_l1(self.real_B, self.fake_A_fake_B)
        + 100 * _mean_l1(self.real_A, self.fake_A))
    self.g_loss = self.g_loss_a2b + self.g_loss_b2a

    # Discriminator loss: fakes labelled 0, reals labelled 1, both domains.
    self.d_loss = (
        _mean_bce(self.DB_fake, tf.zeros_like(self.DB_fake))
        + _mean_bce(self.DB_real, tf.ones_like(self.DB_real))
        + _mean_bce(self.DA_fake, tf.zeros_like(self.DA_fake))
        + _mean_bce(self.DA_real, tf.ones_like(self.DA_real)))

    # Scalar summaries.
    self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
    self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    self.g_loss_a2b_sum = tf.summary.scalar("g_loss_a2b", self.g_loss_a2b)
    self.g_loss_b2a_sum = tf.summary.scalar("g_loss_b2a", self.g_loss_b2a)

    # Image summaries.
    self.real_A_sum = tf.summary.image("real_A", self.real_A)
    self.real_B_sum = tf.summary.image("real_B", self.real_B)
    self.fake_A_sum = tf.summary.image("fake_A", self.fake_A)
    self.fake_B_sum = tf.summary.image("fake_B", self.fake_B)
    self.fake_AB_sum = tf.summary.image("fake_AB", self.fake_A_fake_B)
    self.fake_BA_sum = tf.summary.image("fake_BA", self.fake_B_fake_A)

    self.d_sum = tf.summary.merge([self.d_loss_sum])
    generator_summaries = [
        self.g_loss_sum,
        self.g_loss_a2b_sum,
        self.g_loss_b2a_sum,
        self.real_A_sum,
        self.real_B_sum,
        self.fake_A_sum,
        self.fake_B_sum,
        self.fake_AB_sum,
        self.fake_BA_sum,
    ]
    self.g_sum = tf.summary.merge(generator_summaries)

    # Variables are split by a substring match on their names.
    # NOTE(review): this relies on generator()/discriminator() creating
    # variables whose names contain 'g_' / 'd_' - confirm in those helpers.
    all_trainable = tf.trainable_variables()
    self.d_vars = [v for v in all_trainable if 'd_' in v.name]
    self.g_vars = [v for v in all_trainable if 'g_' in v.name]

    self.saver = tf.train.Saver(max_to_keep=5)
def build_graph(self, nn_im_w, nn_im_h, num_colour_channels=3, weights=None, biases=None):
    """Build the conv-net graph, its training op and evaluation counters.

    Args:
        nn_im_w: width of the (resized) input image.
        nn_im_h: height of the (resized) input image.
        num_colour_channels: number of channels per pixel in the flattened input.
        weights: optional list of 5 initial weight values (3 conv layers,
            1 fully connected layer, 1 readout layer); None entries are passed
            through to the layer builders unchanged.
        biases: optional list of 5 initial bias values, same layout as `weights`.
    """
    num_outputs = 1  # single scalar output per example
    self.nn_im_w = nn_im_w
    self.nn_im_h = nn_im_h
    if weights is None:
        weights = [None, None, None, None, None]
    if biases is None:
        biases = [None, None, None, None, None]
    with tf.device('/cpu:0'):
        # Placeholder variables for the input image and output images.
        # The flattened width must match the reshape below, so it is derived
        # from num_colour_channels rather than a hard-coded 3.
        self.x = tf.placeholder(
            tf.float32, shape=[None, nn_im_w * nn_im_h * num_colour_channels])
        self.y_ = tf.placeholder(tf.float32, shape=[None, num_outputs])
        self.threshold = tf.placeholder(tf.float32)

        # Build the convolutional and pooling layers
        conv1_output_channels = 32
        conv2_output_channels = 16
        conv3_output_channels = 8
        # The resized input image
        conv_layer_1_input = tf.reshape(
            self.x, [-1, nn_im_h, nn_im_w, num_colour_channels])
        # NOTE(review): the builders below are assumed to append their output
        # to self.layers (read back as self.layers[k][0]) - confirm.
        self.build_conv_layer(
            conv_layer_1_input, num_colour_channels, conv1_output_channels,
            initial_weights=weights[0], initial_biases=biases[0])  # layer 1
        self.build_conv_layer(
            self.layers[0][0], conv1_output_channels, conv2_output_channels,
            initial_weights=weights[1], initial_biases=biases[1])  # layer 2
        self.build_conv_layer(
            self.layers[1][0], conv2_output_channels, conv3_output_channels,
            initial_weights=weights[2], initial_biases=biases[2])  # layer 3

        # Build the fully connected layer. The //8 presumably reflects three
        # 2x poolings inside build_conv_layer - TODO confirm.
        convnet_output_w = nn_im_w // 8
        convnet_output_h = nn_im_h // 8
        fully_connected_layer_input = tf.reshape(
            self.layers[2][0],
            [-1, convnet_output_w * convnet_output_h * conv3_output_channels])
        self.build_fully_connected_layer(
            fully_connected_layer_input, convnet_output_w, convnet_output_h,
            conv3_output_channels,
            initial_weights=weights[3], initial_biases=biases[3])

        # The dropout stage and readout layer
        self.keep_prob, self.h_drop = self.dropout(self.layers[3][0])
        self.y_conv, _, _ = self.build_readout_layer(
            self.h_drop, num_outputs,
            initial_weights=weights[4], initial_biases=biases[4])

        # Root-mean-square error is the training objective.
        self.mean_error = tf.sqrt(
            tf.reduce_mean(tf.square(self.y_ - self.y_conv)))
        self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.mean_error)

        # Accuracy after rounding the prediction to the nearest integer.
        self.accuracy = (
            1.0 - tf.reduce_mean(tf.abs(self.y_ - tf.round(self.y_conv))))

        # Labels are positive at >= 0.5; predictions are positive at
        # >= the fed threshold.
        positive_examples = tf.greater_equal(self.y_, 0.5)
        negative_examples = tf.logical_not(positive_examples)
        positive_classifications = tf.greater_equal(self.y_conv, self.threshold)
        negative_classifications = tf.logical_not(positive_classifications)

        # count the examples that are positive and classified as positive
        self.true_positive = tf.reduce_sum(tf.cast(
            tf.logical_and(positive_examples, positive_classifications), tf.int32))
        # count the examples that are negative but classified as positive
        self.false_positive = tf.reduce_sum(tf.cast(
            tf.logical_and(negative_examples, positive_classifications), tf.int32))
        # count the examples that are negative and classified as negative
        self.true_negative = tf.reduce_sum(tf.cast(
            tf.logical_and(negative_examples, negative_classifications), tf.int32))
        # count the examples that are positive but classified as negative
        self.false_negative = tf.reduce_sum(tf.cast(
            tf.logical_and(positive_examples, negative_classifications), tf.int32))
        # count the examples that are positive
        self.positive_count = tf.reduce_sum(tf.cast(positive_examples, tf.int32))
        # count the examples that are negative
        self.negative_count = tf.reduce_sum(tf.cast(negative_examples, tf.int32))

        # Laid out as [[TP, FP], [FN, TN]].
        self.confusion_matrix = tf.reshape(
            tf.pack([self.true_positive, self.false_positive,
                     self.false_negative, self.true_negative]),
            [2, 2])
        self.sess.run(tf.initialize_all_variables())
def __init__(self, config):
    """Build a GRU sequence classifier: embedding -> GRU -> linear projection.

    Args:
        config: object providing batch_size, max_seq_len, output_dim,
            hidden_state_dim, vocab_size and embedding_dim.
    """
    self.config = config

    # Fixed-size batch of token ids and one integer class label per sequence.
    self.input = tf.placeholder(
        'int32', [self.config.batch_size, config.max_seq_len], name='input')
    self.labels = tf.placeholder(
        'int64', [self.config.batch_size], name='labels')
    self.labels_one_hot = tf.one_hot(
        indices=self.labels,
        depth=config.output_dim,
        on_value=1.0,
        off_value=0.0,
        axis=-1)

    self.gru = GRUCell(config.hidden_state_dim)

    # Word embeddings, initialised uniformly in [-1, 1).
    embedding_table = tf.get_variable(
        'word_embeddings',
        initializer=tf.random_uniform(
            [config.vocab_size, config.embedding_dim], -1.0, 1.0))
    embedded = tf.nn.embedding_lookup(embedding_table, self.input)
    self.emb = embedded

    # Split along the time axis into a Python list with one tensor per step.
    per_step_inputs = [
        tf.squeeze(step, squeeze_dims=[1])
        for step in tf.split(1, config.max_seq_len, embedded)
    ]
    outputs, final_state = tf.nn.rnn(
        cell=self.gru, inputs=per_step_inputs, dtype=tf.float32,)

    # Project the final GRU state onto the label space.
    projection = tf.get_variable(
        'project2labels',
        initializer=tf.random_uniform(
            [config.hidden_state_dim, config.output_dim], -1.0, 1.0))
    class_logits = tf.matmul(final_state, projection)
    self.logits = class_logits
    tf.histogram_summary('logits', class_logits)

    self.probabilities = tf.nn.softmax(class_logits)
    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            class_logits, self.labels_one_hot))
    self.predict = tf.nn.softmax(class_logits)

    # TensorBoard
    hits = tf.equal(tf.argmax(self.predict, 1), self.labels)
    self.accuracy = tf.reduce_mean(tf.cast(hits, 'float32'), name='accuracy')
    tf.scalar_summary('CCE loss', self.loss)
    tf.scalar_summary('Accuracy', self.accuracy)
    self.tb_info = tf.merge_all_summaries()
def get_rebar_gradient(self):
    """Get the rebar gradient.

    Returns:
        A tuple (total_grads, debug, variance_objective): the combined
        gradients, a dict of debug tensors, and the mean squared value of the
        vectorised combined gradients.
    """
    hardELBO, nvil_gradient, logQHard = self._create_hard_elbo()

    # Select the control-variate builder requested by the hyper-parameters.
    if self.hparams.quadratic:
        build_control_variate = self._create_gumbel_control_variate_quadratic
    else:
        build_control_variate = self._create_gumbel_control_variate
    gumbel_cv, _ = build_control_variate(logQHard)

    # Gradients of the (negated) NVIL-style term.
    f_grads = self.optimizer_class.compute_gradients(
        tf.reduce_mean(-nvil_gradient))

    # Gradients of the control variate, scaled by eta per layer.
    eta = {}
    cv_grads = self.optimizer_class.compute_gradients(tf.reduce_mean(gumbel_cv))
    h_grads, eta_statistics = self.multiply_by_eta_per_layer(cv_grads, eta)

    model_grads = U.add_grads_and_vars(f_grads, h_grads)
    total_grads = model_grads

    # Construct the variance objective
    flat_grads = U.vectorize(model_grads, set_none_to_zero=True)
    variance_objective = tf.reduce_mean(tf.square(flat_grads))

    debug = {
        'ELBO': hardELBO,
        'etas': eta_statistics,
        'variance_objective': variance_objective,
    }
    return total_grads, debug, variance_objective
def __init__(self):
    """Load MNIST (with retries) and build the classifier graph."""
    # Import data: up to 10 attempts, sleeping 5 seconds after each failure.
    failure = None
    for _attempt in range(10):
        try:
            self.mnist = input_data.read_data_sets(
                "/tmp/tensorflow/mnist/input_data", one_hot=True)
        except Exception as exc:
            failure = exc
            time.sleep(5)
        else:
            failure = None
            break
    if failure:
        raise ValueError("Failed to import data", failure)

    # Set seed and build layers
    tf.set_random_seed(0)
    self.x = tf.placeholder(tf.float32, [None, 784], name="x")
    self.y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
    network_logits, self.keep_prob = deepnn(self.x)

    # Need to define loss and optimizer attributes
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=self.y_, logits=network_logits)
    self.loss = tf.reduce_mean(per_example_loss)
    self.optimizer = tf.train.AdamOptimizer(1e-4)
    self.variables = ray_tf_utils.TensorFlowVariables(
        self.loss, tf.get_default_session())

    # For evaluating test accuracy
    predicted_class = tf.argmax(network_logits, 1)
    true_class = tf.argmax(self.y_, 1)
    is_correct = tf.equal(predicted_class, true_class)
    self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
def fprop_noscope(self, x):
    """Normalise `x` over axes (1, 2), then apply the learned scale and shift.

    The statistics are computed with keep_dims=True so they broadcast back
    against `x`; 1e-7 is added under the square root for numerical safety.
    """
    reduce_axes = (1, 2)
    centred = x - tf.reduce_mean(x, reduce_axes, keep_dims=True)
    variance = tf.reduce_mean(tf.square(centred), reduce_axes, keep_dims=True)
    normalised = centred / tf.sqrt(1e-7 + variance)
    return normalised * self.gamma + self.beta
def testGradient(self):
    """Compare analytic and numeric Jacobians of reduce_mean for several axes."""
    s = [2, 3, 4, 2]
    x = np.arange(1.0, 49.0).reshape(s).astype(np.float32)
    # (reduction axes, expected output shape) for each case, in order:
    # a partial reduction, a full reduction and an empty axis list.
    cases = (
        ([1, 2], [2, 2]),
        ([0, 1, 2, 3], [1]),
        ([], [2, 3, 4, 2]),
    )
    with self.test_session():
        t = tf.convert_to_tensor(x)
        for axes, output_shape in cases:
            reduced = tf.reduce_mean(t, axes)
            jacob_t, jacob_n = tf.test.compute_gradient(
                t, s, reduced, output_shape, x_init_value=x, delta=1)
            self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
def standard_reg():
    """Return the weighted mean-square penalty on the 'sDW1' and 'sRW' params.

    Weights, parameters and `tf` all come from the enclosing scope.
    """
    zero = tf.constant(0.0, dtype=tf.float32)
    dw1_penalty = standard_w_weight_reg * tf.reduce_mean(
        tf.square(net_params['sDW1']))
    with_dw1 = zero + dw1_penalty
    # The 'sDW2' parameter is intentionally not penalised here.
    rw_penalty = regressor_w_weight_reg * tf.reduce_mean(
        tf.square(net_params['sRW']))
    return with_dw1 + rw_penalty
def get_losses(obj_mask):
    """Get motion constraint loss.

    Compares the dense depth prediction inside the object mask with a scalar
    depth approximated from the mask's pixel height. `self`, `i` and
    `depth_pred` are taken from the enclosing scope.
    """
    # Find height of segment: row extent of the pixels where mask > 0.5.
    inside = tf.greater(
        obj_mask[:, :, 0], tf.constant(0.5, dtype=tf.float32))
    coords = tf.where(inside)  # Shape (num_true, 2=yx)
    rows = coords[:, 0]
    y_max = tf.reduce_max(rows)
    y_min = tf.reduce_min(rows)
    seg_height = y_max - y_min

    # NOTE(review): seg_height is 0 for a one-row mask, which makes the
    # division below blow up - confirm callers filter such masks.
    f_y = self.intrinsic_mat[i, 0, 1, 1]
    approx_depth = (f_y * self.global_scale_var) / tf.to_float(seg_height)

    # Depth predictions restricted to the masked pixels.
    mask_3d = tf.reshape(
        obj_mask[:, :, 0], (self.img_height, self.img_width, 1))
    selector = tf.greater(mask_3d, tf.constant(0.5, dtype=tf.float32))
    reference_pred = tf.boolean_mask(depth_pred, selector)

    # Establish loss on approx_depth, a scalar, and reference_pred, our dense
    # prediction. Normalize both to prevent degenerative depth shrinking.
    global_mean_depth_pred = tf.reduce_mean(depth_pred)
    reference_pred /= global_mean_depth_pred
    approx_depth /= global_mean_depth_pred

    return tf.reduce_mean(tf.abs(reference_pred - approx_depth))
def variable_summaries(var, name, collection_key):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization).

    Args:
        - var: Tensor for variable from which we want to log.
        - name: Variable name.
        - collection_key: Collection to save the summary to, can be any key of
            `VAR_LOG_LEVELS`.

    Raises:
        ValueError: if `collection_key` is not a key of `VAR_LOG_LEVELS`.
    """
    if collection_key not in VAR_LOG_LEVELS:
        raise ValueError('"{}" not in `VAR_LOG_LEVELS`'.format(collection_key))
    collections = VAR_LOG_LEVELS[collection_key]

    with tf.name_scope(name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean, collections)

        # Element count, taken from the dynamic shape.
        num_params = tf.reduce_prod(tf.shape(var))
        tf.summary.scalar('num_params', num_params, collections)

        with tf.name_scope('stddev'):
            squared_deviation = tf.square(var - mean)
            stddev = tf.sqrt(tf.reduce_mean(squared_deviation))
        tf.summary.scalar('stddev', stddev, collections)

        tf.summary.scalar('max', tf.reduce_max(var), collections)
        tf.summary.scalar('min', tf.reduce_min(var), collections)
        tf.summary.histogram('histogram', var, collections)
        tf.summary.scalar('sparsity', tf.nn.zero_fraction(var), collections)
def soft_triplet_loss(anchor, positive, negative, extra=True, scope="soft_triplet_loss"):
    r"""Loss for triplet networks as described in the paper:
    `Deep Metric Learning using Triplet Network
    <https://arxiv.org/abs/1412.6622>`_ by Hoffer et al.

    It is a softmax loss whose two logits are the Euclidean distances
    :math:`\|anchor-positive\|_2` and :math:`\|anchor-negative\|_2`; the
    target class is always the negative distance (index 1).

    Args:
        anchor (tf.Tensor): anchor feature vectors of shape [Batch, N].
        positive (tf.Tensor): features of positive match of the same shape.
        negative (tf.Tensor): features of negative match of the same shape.
        extra (bool): also return distances for pos and neg.

    Returns:
        tf.Tensor: triplet-loss as scalar (and optionally average_pos_dist,
        average_neg_dist)
    """
    # Keeps the sqrt (and its gradient) finite when two features coincide.
    eps = 1e-10
    with tf.name_scope(scope):
        d_pos = tf.sqrt(tf.reduce_sum(tf.square(anchor - positive), 1) + eps)
        d_neg = tf.sqrt(tf.reduce_sum(tf.square(anchor - negative), 1) + eps)

        logits = tf.stack([d_pos, d_neg], axis=1)
        # d_pos is already rank 1 ([Batch]). Squeezing it would turn a batch
        # of one into a scalar, which no longer matches the [1, 2] logits.
        ones = tf.ones_like(d_pos, dtype="int32")

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=ones))

        if extra:
            pos_dist = tf.reduce_mean(d_pos, name='pos-dist')
            neg_dist = tf.reduce_mean(d_neg, name='neg-dist')
            return loss, pos_dist, neg_dist
        else:
            return loss
def __init__(self, nA, learning_rate,decay,grad_clip,entropy_beta, state_shape=[84,84,4], master=None, device_name='/gpu:0', scope_name='master'):
    """Build an actor-critic network; a worker when `master` is given, else the master.

    Args:
        nA: number of actions (depth of the one-hot action encoding).
        learning_rate: learning rate of the master's RMSProp optimizer.
        decay: RMSProp decay of the master's optimizer.
        grad_clip: clip value passed to tf.clip_by_average_norm (workers only).
        entropy_beta: weight of the entropy term in the worker loss.
        state_shape: per-example state shape; a batch axis is prepended.
            The default list is only read, never mutated.
        master: another instance providing `optimizer` and `train_vars`;
            when None this instance only creates the optimizer.
        device_name: device on which the graph is built.
        scope_name: scope name passed to the network builders.
    """
    with tf.device(device_name) :
        self.state = tf.placeholder(tf.float32,[None]+state_shape)
        block, self.scope = ActorCritic._build_shared_block(self.state,scope_name)
        self.policy, self.log_softmax_policy = ActorCritic._build_policy(block,nA,scope_name)
        self.value = ActorCritic._build_value(block,scope_name)
        # Sorted by name so that this list lines up with master.train_vars
        # when gradients are zipped with the master's variables below.
        self.train_vars = sorted(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope.name), key=lambda v:v.name)
        if( master is not None ) :
            # Worker: gradients are computed on the local variables and
            # applied to the master's variables.
            self.sync_op= self._sync_op(master)
            self.action = tf.placeholder(tf.int32,[None,])
            self.target_value = tf.placeholder(tf.float32,[None,])
            # NOTE(review): assumes self.value has shape [batch]; a [batch, 1]
            # value would broadcast to [batch, batch] here - confirm in
            # _build_value.
            advantage = self.target_value - self.value
            entropy = tf.reduce_sum(-1. * self.policy * self.log_softmax_policy,axis=1)
            # Log-probability of the action that was taken.
            log_p_s_a = tf.reduce_sum(self.log_softmax_policy * tf.one_hot(self.action,nA),axis=1)
            # stop_gradient keeps the policy term from training the value head.
            self.policy_loss = tf.reduce_mean(tf.stop_gradient(advantage)*log_p_s_a)
            self.entropy_loss = tf.reduce_mean(entropy)
            self.value_loss = tf.reduce_mean(advantage**2)
            # policy_loss and entropy_loss are quantities to maximise, hence
            # the negative signs.
            loss = -self.policy_loss - entropy_beta* self.entropy_loss + self.value_loss
            self.gradients = tf.gradients(loss,self.train_vars)
            clipped_gs = [tf.clip_by_average_norm(g,grad_clip) for g in self.gradients]
            self.train_op = master.optimizer.apply_gradients(zip(clipped_gs,master.train_vars))
        else :
            # Master: only owns the shared optimizer.
            #self.optimizer = tf.train.AdamOptimizer(learning_rate,beta1=BETA)
            self.optimizer = tf.train.RMSPropOptimizer(learning_rate,decay=decay,use_locking=True)
def build_graph(self, image, label):
    """Build the pre-activation ResNet classifier and return its total cost.

    Args:
        image: NHWC image batch with values in [0, 255].
        label: per-example label distribution (argmax is used as the class).

    Returns:
        Scalar tensor named 'cost': cross-entropy plus weight decay.
    """
    assert tf.test.is_gpu_available()

    # Per-channel normalisation, then NHWC -> NCHW.
    channel_mean = tf.constant([0.4914, 0.4822, 0.4465], dtype=tf.float32)
    channel_std = tf.constant([0.2023, 0.1994, 0.2010], dtype=tf.float32)
    image = ((image / 255.0) - channel_mean) / channel_std
    image = tf.transpose(image, [0, 3, 1, 2])

    conv_init = tf.variance_scaling_initializer(
        scale=1.0 / 3, mode='fan_in', distribution='uniform')
    with argscope([Conv2D, BatchNorm, GlobalAvgPooling],
                  data_format='channels_first'), \
            argscope(Conv2D, kernel_initializer=conv_init):
        features = Conv2D('conv0', image, 64, kernel_size=3, strides=1,
                          use_bias=False)
        for module_idx, num_blocks in enumerate(MODULE_SIZES):
            for block_idx in range(num_blocks):
                # Downsample at the first block of every module but the first.
                if block_idx == 0 and module_idx > 0:
                    stride = 2
                else:
                    stride = 1
                with tf.variable_scope("res%d.%d" % (module_idx, block_idx)):
                    features = preactivation_block(
                        features, FILTER_SIZES[module_idx], stride)
        features = GlobalAvgPooling('gap', features)
        logits = FullyConnected(
            'linear', features, CLASS_NUM,
            kernel_initializer=tf.random_normal_initializer(stddev=1e-3))

    per_example_ce = tf.nn.softmax_cross_entropy_with_logits(
        labels=label, logits=logits)
    ce_cost = tf.reduce_mean(per_example_ce, name='cross_entropy_loss')

    hard_label = tf.to_int32(tf.argmax(label, axis=1))
    in_top1 = tf.nn.in_top_k(logits, hard_label, 1)
    wrong = tf.to_float(tf.logical_not(in_top1), name='wrong_vector')
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'), ce_cost)
    add_param_summary(('.*/W', ['histogram']))

    # weight decay on all W matrixes. including convolutional layers
    wd_cost = tf.multiply(
        WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')

    return tf.add_n([ce_cost, wd_cost], name='cost')
def func_for_scan(prev_output, current_element):
    """One scan step: sample weights and latents, decode, return mean log-terms.

    Both arguments are ignored; `x`, `encoder`, `decoder` and `self` come
    from the enclosing scope.

    Returns:
        A stacked tensor of the means of
        [log_px, log_pz, log_qz, log_pW, log_qW], in that order.
    """
    # Sample decoder weights __, [1], [1]
    W, log_pW, log_qW = decoder.sample_weights()

    # Sample z [P,B,Z], [P,B], [P,B]
    z, log_pz, log_qz = self.sample_z(x, encoder, decoder, W)
    # z: [PB,Z]
    flat_z = tf.reshape(
        z, [self.n_z_particles * self.batch_size, self.z_size])

    # Decode [PB,X]
    flat_y = decoder.feedforward(W, flat_z)
    # y: [P,B,X]
    y = tf.reshape(
        flat_y, [self.n_z_particles, self.batch_size, self.x_size])

    # Likelihood p(x|z) [P,B]
    log_px = log_bern(x, y)

    return tf.stack([
        tf.reduce_mean(log_px),
        tf.reduce_mean(log_pz),
        tf.reduce_mean(log_qz),
        tf.reduce_mean(log_pW),
        tf.reduce_mean(log_qW),
    ])
def create_z_cycloss(self, z, x_hat, encoder, generator):
    """Build the latent cycle-consistency loss terms enabled in the config.

    Args:
        z: latent sample; re-encoded via encoder.reuse(generator.reuse(z)).
        x_hat: generated sample; re-encoded via encoder.reuse(x_hat).
        encoder: object exposing `reuse(tensor)`.
        generator: object exposing `reuse(tensor)`.

    Returns:
        The sum of the enabled terms, or None when neither
        `config.z_hat_lambda` nor `config.z_cycloss_lambda` is truthy.
    """
    config = self.config
    ops = self.ops
    distance = config.distance or ops.lookup('l1_distance')

    z_hat_term = None
    if config.z_hat_lambda:
        z_hat_weight = config.z_hat_cycloss_lambda
        recode_z_hat = encoder.reuse(x_hat)
        # NOTE(review): `z_hat` is not defined anywhere in this method, so
        # this branch raises NameError unless a module-level `z_hat` exists.
        # Also note the gate reads `z_hat_lambda` while the weight reads
        # `z_hat_cycloss_lambda` - confirm which key is intended.
        z_hat_term = tf.reduce_mean(distance(z_hat, recode_z_hat))
        z_hat_term *= z_hat_weight

    z_term = None
    if config.z_cycloss_lambda:
        recode_z = encoder.reuse(generator.reuse(z))
        z_term = tf.reduce_mean(distance(z, recode_z))
        z_weight = config.z_cycloss_lambda
        if z_weight is None:
            z_weight = 0
        z_term *= z_weight

    if z_term is not None and z_hat_term is not None:
        return z_term + z_hat_term
    if z_term is not None:
        return z_term
    return z_hat_term
def create_graph(self):
    """Build the CBOW-style embedding graph inside the instance's tf.Graph.

    Context-word embeddings are averaged and trained with a sampled softmax
    loss; an L2-normalised copy of the embedding table is also created.
    """
    with self.__graph.as_default():
        # 2 * window_size context-word ids per example, one target id.
        self.__featurePlaceHolder = tf.placeholder(
            dtype=tf.int32, shape=[None, self.__window_size * 2])
        self.__labelPlaceHolder = tf.placeholder(
            dtype=tf.int32, shape=[None, 1])

        table_shape = [self.__vocabulary_size, self.__embedding_size]
        embedding_table = tf.Variable(
            initial_value=tf.truncated_normal(shape=table_shape))
        context_vectors = tf.nn.embedding_lookup(
            params=embedding_table, ids=self.__featurePlaceHolder)
        # Average the context-word embeddings.
        context_mean = tf.reduce_mean(context_vectors, axis=1)

        output_weights = tf.Variable(
            initial_value=tf.truncated_normal(
                shape=[self.__vocabulary_size, self.__embedding_size]))
        output_biases = tf.Variable(
            initial_value=tf.zeros([self.__vocabulary_size]))

        per_example_loss = tf.nn.sampled_softmax_loss(
            weights=output_weights,
            biases=output_biases,
            inputs=context_mean,
            labels=self.__labelPlaceHolder,
            num_sampled=self.__num_sampled,
            num_classes=self.__vocabulary_size)
        self.__loss = tf.reduce_mean(per_example_loss)
        self.__optimizer = tf.train.AdagradOptimizer(1.0).minimize(self.__loss)

        # Row-wise L2 norm, kept 2-D so it broadcasts in the division.
        row_norm = tf.sqrt(
            tf.reduce_sum(tf.square(embedding_table), 1, keep_dims=True))
        self.__normalized_embedding = embedding_table / row_norm
def __init__(self, num_features, num_output, l2_reg_lambda=0.0, neg_output=False):
    """Single affine layer with ELU/ReLU output trained with squared error.

    Args:
        num_features: width of the input vectors.
        num_output: width of the output vectors.
        l2_reg_lambda: weight of the L2 penalty on W and b.
        neg_output: use ELU (can go negative) instead of ReLU on the output.
    """
    self.input_x = tf.placeholder(
        tf.float32, [None, num_features], name="input_x")
    self.input_y = tf.placeholder(
        tf.float32, [None, num_output], name="input_y")

    # Keeping track of l2 regularization loss (optional)
    l2_penalty = tf.constant(0.0)

    with tf.name_scope("softmax"):
        weight_shape = [num_features, num_output]
        W = tf.Variable(tf.truncated_normal(weight_shape, stddev=0.1))
        b = tf.Variable(tf.constant(0.1, shape=[num_output]))
        self.raw_scores = tf.nn.xw_plus_b(self.input_x, W, b, name="scores")
        if neg_output:
            # The op keeps its historical name "tanh" although it is an ELU.
            self.scores = tf.nn.elu(self.raw_scores, name="tanh")
        else:
            self.scores = tf.nn.relu(self.raw_scores, name="relu")
        l2_penalty += tf.nn.l2_loss(W)
        l2_penalty += tf.nn.l2_loss(b)

    with tf.name_scope("loss"):
        # Element-wise squared error; avgloss is the mean absolute error.
        self.losses = tf.square(tf.sub(self.scores, self.input_y))
        self.avgloss = tf.reduce_mean(
            tf.abs(tf.sub(self.scores, self.input_y)))
        self.loss = tf.reduce_mean(self.losses) + l2_reg_lambda * l2_penalty
def get(self, rewards, pads, values, final_values, log_probs, prev_log_probs, target_log_probs, entropies, logits):
    """Build the sampled policy-gradient loss with a per-group mean baseline.

    Only `rewards`, `pads` and `log_probs` are used; the other arguments are
    accepted for interface compatibility and ignored.

    Returns:
        (loss, raw_loss, baseline, gradient_ops, merged_summaries)
    """
    seq_length = tf.shape(rewards)[0]
    grouped_shape = [seq_length, -1, self.num_samples]

    # Mask out padded steps, grouping episodes into blocks of num_samples.
    not_pad = tf.reshape(1 - pads, grouped_shape)
    rewards = not_pad * tf.reshape(rewards, grouped_shape)
    log_probs = not_pad * tf.reshape(sum(log_probs), grouped_shape)

    # Sum over the time axis.
    total_rewards = tf.reduce_sum(rewards, 0)
    total_log_probs = tf.reduce_sum(log_probs, 0)

    bonus = self.get_bonus(total_rewards, total_log_probs)
    rewards_and_bonus = total_rewards + self.bonus_weight * bonus

    # Baseline: mean over the samples of each group.
    baseline = tf.reduce_mean(rewards_and_bonus, 1, keep_dims=True)

    advantage = tf.stop_gradient(rewards_and_bonus - baseline)
    loss = tf.reduce_mean(-advantage * total_log_probs)
    raw_loss = loss  # TODO

    gradient_ops = self.training_ops(
        loss, learning_rate=self.learning_rate)

    tf.summary.histogram('log_probs', total_log_probs)
    tf.summary.histogram('rewards', total_rewards)
    tf.summary.scalar('avg_rewards', tf.reduce_mean(total_rewards))
    tf.summary.scalar('loss', loss)

    return loss, raw_loss, baseline, gradient_ops, tf.summary.merge_all()
def cnn_setup(x, y, keep_prob, lr, stddev):
    """Build a 2-conv / 2-dense MNIST classifier.

    Args:
        x: flattened 28x28 single-channel image batch.
        y: one-hot labels with 10 classes.
        keep_prob: dropout keep probability for the hidden dense layer.
        lr: Adam learning rate.
        stddev: standard deviation passed to the `weight` initialiser helper.

    Returns:
        (train_op, accuracy, cross_entropy)
    """
    first_hidden = 32
    second_hidden = 64
    fc_hidden = 1024

    # Convolution block 1: 5x5 conv -> ReLU -> 2x2 max-pool.
    W_conv1 = weight([5, 5, 1, first_hidden], stddev)
    b_conv1 = bias([first_hidden])
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Convolution block 2.
    W_conv2 = weight([5, 5, first_hidden, second_hidden], stddev)
    b_conv2 = bias([second_hidden])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Dense hidden layer on the flattened 7x7 feature maps, with dropout.
    W_fc1 = weight([7 * 7 * second_hidden, fc_hidden], stddev)
    b_fc1 = bias([fc_hidden])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * second_hidden])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Readout layer.
    W_fc2 = weight([fc_hidden, 10], stddev)
    b_fc2 = bias([10])
    logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    y_conv = tf.nn.softmax(logits)

    # Cross-entropy is computed from the logits: taking tf.log of the softmax
    # output gives -inf (and NaN gradients) once a probability underflows.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

    correct_pred = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
    return (tf.train.AdamOptimizer(lr).minimize(cross_entropy),
            tf.reduce_mean(tf.cast(correct_pred, tf.float32)),
            cross_entropy)
def test_expected_value(self):
    """Check sample mean/variance of random_rayleigh against closed forms."""
    shape_ = np.array([2, int(1e3)], np.int32)
    shape = (tf.constant(shape_) if self.use_static_shape
             else tf.placeholder_with_default(shape_, shape=None))
    # This shape will require broadcasting before sampling.
    scale_ = np.linspace(0.1, 0.5, 3 * 2).astype(self.dtype).reshape(3, 2)
    scale = (tf.constant(scale_) if self.use_static_shape
             else tf.placeholder_with_default(scale_, shape=None))
    x = tfp.math.random_rayleigh(shape,
                                 scale=scale[..., tf.newaxis],
                                 dtype=self.dtype,
                                 seed=42)
    self.assertEqual(self.dtype, x.dtype.as_numpy_dtype)
    final_shape_ = [3, 2, int(1e3)]
    if self.use_static_shape:
        self.assertAllEqual(final_shape_, x.shape)
    sample_mean = tf.reduce_mean(x, axis=-1, keepdims=True)
    sample_var = tf.reduce_mean(tf.squared_difference(
        x, sample_mean), axis=-1)
    [x_, sample_mean_, sample_var_] = self.evaluate([
        x, sample_mean[..., 0], sample_var])
    self.assertAllEqual(final_shape_, x_.shape)
    # np.bool_ is used because the np.bool alias was removed from NumPy.
    self.assertAllEqual(np.ones_like(x_, dtype=np.bool_), x_ > 0.)
    # Expected mean: scale * sqrt(pi / 2).
    self.assertAllClose(np.sqrt(np.pi / 2.) * scale_, sample_mean_,
                        atol=0.05, rtol=0.)
    # Expected variance: scale**2 * (4 - pi) / 2.
    self.assertAllClose(0.5 * (4. - np.pi) * scale_**2., sample_var_,
                        atol=0.05, rtol=0.)
def _summarize_input(self, groundtruth_boxes_list, match_list):
    """Creates tensorflow summaries for the input boxes and anchors.

    Four scalar summaries are written, each the average over the images of a
    batch of: (1) groundtruth boxes, (2) anchors marked as positive,
    (3) anchors marked as negative, and (4) anchors marked as ignored.

    Args:
      groundtruth_boxes_list: a list of 2-D tensors of shape [num_boxes, 4]
        containing corners of the groundtruth boxes.
      match_list: a list of matcher.Match objects encoding the match between
        anchors and groundtruth boxes for each image of the batch,
        with rows of the Match objects corresponding to groundtruth boxes
        and columns corresponding to anchors.
    """
    num_boxes_per_image = tf.stack(
        [tf.shape(boxes)[0] for boxes in groundtruth_boxes_list])
    pos_anchors_per_image = tf.stack(
        [match.num_matched_columns() for match in match_list])
    neg_anchors_per_image = tf.stack(
        [match.num_unmatched_columns() for match in match_list])
    ignored_anchors_per_image = tf.stack(
        [match.num_ignored_columns() for match in match_list])

    per_image_counts = (
        ('Input/AvgNumGroundtruthBoxesPerImage', num_boxes_per_image),
        ('Input/AvgNumPositiveAnchorsPerImage', pos_anchors_per_image),
        ('Input/AvgNumNegativeAnchorsPerImage', neg_anchors_per_image),
        ('Input/AvgNumIgnoredAnchorsPerImage', ignored_anchors_per_image),
    )
    for tag, counts in per_image_counts:
        tf.summary.scalar(tag, tf.reduce_mean(tf.to_float(counts)))
def eval_summary(self, ground_truth, prediction):
    """
    Compute evaluation metrics (for EVAL mode).

    Args:
        ground_truth: Ground truth, shape: (?, #priors, 4 + #classes).
        prediction: Dictionary of predicted tensors with keys
            'locs'   (?, #priors, 4),
            'confs'  (?, #priors, #classes),
            'logits' (?, #priors, #classes).

    Returns:
        Loss stub, shape: (1,).
    """
    gt_boxes = ground_truth[:, :, :4]
    gt_classes = ground_truth[:, :, 4:]

    # Both per-prior losses have shape (batch_size, num_priors).
    per_prior_loc = self._localization_loss(gt_boxes, prediction['locs'])
    per_prior_cls = self._classification_loss(gt_classes, prediction['logits'])

    # A prior is positive when any class channel from index 5 on is set
    # (index 4 is presumably the background class - TODO confirm).
    positives = tf.reduce_max(ground_truth[:, :, 5:], axis=-1)  # (batch_size, num_priors)
    num_positives = tf.reduce_sum(positives)  # shape: (1,)

    # Localization loss only counts positive priors; both are summed per image.
    loc_loss = tf.reduce_sum(per_prior_loc * positives, axis=-1)  # (batch_size,)
    classification_loss = tf.reduce_sum(per_prior_cls, axis=-1)  # (batch_size,)

    self.__add_evaluation({
        'total_classification_loss': tf.reduce_mean(classification_loss),
        'total_localization_loss': tf.reduce_mean(loc_loss),
    })

    combined = classification_loss + self.loc_weight * loc_loss
    # The maximum keeps the division defined when there are no positives.
    return tf.reduce_mean(combined) / tf.maximum(1.0, num_positives)
def _potential_scale_reduction_single_state(state, independent_chain_ndims):
    """potential_scale_reduction for one single state `Tensor`."""
    with tf.name_scope(
            'potential_scale_reduction_single_state',
            values=[state, independent_chain_ndims]):
        # We assume exactly one leading dimension indexes e.g. correlated
        # samples from each Markov chain.
        state = tf.convert_to_tensor(state, name='state')
        sample_ndims = 1

        sample_axis = tf.range(0, sample_ndims)
        chain_axis = tf.range(
            sample_ndims, sample_ndims + independent_chain_ndims)
        sample_and_chain_axis = tf.range(
            0, sample_ndims + independent_chain_ndims)

        n = _axis_size(state, sample_axis)
        m = _axis_size(state, chain_axis)

        # In the language of Brooks and Gelman (1998),
        # B / n is the between chain variance, the variance of the chain means.
        # W is the within sequence variance, the mean of the chain variances.
        chain_means = tf.reduce_mean(state, sample_axis, keepdims=True)
        b_div_n = _reduce_variance(
            chain_means, sample_and_chain_axis, biased=False)

        chain_variances = _reduce_variance(
            state, sample_axis, keepdims=True, biased=True)
        w = tf.reduce_mean(chain_variances, sample_and_chain_axis)

        # sigma^2_+ is an estimate of the true variance, which would be
        # unbiased if each chain was drawn from the target. c.f. "law of
        # total variance."
        sigma_2_plus = w + b_div_n

        variance_ratio = ((m + 1.) / m) * sigma_2_plus / w
        correction = (n - 1.) / (m * n)
        return variance_ratio - correction
def init_opt(self):
    """Create the symbolic inputs, surrogate loss and KL constraint.

    Registers them with `self.optimizer` and returns an empty dict.
    """
    is_recurrent = int(self.policy.recurrent)
    # Recurrent policies carry one additional leading (time) axis.
    extra_dims = 1 + is_recurrent
    leading_dims = [None] * extra_dims

    obs_var = self.env.observation_space.new_tensor_variable(
        'obs',
        extra_dims=extra_dims,
    )
    action_var = self.env.action_space.new_tensor_variable(
        'action',
        extra_dims=extra_dims,
    )
    advantage_var = tensor_utils.new_tensor(
        'advantage',
        ndim=extra_dims,
        dtype=tf.float32,
    )
    dist = self.policy.distribution

    # Placeholders for the distribution parameters of the previous policy.
    old_dist_info_vars = {
        k: tf.placeholder(
            tf.float32, shape=leading_dims + list(shape), name='old_%s' % k)
        for k, shape in dist.dist_info_specs
    }
    old_dist_info_vars_list = [
        old_dist_info_vars[k] for k in dist.dist_info_keys]

    # Placeholders for any additional state the policy needs.
    state_info_vars = {
        k: tf.placeholder(
            tf.float32, shape=leading_dims + list(shape), name=k)
        for k, shape in self.policy.state_info_specs
    }
    state_info_vars_list = [
        state_info_vars[k] for k in self.policy.state_info_keys]

    valid_var = None
    if is_recurrent:
        # Mask that weights each time step in the reductions below.
        valid_var = tf.placeholder(
            tf.float32, shape=[None, None], name="valid")

    dist_info_vars = self.policy.dist_info_sym(obs_var, state_info_vars)
    kl = dist.kl_sym(old_dist_info_vars, dist_info_vars)
    lr = dist.likelihood_ratio_sym(
        action_var, old_dist_info_vars, dist_info_vars)

    if is_recurrent:
        # Masked means: weight by valid_var and divide by its sum.
        mean_kl = tf.reduce_sum(kl * valid_var) / tf.reduce_sum(valid_var)
        surr_loss = - tf.reduce_sum(
            lr * advantage_var * valid_var) / tf.reduce_sum(valid_var)
    else:
        mean_kl = tf.reduce_mean(kl)
        surr_loss = - tf.reduce_mean(lr * advantage_var)

    input_list = (
        [obs_var, action_var, advantage_var]
        + state_info_vars_list
        + old_dist_info_vars_list
    )
    if is_recurrent:
        input_list.append(valid_var)

    self.optimizer.update_opt(
        loss=surr_loss,
        target=self.policy,
        leq_constraint=(mean_kl, self.step_size),
        inputs=input_list,
        constraint_name="mean_kl"
    )
    return dict()
def testSampleConsistentStats(self):
    """Sample statistics of Independent(MVNDiag) match its analytic ones."""
    loc = np.float32([[-1., 1], [1, -1]])
    scale = np.float32([1., 0.5])
    n_samp = 1e4
    with self.test_session() as sess:
        base = tfd.MultivariateNormalDiag(
            loc=loc, scale_identity_multiplier=scale)
        ind = tfd.Independent(
            distribution=base, reinterpreted_batch_ndims=1)

        x = ind.sample(int(n_samp), seed=42)
        sample_mean = tf.reduce_mean(x, axis=0)
        sample_var = tf.reduce_mean(
            tf.squared_difference(x, sample_mean), axis=0)
        sample_std = tf.sqrt(sample_var)
        # Monte-Carlo entropy estimate: negative mean log-probability.
        sample_entropy = -tf.reduce_mean(ind.log_prob(x), axis=0)

        fetches = [
            sample_mean,
            sample_var,
            sample_std,
            sample_entropy,
            ind.mean(),
            ind.variance(),
            ind.stddev(),
            ind.entropy(),
            ind.mode(),
        ]
        (sample_mean_, sample_var_, sample_std_, sample_entropy_,
         actual_mean_, actual_var_, actual_std_, actual_entropy_,
         actual_mode_) = sess.run(fetches)

        self.assertAllClose(sample_mean_, actual_mean_, rtol=0.02, atol=0.)
        self.assertAllClose(sample_var_, actual_var_, rtol=0.04, atol=0.)
        self.assertAllClose(sample_std_, actual_std_, rtol=0.02, atol=0.)
        self.assertAllClose(sample_entropy_, actual_entropy_, rtol=0.01, atol=0.)
        self.assertAllClose(loc, actual_mode_, rtol=1e-6, atol=0.)
def build_graph(self, image_pos):
    """Build the generator/critic graph with a gradient penalty.

    Sets ``self.d_loss`` (critic loss including the penalty term) and
    ``self.g_loss`` (generator loss), registers summaries, and finally
    calls ``self.collect_variables()``.

    Args:
        image_pos: batch of real images; rescaled here as ``x / 128 - 1``.
    """
    # presumably maps 8-bit pixel values into roughly [-1, 1] — confirm
    image_pos = image_pos / 128.0 - 1

    # Random latent codes by default; a caller may feed `z` explicitly.
    z_default = tf.random_normal([self.batch, self.zdim], name='z_train')
    z = tf.placeholder_with_default(z_default, [None, self.zdim], name='z')

    initializer = tf.truncated_normal_initializer(stddev=0.02)
    with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                  kernel_initializer=initializer):
        with tf.variable_scope('gen'):
            image_gen = self.generator(z)
        tf.summary.image('generated-samples', image_gen, max_outputs=30)

        # One interpolation coefficient per example, broadcast over the
        # remaining three axes.
        alpha = tf.random_uniform(
            shape=[self.batch, 1, 1, 1], minval=0., maxval=1., name='alpha')
        interp = image_pos + alpha * (image_gen - image_pos)

        with tf.variable_scope('discrim'):
            vecpos = self.discriminator(image_pos)
            vecneg = self.discriminator(image_gen)
            vec_interp = self.discriminator(interp)

    # the Wasserstein-GAN losses
    self.d_loss = tf.reduce_mean(vecneg - vecpos, name='d_loss')
    self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')

    # the gradient penalty loss: push the per-example gradient norm at the
    # interpolated points towards 1.
    grad_wrt_interp = tf.gradients(vec_interp, [interp])[0]
    grad_norm = tf.sqrt(
        tf.reduce_sum(tf.square(grad_wrt_interp), [1, 2, 3]))
    gradients_rms = symbolic_functions.rms(grad_norm, 'gradient_rms')
    gradient_penalty = tf.reduce_mean(
        tf.square(grad_norm - 1), name='gradient_penalty')
    add_moving_summary(
        self.d_loss, self.g_loss, gradient_penalty, gradients_rms)

    # The summary above tracks the unpenalised critic loss; the loss that
    # is stored for optimisation includes the weighted penalty.
    self.d_loss = tf.add(self.d_loss, 10 * gradient_penalty)

    self.collect_variables()
def get_train(train_ph_dict, var_dict, var_ph_dict):
    """Build the training op, the full loss and the raw prediction error.

    Args:
        train_ph_dict: mapping providing the entries 'choice_0', 'state_0',
            'state_1', 'cont' and 'reward_1'.
        var_dict: mapping of names to the variables being trained; must
            contain the key 'b5'. Passed to ``get_q`` for 'state_0'.
        var_ph_dict: parameters passed to ``get_q`` for 'state_1'.

    Returns:
        A tuple ``(train, loss, score_diff)``: the gradient-descent op, the
        regularised loss it minimises, and the mean absolute error term on
        its own.
    """
    # get_q output for state_0, restricted to the entry selected by
    # 'choice_0' (one-hot over 9 entries).
    choice_mask = tf.one_hot(
        train_ph_dict['choice_0'], 9, axis=-1, dtype=tf.float32)
    q_chosen = choice_mask * get_q(train_ph_dict['state_0'], var_dict)
    q_chosen = tf.reduce_sum(q_chosen, reduction_indices=[1])

    # Largest get_q output for state_1, multiplied by 'cont' and scaled by
    # TRAIN_BETA.
    q_next = get_q(train_ph_dict['state_1'], var_ph_dict)
    q_next = tf.reduce_max(q_next, reduction_indices=[1])
    q_next = q_next * train_ph_dict['cont']
    q_next = q_next * tf.constant(TRAIN_BETA)

    # L2 penalty averaged over the total number of trainable elements.
    l2r = tf.constant(0.0)
    cell_count = tf.constant(0.0)
    for v in var_dict.values():
        l2r = l2r + get_l2(v)
        cell_count = cell_count + tf.to_float(tf.size(v))
    l2r = l2r / cell_count
    l2r = l2r / tf.constant(ELEMENT_L2_FACTOR * ELEMENT_L2_FACTOR)
    l2r = l2r * tf.constant(L2_WEIGHT)

    # Absolute (not squared) error between the chosen value and its target.
    error = q_chosen - q_next - train_ph_dict['reward_1']
    score_diff = tf.reduce_mean(tf.abs(error))

    loss = score_diff + l2r
    # Additional penalty on the mean of the 'b5' variable.
    loss = loss + (
        tf.abs(tf.reduce_mean(var_dict['b5'])) * tf.constant(L2_WEIGHT))

    # dict.values() is a view on Python 3; hand the optimizer a real list.
    train = tf.train.GradientDescentOptimizer(0.5).minimize(
        loss, var_list=list(var_dict.values()))
    return train, loss, score_diff
def batchnormalize(X, eps=1e-8, g=None, b=None):
    """Normalise a rank-2 or rank-4 tensor with statistics of the current batch.

    For rank 4 the mean and variance are taken over axes 0, 1 and 2; for
    rank 2 over axis 0. When both ``g`` and ``b`` are supplied the
    normalised tensor is scaled by ``g`` and shifted by ``b``, each
    reshaped to broadcast along the last axis.

    Args:
        X: tensor whose static rank is 2 or 4.
        eps: constant added to the variance before the square root.
        g: optional scale; only used when ``b`` is given as well.
        b: optional shift; only used when ``g`` is given as well.

    Returns:
        The normalised (and optionally scaled and shifted) tensor.

    Raises:
        NotImplementedError: if the static rank of ``X`` is neither 2 nor 4.
    """
    rank = X.get_shape().ndims
    if rank == 4:
        reduce_axes = [0, 1, 2]
        broadcast_shape = [1, 1, 1, -1]
    elif rank == 2:
        reduce_axes = 0
        broadcast_shape = [1, -1]
    else:
        raise NotImplementedError

    mean = tf.reduce_mean(X, reduce_axes)
    variance = tf.reduce_mean(tf.square(X - mean), reduce_axes)
    X = (X - mean) / tf.sqrt(variance + eps)

    # The affine step is skipped unless both parameters are present.
    if g is None or b is None:
        return X

    g = tf.reshape(g, broadcast_shape)
    b = tf.reshape(b, broadcast_shape)
    return X * g + b
def calc_reward(outputs):
    """Build the classification reward, the training cost and the train op.

    Only the last element of ``outputs`` is used. The cost is the negated
    batch mean of the log class probability under the one-hot labels plus
    the log location probability weighted by the per-example reward.

    Args:
        outputs: sequence of outputs; ``outputs[-1]`` is reshaped to
            ``(batch_size, cell_out_size)``.

    Returns:
        ``(cost, reward, max_p_y, correct_y, train_op)``.
    """
    # NOTE(review): tf.concat(axis, values) and tf.arg_max follow the
    # pre-1.0 TensorFlow API — confirm the targeted TF version.

    # look at ONLY THE END of the sequence
    last_output = tf.reshape(outputs[-1], (batch_size, cell_out_size))

    h_a_out = weight_variable((cell_out_size, n_classes))
    p_y = tf.nn.softmax(tf.matmul(last_output, h_a_out))
    max_p_y = tf.arg_max(p_y, 1)
    correct_y = tf.cast(labels_placeholder, tf.int64)

    # 1.0 where the predicted class equals the label, else 0.0.
    example_reward = tf.cast(tf.equal(max_p_y, correct_y), tf.float32)
    reward = tf.reduce_mean(example_reward)  # overall reward

    p_loc = tf.reshape(
        gaussian_pdf(mean_locs, sampled_locs), (batch_size, glimpses * 2))
    reward_column = tf.reshape(example_reward, (batch_size, 1))

    # 1e-5 keeps the logarithms finite when a probability is zero.
    class_term = tf.log(p_y + 1e-5) * onehot_labels_placeholder
    location_term = tf.log(p_loc + 1e-5) * reward_column
    objective = tf.concat(1, [class_term, location_term])
    objective = tf.reduce_mean(tf.reduce_sum(objective, 1), 0)
    cost = -objective

    train_op = tf.train.AdamOptimizer(lr).minimize(cost)
    return cost, reward, max_p_y, correct_y, train_op
def generator(self, img_batch):
    """Build the encoder/decoder generator inside the 'g_' variable scope.

    Four stride-2 convolutions encode the input; the result is reshaped to
    ``[batch_size, 2, 2, 1024]`` and decoded by four nearest-neighbour
    resize + stride-1 convolution stages, ending in ``tanh``. Intermediate
    activations are kept on ``self`` (``en_h0..en_h3``, ``z_``,
    ``fg_h1..fg_h4``, ``fg_fg``) and their shapes are printed.

    Args:
        img_batch: input image batch with ``self.channels`` channels.

    Returns:
        ``(generated, gen_reg, variables)``: the tanh output, the mean
        squared difference between ``img_batch`` and that output, and the
        variables collected from the 'g_' scope.
    """
    nearest = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    batch_norm = tf.contrib.layers.batch_norm
    down = dict(k_h=4, k_w=4, d_w=2, d_h=2)   # stride-2 encoder convolutions
    keep = dict(d_h=1, d_w=1)                 # stride-1 decoder convolutions

    def _report(tensor):
        # Log the static shape of a freshly built activation.
        print(tensor.get_shape().as_list())

    with tf.variable_scope('g_') as vs:
        # ------------------------------ ENCODER ------------------------------
        print('ENCODER')
        # The first batch norm is the only one without an explicit scope.
        h0 = conv2d(img_batch, self.channels, 128, name="enc_conv1", **down)
        self.en_h0 = tf.nn.relu(batch_norm(h0))
        add_activation_summary(self.en_h0)
        _report(self.en_h0)

        h1 = conv2d(self.en_h0, 128, 256, name="enc_conv2", **down)
        h1 = batch_norm(h1, scope="enc_bn2")
        self.en_h1 = tf.nn.relu(h1)
        add_activation_summary(self.en_h1)
        _report(self.en_h1)

        h2 = conv2d(self.en_h1, 256, 512, name="enc_conv3", **down)
        h2 = batch_norm(h2, scope="enc_bn3")
        self.en_h2 = tf.nn.relu(h2)
        add_activation_summary(self.en_h2)
        _report(self.en_h2)

        h3 = conv2d(self.en_h2, 512, 1024, name="enc_conv4", **down)
        h3 = batch_norm(h3, scope="enc_bn4")
        self.en_h3 = tf.nn.relu(h3)
        add_activation_summary(self.en_h3)
        _report(self.en_h3)

        # ----------------------------- GENERATOR -----------------------------
        print('GENERATOR')
        self.z_ = tf.reshape(self.en_h3, [self.batch_size, 2, 2, 1024])
        _report(self.z_)

        up1 = tf.image.resize_images(self.z_, [4, 4], method=nearest)
        up1 = conv2d(up1, 1024, 512, name="gen_conv1", **keep)
        self.fg_h1 = tf.nn.relu(
            batch_norm(up1, scope='g_f_bn1'), name='g_f_relu1')
        add_activation_summary(self.fg_h1)
        _report(self.fg_h1)

        up2 = tf.image.resize_images(self.fg_h1, [8, 8], method=nearest)
        up2 = conv2d(up2, 512, 256, name="gen_conv2", **keep)
        self.fg_h2 = tf.nn.relu(
            batch_norm(up2, scope='g_f_bn2'), name='g_f_relu2')
        add_activation_summary(self.fg_h2)
        _report(self.fg_h2)

        up3 = tf.image.resize_images(self.fg_h2, [16, 16], method=nearest)
        up3 = conv2d(up3, 256, 128, name="gen_conv3", **keep)
        self.fg_h3 = tf.nn.relu(
            batch_norm(up3, scope='g_f_bn3'), name='g_f_relu3')
        add_activation_summary(self.fg_h3)
        _report(self.fg_h3)

        # Last stage: resize to the crop size and project back to the
        # input channel count; no batch norm, tanh output.
        up4 = tf.image.resize_images(
            self.fg_h3, [self.crop_size, self.crop_size], method=nearest)
        self.fg_h4 = conv2d(up4, 128, self.channels, name="gen_conv4", **keep)
        self.fg_fg = tf.nn.tanh(self.fg_h4, name='g_f_actication')
        _report(self.fg_fg)

        # Reconstruction term: mean squared difference to the input.
        gen_reg = tf.reduce_mean(tf.square(img_batch - self.fg_fg))

    variables = tf.contrib.framework.get_variables(vs)
    return self.fg_fg, gen_reg, variables
# Script entry point (excerpt; the final run_model(...) call is cut off here).
# Builds placeholders for 32x32x3 inputs and int64 labels, takes logits from
# simple_model, and trains with Adam (5e-4) on the mean hinge loss against
# one-hot labels of depth 10, using preprocessed CIFAR-10 training data.
# NOTE(review): `is_training` is created but is not passed to simple_model in
# the visible call, which receives (X, y) — confirm this is intended.
if __name__ == '__main__': tf.reset_default_graph() # Setup input, e.g. data that changes every batch X = tf.placeholder(tf.float32, [ None, 32, 32, 3 ]) # First dim is None, and gets set automatically based on batch size y = tf.placeholder(tf.int64, [None]) is_training = tf.placeholder(tf.bool) # Construct model tf_pred = simple_model(X, y) # Define loss total_loss = tf.losses.hinge_loss(tf.one_hot(y, 10), logits=tf_pred) mean_loss = tf.reduce_mean(total_loss) # Define optimizer optimizer = tf.train.AdamOptimizer(5e-4) # Set learning rate train_step = optimizer.minimize(mean_loss) data = data_utils.get_preprocessed_CIFAR10('datasets/cifar-10-batches-py', should_transpose=False) Xtr, ytr = data['X_train'], data['y_train'] with tf.Session() as sess: with tf.device('/gpu:0'): sess.run(tf.global_variables_initializer()) run_model(sess, tf_pred, X,
# Second convolution stage: ReLU on the first pooling output, then a 5x5
# convolution from 6 to 16 channels, pooling, and another ReLU.
relu1_output = new_relu_layer(input=max1_output, name='relu_layer1')
conv2_ouput, weights_conv2 = new_convolution_layer(
    input=relu1_output,
    num_input_channel=6,
    filter_size=5,
    num_filter=16,
    name='conv_layer2',
)
max2_output = new_pool_layer(input=conv2_ouput, name='maxpool_layer2')
relu2_output = new_relu_layer(input=max2_output, name='relu_layer2')

# Flatten everything except the leading axis for the dense layers.
num_features = relu2_output.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(relu2_output, [-1, num_features])

# Classifier head: 128 hidden units, then 10 outputs.
fc1_output = new_fc_layer(
    input=layer_flat,
    num_inputs=num_features,
    num_outputs=128,
    name='fc_layer1',
)
relu3_output = new_relu_layer(input=fc1_output, name='relu_layer3')
fc2_output = new_fc_layer(
    input=relu3_output,
    num_inputs=128,
    num_outputs=10,
    name='fc_layer2',
)

with tf.variable_scope("Softmax"):
    y_pred = tf.nn.softmax(fc2_output)
    y_pred_class = tf.argmax(y_pred, dimension=1)

# The loss is computed from the raw fc2 outputs, not from y_pred.
with tf.variable_scope('entropy'):
    crossentropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=fc2_output, labels=y_true)
    cost = tf.reduce_mean(crossentropy)

with tf.variable_scope('optimiser'):
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)

with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(y_pred_class, y_true_cls)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Separate event writers for the training and validation runs.
writer = tf.summary.FileWriter("Training_FileWriter/")
writer1 = tf.summary.FileWriter("Validation_FileWriter/")

# Add the cost and accuracy to summary
tf.summary.scalar('loss', cost)
tf.summary.scalar('accuracy', accuracy)
# Mini-batch linear-regression setup (excerpt; fetch_batch is cut off here).
# Prepends a bias column of ones to the raw and to the standardised features,
# defines y_pred = X @ theta with an MSE loss minimised by gradient descent,
# and derives the number of batches from m and batch_size.
# NOTE(review): np.random.seed() returns None, so `know` is always None; the
# call is only useful for its seeding side effect.
print("{},{}".format(m, n)) housing_data_plus_bias = np.c_[np.ones((m, 1)), housing.data] print(housing_data_plus_bias.shape) scaler = StandardScaler() scaled_housing_data = scaler.fit_transform(housing.data) scaled_housing_data_plus_bias = np.c_[np.ones((m, 1)), scaled_housing_data] print(scaled_housing_data_plus_bias.shape) learning_rate = 0.01 X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X") y = tf.placeholder(tf.float32, shape=(None, 1), name="y") theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name='theta') y_pred = tf.matmul(X, theta, name="predictions") error = y_pred - y mse = tf.reduce_mean(tf.square(error), name="mse") optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) training_op = optimizer.minimize(mse) init = tf.global_variables_initializer() options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() n_epochs = 10 batch_size = 100 n_batches = int(np.ceil(m / batch_size)) def fetch_batch(epoch, batch_index, batch_size): know = np.random.seed(epoch * n_batches + batch_index) indices = np.random.randint(m, size=batch_size)
def run(self, dataset):
    """Train the convolutional classifier and return its mean test accuracy.

    Resets the default graph, standardises ``dataset.train.data`` and
    ``dataset.test.data`` in place (each with its own global mean and
    standard deviation), trains for ``self.epochs`` epochs with Adam on
    the softmax cross-entropy, then averages the accuracy over all full
    test batches.

    Args:
        dataset: object exposing ``train`` and ``test`` splits (each with
            ``data``, ``num_examples`` and ``next_batch(batch_size, i)``)
            and a ``shuffle()`` method.

    Returns:
        The mean of the per-batch test accuracies.

    Raises:
        ZeroDivisionError: if the test split holds fewer examples than one
            batch, so no accuracy could be measured.
    """
    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, [None, self.n_input, self.n_input])
    y = tf.placeholder(tf.float32, [None, self.n_classes])

    # Preprocess data. NOTE: this mutates the caller's dataset.
    dataset.train.data = (
        dataset.train.data - np.mean(dataset.train.data)) / np.std(
            dataset.train.data)
    dataset.test.data = (
        dataset.test.data - np.mean(dataset.test.data)) / np.std(
            dataset.test.data)

    # Construct model
    pred = self.conv(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.lr).minimize(cost)
    saver = tf.train.Saver()

    # Initializing the variables
    init = tf.global_variables_initializer()

    # Launch the graph; the finally clause releases the session even when
    # training or evaluation raises.
    sess = tf.Session()
    try:
        sess.run(init)
        if self.load:
            saver.restore(sess, '/tmp/cnn')

        # Only full batches are used; a trailing partial batch is dropped.
        total_batch = int(dataset.train.num_examples / self.batch_size)

        # Training cycle
        for epoch in range(self.epochs):
            avg_cost = 0.
            dataset.shuffle()
            # Loop over all batches
            for i in range(total_batch):
                batch_x, batch_y = dataset.train.next_batch(
                    self.batch_size, i)
                _, c = sess.run([optimizer, cost],
                                feed_dict={x: batch_x, y: batch_y})
                # Compute average loss
                avg_cost += c / total_batch
            if self.verbose:
                print("Epoch:", '%04d' % (epoch + 1), "cost=",
                      "{:.9f}".format(avg_cost))

        # Persist the trained weights once training has finished.
        if self.save:
            saver.save(sess, "/tmp/cnn")

        # Test model
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        # Calculate accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        accs = []
        total_test_batch = int(dataset.test.num_examples / self.batch_size)
        for i in range(total_test_batch):
            batch_x, batch_y = dataset.test.next_batch(self.batch_size, i)
            accs.append(
                accuracy.eval({x: batch_x, y: batch_y}, session=sess))
    finally:
        sess.close()

    # print() with a single argument behaves the same on Python 2 and 3.
    print(accs)
    return sum(accs) / float(len(accs))
# Run the encoder/decoder over consecutive windows of the input and accumulate
# a class-weighted cross-entropy loss.
#
# The input is processed in int(input_tokens.shape[1] / s_stateful) windows of
# s_stateful positions along axis 1 (a trailing remainder is not visited).
# For each window the encoder is called with the window and the forward and
# backward states returned for the previous window (zeros at first). The
# decoder state starts as the concatenation of both encoder states plus
# Gaussian noise (stddev `enc_stddev`), and after every decoder step further
# noise (stddev `dec_stddev`) is added. Neither stddev is a parameter of this
# function; both must exist in an enclosing scope.
#
# When output_tokens is non-empty, each batch element is weighted by
# (1 - beta) / (1 - beta ** count) with beta = 0.9999, where count is taken
# from pos_variations_count[str(position)] for that element's target token
# (the inline comment cites arXiv:1901.05555). The weights are normalised to
# sum to 1 over the batch and passed as sample_weight to cross_entropy_loss;
# the mean of the result is added to `loss`.
#
# Returns (global_logits, gen_encoder, gen_decoder, loss), where global_logits
# is the per-step decoder output concatenated along axis -2.
#
# NOTE(review): seq_len, vocab_size, tf_ratio, train_test, mut_freq and
# batch_step are not read by any live statement in the visible body.
# NOTE(review): the LabelEncoder-based class_wt, class_var_pos,
# norm_class_var_pos, real_class_wts and uniform_wts appear not to feed into
# the returned loss; random.shuffle(all_cls) still consumes random state.
# NOTE(review): o_tokens is only assigned when output_tokens is non-empty —
# confirm that `i_tokens = o_tokens` is guarded accordingly.
def loop_encode_decode_stateful(seq_len, batch_size, vocab_size, input_tokens, output_tokens, gen_encoder, gen_decoder, enc_units, tf_ratio, train_test, s_stateful, mut_freq, pos_variations_count, batch_step): loss = tf.constant(0.0) global_logits = list() enc_state_f = tf.zeros((batch_size, enc_units)) enc_state_b = tf.zeros((batch_size, enc_units)) n_stateful_batches = int(input_tokens.shape[1] / float(s_stateful)) i_tokens = tf.fill([batch_size, 1], 0) for stateful_index in range(n_stateful_batches): s_batch = input_tokens[:, stateful_index * s_stateful:(stateful_index + 1) * s_stateful] enc_output, enc_state_f, enc_state_b = gen_encoder( [s_batch, enc_state_f, enc_state_b], training=True) dec_state = tf.concat([enc_state_f, enc_state_b], -1) dec_state = tf.math.add( dec_state, tf.random.normal((dec_state.shape[0], dec_state.shape[1]), stddev=enc_stddev)) for t in range(s_batch.shape[1]): dec_result, dec_state = gen_decoder([i_tokens, dec_state], training=True) dec_state = tf.math.add( dec_state, tf.random.normal((dec_state.shape[0], dec_state.shape[1]), stddev=dec_stddev)) orig_t = stateful_index * s_stateful + t if len(output_tokens) > 0: o_tokens = output_tokens[:, orig_t:orig_t + 1] # collect different variations at each POS u_var_distribution = np.array( list(pos_variations_count[str(orig_t)].values())) unique_cls = np.array( list(pos_variations_count[str(orig_t)].keys())) all_cls = tf.repeat(unique_cls, repeats=u_var_distribution).numpy() random.shuffle(all_cls) y = all_cls classes = unique_cls le = LabelEncoder() y_ind = le.fit_transform(y) recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64)) class_wt = recip_freq[le.transform(classes)] beta = 0.9999 s_wts = np.sum(class_wt) class_var_pos = dict() norm_class_var_pos = dict() exp_class_var_pos = dict() real_class_wts = list() for k_i, key in enumerate(unique_cls): # loss input taken from paper: https://arxiv.org/pdf/1901.05555.pdf class_var_pos[key] = class_wt[k_i] #/ 
float(s_wts) norm_class_var_pos[key] = class_wt[k_i] / float(s_wts) exp_class_var_pos[key] = (1 - beta) / ( 1 - beta**pos_variations_count[str(orig_t)][key]) real_class_wts.append(exp_class_var_pos[key]) '''for key in exp_class_var_pos: exp_class_var_pos[key] = exp_class_var_pos[key] / np.sum(real_class_wts)''' exp_norm_u_var_distribution = np.zeros((batch_size)) uniform_wts = np.zeros((batch_size)) for pos_idx, pos in enumerate( np.reshape(o_tokens, (batch_size, ))): exp_norm_u_var_distribution[pos_idx] = exp_class_var_pos[ pos] #/ float(np.sum(real_class_wts)) exp_norm_u_var_distribution = exp_norm_u_var_distribution / np.sum( exp_norm_u_var_distribution) weighted_loss = tf.reduce_mean( cross_entropy_loss( o_tokens, dec_result, sample_weight=exp_norm_u_var_distribution)) #step_loss = weighted_loss loss += weighted_loss global_logits.append(dec_result) i_tokens = o_tokens global_logits = tf.concat(global_logits, axis=-2) #loss = loss / seq_len return global_logits, gen_encoder, gen_decoder, loss
def loss_function(real, pred):
    """Return the mean sparse categorical cross-entropy of ``pred`` against ``real``.

    ``pred`` is treated as probabilities (``from_logits=False``). The Keras
    loss is built with ``reduction='none'`` and the resulting per-element
    values are averaged here with ``tf.reduce_mean``.

    Args:
        real: integer class labels.
        pred: predicted class probabilities.

    Returns:
        A scalar tensor holding the mean loss.
    """
    per_element_loss = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=False, reduction='none')(y_true=real, y_pred=pred)
    return tf.reduce_mean(per_element_loss)
# Bidirectional LSTM classifier (excerpt; the training loop is cut off here).
# Forward and backward outputs are concatenated along axis 2, the result is
# transposed so the step axis comes first, and only the last step is projected
# through W and b. Training uses Adam (0.001) on the softmax cross-entropy.
# NOTE(review): after the concat the last axis holds both directions, so it is
# presumably 2 * n_hidden rather than the n_hidden stated in the inline shape
# comments — confirm against the shape of W.
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(n_hidden) # outputs : [batch_size, len_seq, n_hidden], states : [batch_size, n_hidden] outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, X, dtype=tf.float32) outputs = tf.concat([outputs[0], outputs[1]], 2) # output[0] : lstm_fw, output[1] : lstm_bw outputs = tf.transpose(outputs, [1, 0, 2]) # [n_step, batch_size, n_hidden] outputs = outputs[-1] # [batch_size, n_hidden] model = tf.matmul(outputs, W) + b # the final output result (one-hot format) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=model, labels=Y)) optimizer = tf.train.AdamOptimizer(0.001).minimize(cost) prediction = tf.cast(tf.argmax(model, 1), tf.int32) # Training init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) input_batch, target_batch = make_batch(sentence) for epoch in range(10000): _, loss = sess.run([optimizer, cost], feed_dict={ X: input_batch,
# GAN graph with an additional noise-inversion objective (excerpt; the
# generator optimizer call is cut off here).
# Discriminator(...) returns a pair: a score and a second output that is
# compared with input_noise for the inverter cost (mean squared difference).
# In 'wgan-gp' mode the discriminator cost is mean(dis_fake) - mean(dis_real)
# plus LAMBDA times a gradient penalty evaluated at random interpolations
# between real_data and fake_data.
# NOTE(review): alpha is created with a fixed leading size BATCH_SIZE while
# both placeholders leave the batch axis as None — presumably every feed must
# contain exactly BATCH_SIZE rows; confirm.
# Build graph real_data = tf.placeholder(tf.float32, shape=[None, OUTPUT_DIM]) input_noise = tf.placeholder(tf.float32, shape=[None, NOISE_DIM]) fake_data = Generator(BATCH_SIZE, input_noise) dis_real, real_noise = Discriminator(real_data) dis_fake, invert_noise = Discriminator(fake_data) gen_params = lib.params_with_name('Generator') dis_params = lib.params_with_name('Discriminator') inv_params = lib.params_with_name('Invertor') # Optimize cost function if MODE == 'wgan-gp': inv_cost = tf.reduce_mean(tf.square(input_noise - invert_noise)) gen_cost = -tf.reduce_mean(dis_fake) dis_cost = tf.reduce_mean(dis_fake) - tf.reduce_mean(dis_real) alpha = tf.random_uniform(shape=[BATCH_SIZE, 1], minval=0., maxval=1.) differences = fake_data - real_data interpolates = real_data + alpha * differences gradients = tf.gradients(Discriminator(interpolates)[0], [interpolates])[0] slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1)) gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2) dis_cost_gp = dis_cost + LAMBDA * gradient_penalty inv_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5, beta2=0.9).minimize(inv_cost, var_list=inv_params) gen_train_op = tf.train.AdamOptimizer(learning_rate=1e-4, beta1=0.5,
def build_model(self):
    """Build the forecasting GAN graph: losses, metrics, penalty and train ops.

    Reads ``self.videos`` (created elsewhere) as the real sequences, feeds a
    placeholder for the first frame through the generator, and stacks the
    input frame with the generated frame to form the fake sequence. Sets,
    among others, ``g_cost_pure``, ``d_cost``, ``d_penalty``,
    ``d_cost_final``, the per-variable and overall RMSE tensors, the three
    Adam train ops (``d_adam``, ``g_adam_gan``, ``g_adam_first``),
    ``sample`` and ``summary_op``.
    """
    print("Setting up model...")
    video_shape = [self.batch_size, self.frame_size,
                   self.crop_size, self.crop_size, self.channels]

    def _rmse(reference, estimate):
        # Root of the mean squared difference over all elements.
        return tf.sqrt(
            tf.reduce_mean(tf.square(tf.subtract(reference, estimate))))

    # input_images = First frame of video
    self.input_images = tf.placeholder(
        tf.float32,
        [self.batch_size, self.crop_size, self.crop_size, self.channels])
    generated = self.generator(self.input_images)
    self.videos_fake, self.gen_reg, self.generator_variables = generated
    self.fake_min = tf.reduce_min(self.videos_fake)
    self.fake_max = tf.reduce_max(self.videos_fake)

    print('Shapes of videos:')
    print('Original:')
    print(self.videos.shape)
    print('Generated:')
    print(self.videos_fake.shape)

    self.d_real, self.discriminator_variables = self.discriminator(
        self.videos, reuse=False)
    # merging initial frame and generated to create full forecast "video"
    self.videos_fake = tf.stack(
        [self.input_images, self.videos_fake], axis=1)
    self.d_fake, _ = self.discriminator(self.videos_fake, reuse=True)

    # Wasserstein-style costs on the raw discriminator outputs.
    self.g_cost_pure = -tf.reduce_mean(self.d_fake)
    self.d_cost = tf.reduce_mean(self.d_fake) - tf.reduce_mean(self.d_real)

    self.videos = tf.reshape(self.videos, video_shape)
    self.videos_fake = tf.reshape(self.videos_fake, video_shape)

    # Per-variable RMSE. self.wvars flags which of up to five variables are
    # present; present ones occupy consecutive slots of the last axis,
    # absent ones report a constant 0.
    per_variable = [0, 0, 0, 0, 0]
    channel = 0
    for slot, flag in zip(range(5), self.wvars):
        if flag == '1':
            per_variable[slot] = _rmse(
                self.videos[:, :, :, :, channel],
                self.videos_fake[:, :, :, :, channel])
            channel += 1
        else:
            per_variable[slot] = tf.constant(0.0)
    (self.rmse_temp, self.rmse_cc, self.rmse_sh,
     self.rmse_sp, self.rmse_geo) = per_variable

    tf.summary.scalar('rmse_temp', self.rmse_temp)
    tf.summary.scalar('rmse_cc', self.rmse_cc)
    tf.summary.scalar('rmse_sh', self.rmse_sh)
    tf.summary.scalar('rmse_sp', self.rmse_sp)
    tf.summary.scalar('rmse_geo', self.rmse_geo)

    self.rmse = _rmse(self.videos, self.videos_fake)

    # error of discriminator failing to evaluate generated sample as fake
    tf.summary.scalar("g_cost_pure", self.g_cost_pure)
    # diff between original image and created image/sequence in generator
    tf.summary.scalar("g_cost_regularizer", self.gen_reg)
    # error of telling real and generated sequences apart
    tf.summary.scalar("d_cost", self.d_cost)
    tf.summary.scalar("RMSE_overal", self.rmse)

    # Gradient penalty at random interpolations between the flattened real
    # and generated sequences (one coefficient per example).
    alpha = tf.random_uniform(
        shape=[self.batch_size, 1], minval=0., maxval=1.)
    dim = self.frame_size * self.crop_size * self.crop_size * self.channels
    vid = tf.reshape(self.videos, [self.batch_size, dim])
    fake = tf.reshape(self.videos_fake, [self.batch_size, dim])
    differences = fake - vid
    interpolates = vid + (alpha * differences)
    d_hat, _ = self.discriminator(
        tf.reshape(interpolates, video_shape), reuse=True)
    gradients = tf.gradients(d_hat, [interpolates])[0]
    slopes = tf.sqrt(
        tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)

    self.d_penalty = 10 * gradient_penalty
    tf.summary.scalar('d_penalty', self.d_penalty)
    self.d_cost_final = self.d_cost + self.d_penalty
    tf.summary.scalar("d_cost_penalized", self.d_cost_final)

    self.d_adam, self.g_adam = None, None
    # Run the collected update ops before every optimisation step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        d_optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999)
        self.d_adam = d_optimizer.minimize(
            self.d_cost_final, var_list=self.discriminator_variables)

        g_gan_optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999)
        self.g_adam_gan = g_gan_optimizer.minimize(
            self.g_cost_pure, var_list=self.generator_variables)

        g_first_optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate, beta1=self.beta1, beta2=0.999)
        self.g_adam_first = g_first_optimizer.minimize(
            self.gen_reg, var_list=self.generator_variables)

    self.sample = self.videos_fake
    self.summary_op = tf.summary.merge_all()
# Pooling, dropout and classifier head of the CNN, followed by the training
# and evaluation ops.
with tf.name_scope("pool3"):
    pool3 = tf.nn.max_pool(
        conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    # The flattened size hard-codes a 14x14 feature map after pooling.
    pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])
    pool3_flat_drop = tf.layers.dropout(
        pool3_flat, conv2_dropout_rate, training=training)

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(
        pool3_flat_drop, n_fc1, activation=tf.nn.relu, name="fc1")
    fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=training)

with tf.name_scope("output"):
    # Feed the dropped-out activations forward; reading `fc1` here would
    # leave the fc1 dropout layer without any effect on the model.
    logits = tf.layers.dense(fc1_drop, n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.GradientDescentOptimizer(0.05)
    training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

# The evaluation ops are pinned to the CPU.
with tf.device('/CPU:0'):
    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

saver = tf.train.Saver()
#graph = tf.get_default_graph()
#writer = tf.summary.FileWriter("./simple_graph_events2")
#writer.add_graph(graph=graph)
padding='SAME') YY = tf.reshape(Y3_pool, shape=[-1, 6 * 1 * M]) Y4l = tf.matmul(YY, W4) Y4bn, update_ema4 = batchnorm(Y4l, tst, iter, B4) Y4r = tf.nn.relu(Y4bn) Y4 = tf.nn.dropout(Y4r, pkeep) Ylogits = tf.matmul(Y4, W5) + B5 Y = tf.nn.softmax(Ylogits) update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4) cross_entropy_ = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_) cross_entropy = tf.reduce_mean(cross_entropy_) * 100 correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) print('Model built!') dataX, dataY = load_data(test_file) dataset = Dataset(dataX, dataY) __X, __Y = dataset.minibatch(len(dataY)) print("Data loaded!") prediction = [] for k in range(k_fold):
def __init__(
        self, sequence_length, num_classes, vocab_size,
        embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
    """Build a text-classification CNN: embedding, conv + max-pool, dropout, softmax.

    Args:
        sequence_length: number of token ids per input row.
        num_classes: number of output classes.
        vocab_size: number of rows in the embedding matrix.
        embedding_size: width of each embedding vector.
        filter_sizes: iterable of convolution heights, one branch per entry.
        num_filters: number of filters in every branch.
        l2_reg_lambda: weight of the L2 penalty on the output layer.

    Attributes set: ``input_x``, ``input_y``, ``dropout_keep_prob``,
    ``embedded_chars``, ``embedded_chars_expanded``, ``h_pool``,
    ``h_pool_flat``, ``h_drop``, ``scores``, ``predictions``, ``loss`` and
    ``accuracy``.
    """
    # NOTE(review): tf.concat(axis, values) below follows the pre-1.0
    # TensorFlow argument order — confirm the targeted TF version.

    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(
        tf.int32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(
        tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name="dropout_keep_prob")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Embedding layer.
    # W: the components of every word's embedding vector are initialised
    # uniformly at random in [-1, 1).
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        embedding_init = tf.random_uniform(
            [vocab_size, embedding_size], -1.0, 1.0)
        W = tf.Variable(embedding_init, name="W")
        self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
        # Trailing axis of size 1 serves as conv2d's input-channel axis.
        self.embedded_chars_expanded = tf.expand_dims(
            self.embedded_chars, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for filter_size in filter_sizes:
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer: spans the full embedding width.
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(
                tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                self.embedded_chars_expanded,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Max-pool over every position the VALID convolution produced.
            pool_height = sequence_length - filter_size + 1
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, pool_height, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(3, pooled_outputs)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(
            self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        # Only the output layer's parameters are regularised.
        l2_loss = l2_loss + tf.nn.l2_loss(W) + tf.nn.l2_loss(b)
        # xw_plus_b = matmul(x, W) + b
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate Mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.scores, labels=self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        true_classes = tf.argmax(self.input_y, 1)
        correct_predictions = tf.equal(self.predictions, true_classes)
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
# Build the full model graph inside a private tf.Graph (self.graph).
#
# Inputs are placeholders with a fixed batch size hp.batch_size; the
# placeholder y_twrp is only created when is_training is true. Decoder inputs
# are y_decoder_input shifted right by one position with token id 2 prepended.
#
# Encoder (hierarchical): the utterances are flattened to rows of hp.maxlen
# tokens, embedded, position-encoded and passed through hp.num_blocks_w
# word-level self-attention blocks (built with causality=True). The result is
# reshaped to (batch, max_turn, maxlen, hidden) and averaged over the token
# axis; utterance vectors whose x_length is 0 are replaced by zeros. After a
# feed-forward layer and a positional encoding over hp.max_turn, hp.num_blocks_u
# utterance-level self-attention blocks follow.
#
# Decoder: hp.num_blocks blocks of masked self-attention, attention over the
# encoder output and a feed-forward layer. In the last block an additional
# masked self-attention (scope "self_attention") yields self.future_blindness,
# which queries topic_word_attention over the topic-word embeddings; its
# output is added to self.dec and passed through a feed-forward layer to give
# self.ct_tw_dec.
#
# Outputs: logits_c projects self.dec onto the transposed embedding table;
# logits_t is a dense projection of self.ct_tw_dec to tw_vocab_len.
#   Training: label-smoothed cross-entropy for both heads. loss_c ignores
#   positions where y == 0; loss_t ignores positions where y_twrp is 0 or 1.
#   The total loss is loss_c + loss_t * hp.penalty, minimised with Adam using
#   the noam_scheme learning rate.
#   Inference: prob = softmax(logits_c) + (softmax(logits_t) mapped through
#   tw_vocab_overlap) * hp.penalty; preds is its argmax; ppl_step = exp(ce).
#
# NOTE(review): in the inference branch self.prob (a sum of softmax outputs,
# not logits) is passed as `logits` to softmax_cross_entropy_with_logits_v2 —
# confirm this is intended for the perplexity computation.
# NOTE(review): the word-level encoder blocks use causality=True while the
# utterance-level blocks use causality=False — confirm.
# NOTE(review): tw_vocab_overlap is built from vocab_overlap, whose default is
# None — confirm callers always supply it wherever the constant is created.
def __init__(self, is_training=True, vocab_len=None, tw_vocab_len=None, vocab_overlap=None): self.graph = tf.Graph() with self.graph.as_default(): if is_training: self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.max_turn, hp.maxlen)) self.x_length = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.max_turn)) self.y = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.maxlen)) self.y_twrp = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.maxlen)) self.y_tw = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.tw_maxlen)) self.y_decoder_input = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.maxlen)) else: # inference self.x = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.max_turn, hp.maxlen)) self.x_length = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.max_turn)) self.y = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.maxlen)) self.y_tw = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.tw_maxlen)) self.y_decoder_input = tf.placeholder(tf.int32, shape=(hp.batch_size, hp.maxlen)) self.tw_vocab_overlap = tf.constant(vocab_overlap, name='Const', dtype='float32') # define decoder inputs self.decoder_inputs = tf.concat( (tf.ones_like(self.y_decoder_input[:, :1]) * 2, self.y_decoder_input[:, :-1]), -1) # 2:<S> ## Word Embedding self.enc_embed = get_token_embeddings(tf.reshape( self.x, [-1, hp.maxlen]), vocab_size=vocab_len, num_units=hp.hidden_units) ## Topic Word Embedding self.tw_embed = get_token_embeddings(self.y_tw, vocab_size=vocab_len, num_units=hp.hidden_units) ## Word Embedding self.dec_embed = get_token_embeddings(self.decoder_inputs, vocab_size=vocab_len, num_units=hp.hidden_units) # Get Vocab Embedding self.embeddings = get_token_embeddings(inputs=None, vocab_size=vocab_len, num_units=hp.hidden_units, get_embedtable=True) # Encoder with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE): # Hierarchical Self-Attention reshape self.x_resha = tf.reshape(self.x, [-1, hp.maxlen]) # (N, S_maxlen) # Word src_masks src_masks_w = 
tf.math.equal(self.x_resha, 0) # (N, S_maxlen) ## Word Positional Encoding self.enc = self.enc_embed + positional_encoding( self.enc_embed, hp.maxlen) self.enc = tf.layers.dropout(self.enc, hp.dropout_rate, training=is_training) ## Word Blocks for i in range(hp.num_blocks_w): with tf.variable_scope("num_blocks_w{}".format(i), reuse=tf.AUTO_REUSE): # self-attention self.enc, self.att_w = multihead_attention( queries=self.enc, keys=self.enc, values=self.enc, key_masks=src_masks_w, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, training=is_training, causality=True) # feed forward self.enc = ff( self.enc, num_units=[4 * hp.hidden_units, hp.hidden_units]) # Hierarchical Self-Attention reshape self.enc = tf.reshape( self.enc, [hp.batch_size, hp.max_turn, hp.maxlen, hp.hidden_units]) self.enc = tf.reduce_mean(self.enc, axis=2) # (N,max_turn,C) # Utterance which has been padded makes the Utterance vector for 0 regardless of self-attention x_length_mat = tf.not_equal(self.x_length, 0) # (N, max_turn) x_length_mat = tf.expand_dims(x_length_mat, -1) # (N, max_turn, 1) x_length_mat = tf.tile(x_length_mat, multiples=[1, 1, hp.hidden_units ]) # (N, max_turn, C) zeros_mat = tf.zeros( [hp.batch_size, hp.max_turn, hp.hidden_units], dtype=tf.float32) self.enc = tf.where(x_length_mat, self.enc, zeros_mat) self.enc = ff(self.enc, num_units=[4 * hp.hidden_units, hp.hidden_units]) # Uatterance src_masks src_masks_u = tf.math.equal(self.x_length, 0) # (N, max_turn) ## Uatterance Positional Encoding self.enc = self.enc + positional_encoding( self.enc, hp.max_turn) self.enc = tf.layers.dropout(self.enc, hp.dropout_rate, training=is_training) ## Uatterance Blocks for i in range(hp.num_blocks_u): with tf.variable_scope("num_blocks_u{}".format(i), reuse=tf.AUTO_REUSE): # self-attention self.enc, self.att_u = multihead_attention( queries=self.enc, keys=self.enc, values=self.enc, key_masks=src_masks_u, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, training=is_training, 
causality=False) # feed forward self.enc = ff( self.enc, num_units=[4 * hp.hidden_units, hp.hidden_units]) # Decoder with tf.variable_scope("decoder", reuse=tf.AUTO_REUSE): # tgt_masks tgt_masks = tf.math.equal(self.decoder_inputs, 0) # (N, T2) ## Positional Encoding self.dec = self.dec_embed + positional_encoding( self.dec_embed, hp.maxlen) self.dec = tf.layers.dropout(self.dec, hp.dropout_rate, training=is_training) # Blocks for i in range(hp.num_blocks): with tf.variable_scope("num_blocks_{}".format(i), reuse=tf.AUTO_REUSE): # Masked self-attention (Note that causality is True at this time) self.dec, _ = multihead_attention( queries=self.dec, keys=self.dec, values=self.dec, key_masks=tgt_masks, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, training=is_training, causality=True, scope="self_attention") # Vanilla attention self.dec, self.att_v = multihead_attention( queries=self.dec, keys=self.enc, values=self.enc, key_masks=src_masks_u, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, training=is_training, causality=False, scope="vanilla_attention") ### Feed Forward self.dec = ff( self.dec, num_units=[4 * hp.hidden_units, hp.hidden_units]) if i >= hp.num_blocks - 1: self.future_blindness, _ = multihead_attention( queries=self.dec, keys=self.dec, values=self.dec, key_masks=tgt_masks, num_heads=hp.num_heads, dropout_rate=hp.dropout_rate, training=is_training, causality=True, scope="self_attention") ## Topic Word Attention self.twdec = topic_word_attention( queries_hidden=self.future_blindness, queries_context=self.enc, keys=self.tw_embed, dropout_rate=hp.dropout_rate, training=is_training, scope="topic_word_attention") self.ct_tw_dec = self.dec + self.twdec ### Feed Forward self.ct_tw_dec = ff( self.ct_tw_dec, num_units=[4 * hp.hidden_units, hp.hidden_units], scope="tw_context_feedforward") # Final linear projection (embedding weights are shared) self.weights = tf.transpose( self.embeddings) # (d_model, vocab_size) self.logits_c = 
tf.einsum('ntd,dk->ntk', self.dec, self.weights) # (N, T_q, vocab_size) self.logits_t = tf.layers.dense( self.ct_tw_dec, tw_vocab_len) # (N, T_q, tw_vocab_size) if is_training: # Loss_context self.y_smoothed_c = label_smoothing( tf.one_hot(self.y, depth=vocab_len)) # (N, T_q, vocab_size) self.ce_c = tf.nn.softmax_cross_entropy_with_logits_v2( logits=self.logits_c, labels=self.y_smoothed_c) # (N, T_q) self.nonpadding_c = tf.to_float(tf.not_equal( self.y, 0)) # 0: <pad> #(N,T_q) self.loss_c = tf.reduce_sum(self.ce_c * self.nonpadding_c) / ( tf.reduce_sum(self.nonpadding_c) + 1e-7) # Loss_topic self.y_smoothed_t = label_smoothing( tf.one_hot(self.y_twrp, depth=tw_vocab_len)) # (N, T_q, tw_vocab_size) self.ce_t = tf.nn.softmax_cross_entropy_with_logits_v2( logits=self.logits_t, labels=self.y_smoothed_t) # (N, T_q) self.noncost_unk = tf.to_float(tf.not_equal(self.y_twrp, 1)) # 1: <unk> self.noncost_pad = tf.to_float(tf.not_equal(self.y_twrp, 0)) # 0: <pad> self.noncost_t = self.noncost_unk * self.noncost_pad self.loss_t = tf.reduce_sum(self.ce_t * self.noncost_t) / ( tf.reduce_sum(self.noncost_t) + 1e-7) # Loss self.loss = self.loss_c + self.loss_t * hp.penalty self.global_step = tf.train.get_or_create_global_step() self.lr = noam_scheme(hp.lr, self.global_step, hp.warmup_steps) self.optimizer = tf.train.AdamOptimizer(self.lr) self.train_op = self.optimizer.minimize( self.loss, global_step=self.global_step) else: # inference self.prob_c = tf.nn.softmax( self.logits_c) # (N, T_q, vocab_size) self.prob_t = tf.nn.softmax( self.logits_t) # (N, T_q, tw_vocab_size) self.prob_t = tf.einsum( 'nlt,tv->nlv', self.prob_t, self.tw_vocab_overlap) # (N, T_q, vocab_size) self.prob = self.prob_c + self.prob_t * hp.penalty # (N, T_q, vocab_size) self.preds = tf.to_int32(tf.argmax(self.prob, axis=-1)) # (N, T_q) self.y_smoothed = label_smoothing( tf.one_hot(self.y, depth=vocab_len)) # (N, T_q, vocab_size) self.ce = tf.nn.softmax_cross_entropy_with_logits_v2( logits=self.prob, 
labels=self.y_smoothed) # (N, T_q)
 self.ppl_step = tf.exp(self.ce) # (N, T_q)
def build_mnist_model(num_hidden, decay, activation):
    """Build the regression graph plus its training, evaluation and reset ops.

    The network itself comes from ``feed_forward`` (defined elsewhere), called
    under the ``'network'`` variable scope. Three alternative training ops are
    created on the same loss: plain SGD over all network variables, a manual
    SGD update with added Gaussian noise, and SGD restricted to the variables
    found under ``network/dense_{len(num_hidden) - 1}``.

    Args:
        num_hidden: Forwarded to ``feed_forward``; its length also selects the
            variable scope treated as the "last layer".
        decay: Forwarded to ``feed_forward``.
        activation: Forwarded to ``feed_forward``.

    Returns:
        ph: Dict of placeholders (``'x'``, ``'y'``, ``'lr_decay'``,
            ``'is_training'``, ``'kernel_l0'``).
        targets: Nested dict of tensors/ops grouped under ``'layers'``,
            ``'all'``, ``'all_noise'``, ``'lst'``, ``'eval'``,
            ``'assign_weights'`` and ``'reset'``.
    """
    x = tf.placeholder(dtype=tf.float32, shape=[None, args.x_dim])
    y = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    is_training = tf.placeholder(dtype=tf.bool, shape=[])
    with tf.variable_scope('network'):
        out, reg, layers = feed_forward(x, num_hidden, decay, activation,
                                        is_training)
    # NOTE(review): despite the name, no square root is taken -- this is the
    # batch mean of the per-row summed squared error.
    rmse_loss = tf.reduce_mean(tf.reduce_sum(tf.square(y - out), 1))
    loss = rmse_loss + reg
    all_weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='network')
    show_variables(all_weights)
    # Variables whose scope matches the dense layer indexed by the number of
    # hidden layers minus one.
    last_layer_weights = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES,
        scope='network/dense_{}'.format(len(num_hidden) - 1))
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='network')
    for item in update_ops:
        print('Update {}'.format(item))
    # Scalar multiplier applied to args.lr at run time.
    lr_decay = tf.placeholder(dtype=tf.float32, shape=[])
    all_op = tf.train.GradientDescentOptimizer(args.lr * lr_decay)
    all_grads = all_op.compute_gradients(loss=loss, var_list=all_weights)
    all_train_op = all_op.apply_gradients(grads_and_vars=all_grads)
    lr = args.lr * lr_decay
    TEMPERATURE = 1e-8
    # Hand-written update: an SGD step minus sqrt(lr) * TEMPERATURE times
    # standard-normal noise, assigned directly to each variable.
    noise_train_ops = []
    for g, v in all_grads:
        if g is None:
            # Variable does not contribute to the loss; nothing to update.
            continue
        noise_train_ops.append(
            tf.assign(
                v, v - lr * g - tf.sqrt(lr) * TEMPERATURE *
                tf.random_normal(v.shape, stddev=1)))
    all_train_op_noise = tf.group(noise_train_ops)
    # Separate optimizer that only updates the last-layer variables.
    lst_op = tf.train.GradientDescentOptimizer(args.lr * lr_decay)
    lst_grads = lst_op.compute_gradients(loss=loss,
                                         var_list=last_layer_weights)
    lst_train_op = lst_op.apply_gradients(grads_and_vars=lst_grads)
    # Ops that re-initialize whatever variables each optimizer owns.
    reset_lst_op = tf.variables_initializer(lst_op.variables())
    reset_all_op = tf.variables_initializer(all_op.variables())
    # Keep only variables whose name contains 'kernel', keyed by full name.
    weight_dict = {}
    for item in all_weights:
        if 'kernel' in item.name:
            weight_dict[item.name] = item
    print('weights to be saved')
    print(weight_dict)
    ph = {'x': x, 'y': y, 'lr_decay': lr_decay, 'is_training': is_training}
    # Placeholder used to overwrite the first dense kernel via
    # targets['assign_weights'].
    ph['kernel_l0'] = tf.placeholder(
        dtype=tf.float32,
        shape=weight_dict['network/dense_0/kernel:0'].get_shape())
    #ph['bias_l0'] = tf.placeholder(dtype=tf.float32, shape=weight_dict['network/dense_0/bias:0'].get_shape())
    targets = {
        'layers': layers,
        'all': {
            'weights': all_weights,
            'train': all_train_op,
            'rmse_loss': rmse_loss,
            'update': update_ops,
            'reg_loss': reg
        },
        'all_noise': {
            'weights': all_weights,
            'train': all_train_op_noise,
            'rmse_loss': rmse_loss,
            'update': update_ops,
            'reg_loss': reg
        },
        # NOTE(review): 'weights' here is all_weights although 'train' only
        # updates last_layer_weights -- confirm this is intended.
        'lst': {
            'weights': all_weights,
            'train': lst_train_op,
            'rmse_loss': rmse_loss,
            'update': update_ops,
            'reg_loss': reg
        },
        'eval': {
            'weights': weight_dict,
            'rmse_loss': rmse_loss,
            'out': out
        },
        'assign_weights': {
            'weights_l0': tf.assign(weight_dict['network/dense_0/kernel:0'], ph['kernel_l0']),
            #'bias': tf.assign(weight_dict['network/dense_0/bias:0'], ph['bias_l0']),
        },
        'reset': {
            'lst': reset_lst_op,
            'all': reset_all_op
        }
    }
    return ph, targets
# Start from an empty default graph so re-running this cell does not pile up
# duplicate ops.
tf.reset_default_graph()

# One row per document with vocab_size features, and one binary target each.
doc_vectors = tf.placeholder(dtype=tf.float32,shape=[None, vocab_size], name='doc_vectors')
y = tf.placeholder(tf.float32, [None, 1], name='y')

# <codecell>

learning_rate = 0.01

# <codecell>

# One 100-unit ReLU hidden layer followed by a single linear output (logit).
# NOTE(review): the log directory below says "logistic_regression", but the
# model has a hidden layer.
layer_one_output = fully_connected(doc_vectors, 100, activation_fn=tf.nn.relu)
logits = fully_connected(layer_one_output,1, activation_fn=None)
# Probability is exposed for inference; the loss works on raw logits.
prob = tf.nn.sigmoid(logits, name='prob')
x_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits, name='x_entropy')
loss = tf.reduce_mean(x_entropy, name='loss')

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss, name='train_op')

# <codecell>

# Write the graph definition for TensorBoard.
file_writer = tf.summary.FileWriter('tf_logs/logistic_regression', tf.get_default_graph())

# <codecell>

init = tf.global_variables_initializer()
saver = tf.train.Saver()
# InteractiveSession installs itself as the default session, which is what
# lets init.run() work without an explicit session argument.
sess = tf.InteractiveSession()
init.run()
tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0) ) embed = tf.nn.embedding_lookup(embeddings, train_inputs) nce_weights = tf.Variable( tf.truncated_normal([vocabulary_size, embedding_size], stddev=1.0 / math.sqrt(embedding_size)) ) nce_biases = tf.Variable(tf.zeros([vocabulary_size])) loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, labels=train_labels, inputs=embed, num_sampled=num_sampled, num_classes=vocabulary_size )) optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss) norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True)) normalized_embeddings = embeddings / norm valid_embeddings = tf.nn.embedding_lookup( normalized_embeddings, valid_dataset ) similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b = True) init = tf.global_variables_initializer()
# Fully connected layer: 128 * 4 * 4 flattened inputs -> 625 units, ReLU,
# then dropout. L3_flat and keep_prob are defined earlier in the script.
W4 = tf.get_variable("W4", shape=[128 * 4 * 4, 625],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([625]))
L4 = tf.nn.relu(tf.matmul(L3_flat, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

# Output layer: 625 -> 10 raw class scores.
W5 = tf.get_variable("W5", shape=[625, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5
# NOTE(review): L5 is not used by the cost below (which consumes
# `hypothesis`); confirm whether it is needed later in the file.
L5 = tf.nn.relu(hypothesis)
L5 = tf.nn.dropout(L5, keep_prob=keep_prob)

# Softmax cross-entropy on the raw scores, averaged over the batch.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

###
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# train my model
print('Learning started. It takes sometime.')
for epoch in range(training_epochs):
    # NOTE(review): avg_cost is reset each epoch but never updated in the
    # visible part of the loop.
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # keep_prob of 0.7 is fed for the training batches.
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
def train(train_data_set, val_data_set, load_model_path, save_model_path):
    """Build the classifier graph, train it and validate every 100 iterations.

    The network is produced by ``inference_small`` and trained with plain
    gradient descent. Training summaries are written every iteration;
    validation metrics are computed, logged and printed every 100 iterations.

    Args:
        train_data_set: Object whose ``get_next_batch(batch_size,
            distribution)`` returns ``(images, labels)``.
        val_data_set: Object whose ``get_next_batch()`` returns
            ``(images, labels)``.
        load_model_path: Checkpoint path to restore before training; skipped
            when falsy.
        save_model_path: Directory under which a checkpoint is written every
            100 iterations (in a sub-directory named after the global step);
            ``None`` disables saving.
    """
    # Local import: keeps the function self-contained and out of the loop.
    import os

    # Single source of truth for the per-image shape so that the placeholder
    # and both reshapes below cannot drift apart.
    image_dims = [
        sub_Config.IMAGE_W,
        sub_Config.IMAGE_H,
        sub_Config.IMAGE_CHANNEL
    ]
    x = tf.placeholder(
        tf.float32,
        shape=[None] + image_dims,
        name='input_x'
    )
    y_ = tf.placeholder(
        tf.float32,
        shape=[
            None,
        ]
    )
    tf.summary.histogram(
        'label',
        y_
    )
    global_step = tf.Variable(0, trainable=False)
    # variable_average = tf.train.ExponentialMovingAverage(
    #     sub_Config.MOVING_AVERAGE_DECAY,
    #     global_step
    # )
    # vaeriable_average_op = variable_average.apply(tf.trainable_variables())
    # regularizer = tf.contrib.layers.l2_regularizer(sub_Config.REGULARIZTION_RATE)
    is_training = tf.placeholder('bool', [], name='is_training')
    FLAGS = tf.app.flags.FLAGS
    # NOTE(review): flags are defined on every call, so calling train() twice
    # in one process will hit a duplicate-flag error -- consider moving these
    # to module level.
    tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar-data', 'where to store the dataset')
    tf.app.flags.DEFINE_boolean('use_bn', True, 'use batch normalization. otherwise use biases')
    # NOTE(review): use_bias is set to use_bn, although the flag's help text
    # describes the two as alternatives -- confirm whether this should be
    # negated. Left unchanged to stay compatible with existing checkpoints.
    y = inference_small(x, is_training=is_training, num_classes=sub_Config.OUTPUT_NODE, use_bias=FLAGS.use_bn,
                        num_blocks=3)
    tf.summary.histogram(
        'logits',
        tf.argmax(y, 1)
    )
    loss_ = loss(
        logits=y,
        labels=tf.cast(y_, np.int32)
    )
    tf.summary.scalar(
        'loss',
        loss_
    )
    train_op = tf.train.GradientDescentOptimizer(
        learning_rate=sub_Config.LEARNING_RATE
    ).minimize(
        loss=loss_,
        global_step=global_step
    )
    # with tf.control_dependencies([train_step, vaeriable_average_op]):
    #     train_op = tf.no_op(name='train')

    with tf.variable_scope('accuracy'):
        accuracy_tensor = tf.reduce_mean(
            tf.cast(
                tf.equal(x=tf.argmax(y, 1), y=tf.cast(y_, tf.int64)),
                tf.float32
            )
        )
        tf.summary.scalar(
            'accuracy',
            accuracy_tensor
        )
    saver = tf.train.Saver()
    merge_op = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        if load_model_path:
            saver.restore(sess, load_model_path)

        writer = tf.summary.FileWriter('./log/fine_tuning/train', tf.get_default_graph())
        val_writer = tf.summary.FileWriter('./log/fine_tuning/val', tf.get_default_graph())
        try:
            for i in range(sub_Config.ITERATOE_NUMBER):
                images, labels = train_data_set.get_next_batch(sub_Config.BATCH_SIZE, sub_Config.BATCH_DISTRIBUTION)
                images = changed_shape(images, [len(images)] + image_dims)
                _, loss_value, accuracy_value, summary, global_step_value = sess.run(
                    [train_op, loss_, accuracy_tensor, merge_op, global_step],
                    feed_dict={
                        x: images,
                        y_: labels,
                        # Always feed the mode flag: an unfed placeholder makes
                        # the run fail as soon as the graph depends on it.
                        is_training: True
                    }
                )
                writer.add_summary(
                    summary=summary,
                    global_step=global_step_value
                )
                if i % 100 == 0 and i != 0 and save_model_path is not None:
                    # Save a checkpoint every 100 iterations, each in its own
                    # directory named after the global step.
                    save_path = os.path.join(save_model_path, str(global_step_value))
                    if not os.path.exists(save_path):
                        # makedirs also creates save_model_path itself if it
                        # does not exist yet.
                        os.makedirs(save_path)
                    save_path += '/model.ckpt'
                    print('mode saved path is  %s' % save_path)
                    saver.save(sess, save_path)
                if i % 100 == 0:
                    validation_images, validation_labels = val_data_set.get_next_batch()
                    validation_images = changed_shape(
                        validation_images,
                        [len(validation_images)] + image_dims
                    )
                    validation_accuracy, validation_loss, summary, logits = sess.run(
                        [accuracy_tensor, loss_, merge_op, y],
                        feed_dict={
                            x: validation_images,
                            y_: validation_labels,
                            is_training: False
                        }
                    )
                    calculate_acc_error(
                        logits=np.argmax(logits, 1),
                        label=validation_labels,
                        show=True
                    )
                    binary_acc = acc_binary_acc(
                        logits=np.argmax(logits, 1),
                        label=validation_labels,
                    )
                    val_writer.add_summary(summary, global_step_value)
                    print('step is %d,training loss value is %g, accuracy is %g '
                          'validation loss value is %g, accuracy is %g, binary_acc is %g' %
                          (global_step_value, loss_value, accuracy_value,
                           validation_loss, validation_accuracy, binary_acc))
        finally:
            # Flush and release the event files even if training aborts.
            writer.close()
            val_writer.close()
# Getting final output through indexing after reversing last_output = outputs[-1] # As rnn model output the final layer through Relu activation softmax is # used for final output. output = tf.nn.softmax(last_output) # Computing the Cross Entropy loss cross_entropy = -tf.reduce_sum(y * tf.log(output)) # Trainning with Adadelta Optimizer train_step = tf.train.AdamOptimizer().minimize(cross_entropy) # Calculatio of correct prediction and accuracy correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(output, 1)) accuracy = (tf.reduce_mean(tf.cast(correct_prediction, tf.float32))) * 100 # # Dataset Preparation # Function to get on hot def get_on_hot(number): on_hot = [0] * 10 on_hot[number] = 1 return on_hot # Using Sklearn MNIST dataset. digits = datasets.load_digits() X = digits.images Y_ = digits.target
v2 = tf.Variable([[-1., -2], [1., -21.]]) # In[17]: lmbd = far.get_hyperparameter('lambda', initializer=tf.ones_initializer, shape=v2.get_shape()) reg2 = far.get_hyperparameter('reg2', 0.1) eta = far.get_hyperparameter('eta', 0.1) beta1 = far.get_hyperparameter('beta1', 1.) beta2 = far.get_hyperparameter('beta2', 2.) # noinspection PyTypeChecker cost = tf.reduce_mean(v1**2) + tf.reduce_sum(lmbd*v2**2) + reg2*tf.nn.l2_loss(v1) io_optim = far.AdamOptimizer(eta, tf.nn.sigmoid(beta1), tf.nn.sigmoid(beta2), epsilon=1.e-4) oo = tf.reduce_mean(v1*v2) rhg = far.ReverseHG() optim_oo = tf.train.AdamOptimizer() # ts_hy = optim_oo.apply_gradients(rhg.hgrads_hvars()) farho = far.HyperOptimizer(rhg) run = farho.minimize(oo, optim_oo, cost, io_optim) print(tf.global_variables()) print(far.utils.hyperparameters())
layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['decoder_h4']), biases['decoder_b4'])) return layer_4 """ # Construct model encoder_op = encoder(X) decoder_op = decoder(encoder_op) # Prediction y_pred = decoder_op # Targets (Labels) are the input data. y_true = X # Define loss and optimizer, minimize the squared error cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2)) optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost) # Launch the graph with tf.Session() as sess: # tf.initialize_all_variables() no long valid from # 2017-03-02 if using tensorflow >= 0.12 if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1: init = tf.initialize_all_variables() else: init = tf.global_variables_initializer() sess.run(init) total_batch = int(mnist.train.num_examples/batch_size) # Training cycle for epoch in range(training_epochs):
import tensorflow as tf
tf.set_random_seed(777)  # for reproducibility

# tf Graph Input
X = [1, 2, 3]
Y = [1, 2, 3]

# Set wrong model weights
W = tf.Variable(5.)

# Linear model
hypothesis = X * W

# Manual gradient: derivative of mean((W * X - Y)^2) with respect to W,
# i.e. 2 * mean((W * X - Y) * X). Built for comparison with the optimizer's
# own gradient below.
gradient = tf.reduce_mean((W * X - Y) * X) * 2

# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize: Gradient Descent Magic
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(cost)

# Get gradients as (gradient, variable) pairs
gvs = optimizer.compute_gradients(cost)

# Clip every gradient to [-1, 1] and apply the clipped values
capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
apply_gradients = optimizer.apply_gradients(capped_gvs)

# Launch the graph in a session.
import tensorflow as tf
import numpy as np

# Create data: 100 random x values and targets on the line y = 0.1 * x + 0.3.
x_data = np.random.rand(100).astype(np.float32)
y_data = x_data * 0.1 + 0.3

### create tensorflow structure start ###
# Trainable slope (random in [-1, 1)) and intercept (zero).
Weights = tf.Variable(tf.random_uniform([1],-1,1))
biases = tf.Variable((tf.zeros([1])))

y = Weights * x_data + biases

# Mean squared error between the prediction and the targets.
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

init = tf.global_variables_initializer()
### created tensorflow structure end ###

sess = tf.Session()
# Activate the network: run the variable initializer.
sess.run(init)

for step in range(201):
    sess.run(train)
    # Report the current slope and intercept every 20 steps.
    if step % 20 == 0:
        print(step,sess.run(Weights),sess.run(biases))
# Number of features per flattened image, taken from the training set.
image_size = mnist.train.images.shape[1]

inputs_ = tf.placeholder(tf.float32, (None, image_size), name='inputs')
targets_ = tf.placeholder(tf.float32, (None, image_size), name='targets')

# Output of hidden layer
encoded = tf.layers.dense(inputs_, encoding_dim, activation=tf.nn.relu)

# Output layer logits
logits = tf.layers.dense(encoded, image_size, activation=None)
# Sigmoid output from the logits: the reconstruction
decoded = tf.nn.sigmoid(logits, name='output')

# Element-wise sigmoid cross-entropy on the logits, averaged into a scalar.
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits)
cost = tf.reduce_mean(loss)
opt = tf.train.AdamOptimizer(0.001).minimize(cost)

# Create the session
sess = tf.Session()

epochs = 20
batch_size = 200
sess.run(tf.global_variables_initializer())
for e in range(epochs):
    for ii in range(mnist.train.num_examples//batch_size):
        batch = mnist.train.next_batch(batch_size)
        # The images are both input and target; batch[1] is not used.
        feed = {inputs_: batch[0], targets_: batch[0]}
        batch_cost, _ = sess.run([cost, opt], feed_dict=feed)

    # Reports the cost of the last batch of the epoch, not an epoch average.
    print("Epoch: {}/{}...".format(e+1, epochs),
          "Training loss: {:.4f}".format(batch_cost))

# 2 x 10 grid of axes with shared x and y axes.
fig, axes = plt.subplots(nrows=2, ncols=10, sharex=True, sharey=True, figsize=(20,4))
def onSetup(self):
    """Initialize agent state, build the two networks and restore a model.

    Sets the file paths, counters, epsilon and replay memory, calls
    ``self.reset()``, then builds two structurally identical fully connected
    networks on ``self.input``: the trained one (``self.output``, variables
    ``w1``, ``b1``, ...) and a second one whose variables carry a ``_t``
    suffix (``self.output_t``). Finally it opens a session and tries to
    restore ``self.model_path``; if restoring fails for any reason, all
    variables are freshly initialized instead.
    """
    #==================== Initialize ====================#
    # File Path
    self.scripts_path = ue.get_content_dir() + "Scripts"
    self.model_directory = self.scripts_path + "/model"
    self.model_path = self.model_directory + "/model.ckpt"
    # Game
    self.Sequence = 1
    self.PlayNumber = 1
    # Epsilon
    self.Epsilon = EPSILONMINVALUE
    # ReplayMemory
    self.Memory = ReplayMemory()
    self.LastAction = -1
    # State
    self.reset()

    #==================== Hypothesis ====================#
    self.input = tf.placeholder(tf.float32, shape=[None, INPUTS])

    def dense(inputs, fan_in, fan_out, w_name, b_name):
        # Affine layer; weights are scaled by 1/sqrt(fan_in), biases start
        # near zero.
        weights = tf.Variable(
            tf.truncated_normal(shape=[fan_in, fan_out],
                                stddev=1.0 / math.sqrt(float(fan_in))),
            dtype=tf.float32, name=w_name)
        biases = tf.Variable(
            tf.truncated_normal(shape=[fan_out], stddev=0.01),
            dtype=tf.float32, name=b_name)
        return tf.matmul(inputs, weights) + biases

    def build_network(suffix):
        # Two ReLU hidden layers and a linear output layer. `suffix` is
        # appended to every variable name so checkpoints keep the same keys
        # ('w1' / 'w1_t', ...). The activation op names are deliberately the
        # same for both networks, as before.
        hidden1 = tf.nn.relu(
            dense(self.input, INPUTS, HIDDEN1S, 'w1' + suffix, 'b1' + suffix),
            name='hidden1')
        hidden2 = tf.nn.relu(
            dense(hidden1, HIDDEN1S, HIDDEN2S, 'w2' + suffix, 'b2' + suffix),
            name='hidden2')
        return dense(hidden2, HIDDEN2S, OUTPUTS, 'wo' + suffix, 'bo' + suffix)

    # Model
    self.output = build_network('')
    # Target
    self.output_t = build_network('_t')

    # Cost & Optimizer: half the mean squared error against the fed target.
    self.target = tf.placeholder(tf.float32, shape=[None, OUTPUTS])
    self.cost = tf.reduce_mean(tf.square(self.output - self.target)) / 2
    self.optimizer = tf.train.AdamOptimizer(LEARNINGRATE).minimize(
        self.cost)

    #==================== Session & Saver ====================#
    self.sess = tf.Session()
    self.saver = tf.train.Saver()
    ue.log('######################################################')
    try:
        self.saver.restore(self.sess, self.model_path)
        ue.log('#################### loaded model ####################')
    except Exception:
        # Best effort: any restore failure (typically no checkpoint yet)
        # falls back to fresh initialization. `Exception` rather than a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        self.sess.run(tf.global_variables_initializer())
        ue.log('################## no stored model ##################')
    ue.log('######################################################')
def kl_loss(mean, logvar):
    """Return the batch-averaged KL term computed from a mean and log-variance.

    For each example this evaluates
    ``0.5 * sum(mean**2 + exp(logvar) - 1 - logvar)`` over the last axis,
    which is the closed-form KL divergence between a diagonal Gaussian with
    the given mean and log-variance and a standard normal. The per-example
    values are then averaged.
    """
    # shape : [batch_size, channel]
    quadratic = tf.square(mean) + tf.exp(logvar)
    per_example = 0.5 * tf.reduce_sum(quadratic - 1 - logvar, axis=-1)
    return tf.reduce_mean(per_example)
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# placeholder for data
x = tf.placeholder(tf.float32, [None, 784])
# placeholder that switches batch norm between training and inference mode
train_phase = tf.placeholder(tf.bool, name='phase_train')

# variables for parameters
hiden_units = 25
layer1 = get_NN_layer(x, input_dim=784, output_dim=hiden_units, scope='layer1', train_phase=train_phase)

# create model: a softmax layer on top of the hidden layer
W_final = tf.Variable(tf.truncated_normal(shape=[hiden_units, 10], mean=0.0, stddev=0.1))
b_final = tf.Variable(tf.constant(0.1, shape=[10]))
y = tf.nn.softmax(tf.matmul(layer1, W_final) + b_final)

### training
y_ = tf.placeholder(tf.float32, [None, 10])
# NOTE(review): tf.log(y) produces -inf/NaN when a probability underflows to
# 0; computing the loss from the logits would be more robust.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

with tf.Session() as sess:
    # NOTE(review): tf.initialize_all_variables() is deprecated in favour of
    # tf.global_variables_initializer(); left as is because the targeted
    # TensorFlow version is not visible here.
    sess.run(tf.initialize_all_variables())
    steps = 3000
    # range instead of xrange so the script also runs on Python 3.
    for iter_step in range(steps):
        #feed_dict_batch = get_batch_feed(X_train, Y_train, M, phase_train)
        batch_xs, batch_ys = mnist.train.next_batch(100)
        # Collect model statistics
        if iter_step % 1000 == 0:
            # Fixed: the original assigned `batch_xstrain` twice, so the
            # labels overwrote the images and `batch_ystrain` never existed.
            batch_xstrain, batch_ystrain = batch_xs, batch_ys  # simulates train data
            batch_xcv, batch_ycv = mnist.test.next_batch(5000)  # simulates CV data
            batch_xtest, batch_ytest = mnist.test.next_batch(5000)  # simulates test data
            # do inference (batch norm in inference mode)
            train_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xs, y_: batch_ys, train_phase: False})
            cv_error = sess.run(fetches=cross_entropy, feed_dict={x: batch_xcv, y_: batch_ycv, train_phase: False})
        # NOTE(review): `train_step` is never run in the visible part of the
        # loop -- confirm the training call was not lost.
def generator_loss(Ra, loss_func, real, fake):
    """Compute the generator loss for the selected GAN objective.

    Args:
        Ra: Truthy to use the relativistic form, in which each logit is
            offset by the mean of the opposite set's logits. It is switched
            off (with a printed notice) when ``loss_func`` contains 'wgan'.
        loss_func: Objective name: anything containing 'wgan', or 'lsgan',
            'gan', 'gan-gp', 'dragan', 'hinge'. Any other value contributes
            nothing to the loss.
        real: Discriminator outputs for real samples.
        fake: Discriminator outputs for generated samples.

    Returns:
        ``fake_loss + real_loss``; the real part stays 0 in the
        non-relativistic case, and both parts stay 0 for an unrecognized
        ``loss_func``.
    """
    # Ra = Relativistic
    fake_loss = 0
    real_loss = 0

    if Ra and 'wgan' in loss_func:
        print("No exist [Ra + WGAN], so use the {} loss function".format(loss_func))
        Ra = False

    if Ra:
        fake_logit = fake - tf.reduce_mean(real)
        real_logit = real - tf.reduce_mean(fake)

        if loss_func == 'lsgan':
            fake_loss = tf.reduce_mean(tf.square(fake_logit - 1.0))
            real_loss = tf.reduce_mean(tf.square(real_logit + 1.0))
        elif loss_func in ('gan', 'gan-gp', 'dragan'):
            fake_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(fake), logits=fake_logit))
            real_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.zeros_like(real), logits=real_logit))
        elif loss_func == 'hinge':
            fake_loss = tf.reduce_mean(relu(1.0 - fake_logit))
            real_loss = tf.reduce_mean(relu(1.0 + real_logit))
    elif 'wgan' in loss_func or loss_func == 'hinge':
        # WGAN variants and hinge share the same generator objective.
        fake_loss = -tf.reduce_mean(fake)
    elif loss_func == 'lsgan':
        fake_loss = tf.reduce_mean(tf.square(fake - 1.0))
    elif loss_func in ('gan', 'gan-gp', 'dragan'):
        fake_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(fake), logits=fake))

    return fake_loss + real_loss
def kl_loss_2(mean, var):
    """Return the batch-averaged KL-style penalty from a mean and a scale.

    For each example this evaluates
    ``0.5 * sum(mean**2 + var**2 - log(1e-8 + var**2) - 1)`` over the last
    axis and then averages the result. Note that ``var`` is squared before
    use, so it is treated like a standard deviation despite its name; the
    1e-8 keeps the logarithm finite when it is zero.
    """
    # shape : [batch_size, channel]
    quadratic = tf.square(mean) + tf.square(var)
    log_term = tf.log(1e-8 + tf.square(var))
    per_example = 0.5 * tf.reduce_sum(quadratic - log_term - 1, axis=-1)
    return tf.reduce_mean(per_example)