def compute_gradients(self, loss, var_list=None, *args, **kwargs):
    """Compute gradients of a scaled loss and return them un-scaled.

    The loss is multiplied by ``self._scale`` before it is handed to the
    wrapped optimizer, and every gradient that comes back is multiplied by
    ``1 / self._scale``.

    Args:
        loss: Loss tensor to differentiate.
        var_list: Optional variables to differentiate with respect to;
            forwarded unchanged to the wrapped optimizer.
        *args: Extra positional arguments for the wrapped optimizer.
        **kwargs: Extra keyword arguments for the wrapped optimizer.

    Returns:
        A list of ``(gradient, variable)`` pairs. Pairs whose gradient is
        ``None`` are passed through untouched.
    """
    if self._scale == 1.0:
        # Scaling by one is a no-op, so skip both multiplications.
        return list(
            self._optimizer.compute_gradients(loss, var_list, *args,
                                              **kwargs))

    scaled_loss = tf.scalar_mul(self._scale, loss)
    gradvars = self._optimizer.compute_gradients(scaled_loss, var_list,
                                                 *args, **kwargs)
    inv_scale = 1. / self._scale
    # Optimizers report ``None`` for variables the loss does not depend on;
    # those entries cannot be multiplied and must be kept as they are.
    return [(g if g is None else tf.scalar_mul(inv_scale, g), v)
            for g, v in gradvars]
def do_loss_initializations(self,
                            yloss_type="hinge_loss",
                            diversity_loss_type="dpp_style:inverse_dist",
                            feature_weights="inverse_mad"):
    """Build the four loss terms and combine them into ``self.loss``.

    Args:
        yloss_type: Name of the prediction loss; forwarded to
            ``compute_first_part_of_loss``.
        diversity_loss_type: Name of the diversity loss; forwarded to
            ``compute_third_part_of_loss``.
        feature_weights: Either the string ``"inverse_mad"`` (weights are
            derived from the data interface's normalized MADs) or a
            container mapping feature names to weights. Encoded features
            that are not listed get weight 1.0.
    """
    self.yloss_type = yloss_type
    self.diversity_loss_type = diversity_loss_type
    # Stored before ``feature_weights`` is possibly rebuilt below, so this
    # keeps the value exactly as the caller passed it.
    self.loss_weights = [
        self.yloss_type, self.diversity_loss_type, feature_weights
    ]

    # Term 1: loss on the model output.
    self.loss_part1 = self.compute_first_part_of_loss(self.yloss_type)

    # Term 2: weighted distance term; first resolve the per-feature weights.
    if feature_weights == "inverse_mad":
        normalized_mads = self.data_interface.get_mads(normalized=True)
        feature_weights = {
            name: round(1 / normalized_mads[name], 2)
            for name in normalized_mads
        }

    weight_row = [
        feature_weights[name] if name in feature_weights else 1.0
        for name in self.data_interface.encoded_feature_names
    ]
    weight_matrix = np.array([weight_row], dtype=np.float32)

    # The variable is created from ``self.minx`` and immediately
    # overwritten with the resolved weights.
    self.feature_weights = tf.Variable(self.minx, dtype=tf.float32)
    self.dice_sess.run(tf.assign(self.feature_weights, weight_matrix))

    self.loss_part2 = self.compute_second_part_of_loss()

    # Term 3: diversity term; disabled when random initialisations are used.
    if self.total_random_inits > 0:
        self.loss_part3 = tf.constant(0.0, dtype=tf.float32)
    else:
        self.loss_part3 = self.compute_third_part_of_loss(
            self.diversity_loss_type)

    # Term 4: produced by ``compute_fourth_part_of_loss``.
    self.loss_part4 = self.compute_fourth_part_of_loss()

    # loss = part1 + w0 * part2 - w1 * part3 + w2 * part4
    weighted_part2 = tf.scalar_mul(self.weights[0], self.loss_part2)
    running_total = tf.add(self.loss_part1, weighted_part2)
    weighted_part3 = tf.scalar_mul(self.weights[1], self.loss_part3)
    running_total = tf.subtract(running_total, weighted_part3)
    weighted_part4 = tf.scalar_mul(self.weights[2], self.loss_part4)
    self.loss = tf.add(running_total, weighted_part4)
def apply_gradients(self, gradvars, *args, **kwargs):
    """Rescale every gradient by a per-variable ratio, then apply them.

    For each ``(gradient, variable)`` pair the ratio
    ``eta * ||variable|| / ||gradient||`` is computed. When either norm is
    zero the ratio falls back to ``self._epsilon``. With ``self._clip``
    set, the ratio is divided by ``self._learning_rate`` and capped at 1.

    Args:
        gradvars: Iterable of ``(gradient, variable)`` pairs; a gradient
            may be ``None``. One-shot iterables such as ``zip(...)`` are
            accepted.
        *args: Extra positional arguments for the wrapped optimizer.
        **kwargs: Extra keyword arguments for the wrapped optimizer.

    Returns:
        Whatever the wrapped optimizer's ``apply_gradients`` returns.
    """
    # The pairs are walked several times below, so materialise them first;
    # otherwise an iterator would be exhausted after the first pass.
    gradvars = list(gradvars)
    if not gradvars:
        # Nothing to rescale (and ``tf.stack`` rejects an empty list); let
        # the wrapped optimizer decide how to treat empty input.
        return self._optimizer.apply_gradients(gradvars, *args, **kwargs)

    v_norms = tf.stack([tf.norm(tensor=v, ord=2) for _, v in gradvars])
    g_norms = tf.stack([
        tf.norm(tensor=g, ord=2) if g is not None else 0.0
        for g, _ in gradvars
    ])

    zeds = tf.zeros_like(v_norms)
    # Use epsilon when weights or gradients are zero: this avoids dividing
    # by zero and keeps biases from staying stuck at their 0. initial value.
    both_nonzero = tf.logical_and(tf.not_equal(v_norms, zeds),
                                  tf.not_equal(g_norms, zeds))
    ratio = tf.scalar_mul(self._eta, tf.div(v_norms, g_norms))
    fallback = tf.fill(tf.shape(v_norms), self._epsilon)
    larc_local_lr = tf.where(both_nonzero, ratio, fallback)

    if self._clip:
        ones = tf.ones_like(v_norms)
        lr = tf.fill(tf.shape(v_norms), self._learning_rate)
        # The wrapped optimizer already has its learning rate fixed, so the
        # gradients are scaled instead of the learning rate itself.
        larc_local_lr = tf.minimum(tf.div(larc_local_lr, lr), ones)

    gradvars = [(tf.multiply(larc_local_lr[i], g), v)
                if g is not None else (None, v)
                for i, (g, v) in enumerate(gradvars)]
    return self._optimizer.apply_gradients(gradvars, *args, **kwargs)
def build_train_op(self, lr_boundaries, lr_values, optimizer_type):
    """Build the training graph and return its loss, metrics and train op.

    Args:
        lr_boundaries: Step boundaries passed to
            ``tf.train.piecewise_constant``.
        lr_values: Learning-rate values passed to
            ``tf.train.piecewise_constant``.
        optimizer_type: ``'sgd'`` or ``'momentum'``.

    Returns:
        A tuple ``(cross_entropy_loss, train_accuracy, train_op,
        cross_entropy, prob, distance)`` where ``cross_entropy`` is the
        per-example loss and ``distance`` is the signed standard deviation
        of ``prob`` along axis 1 (positive for correct predictions).

    Raises:
        ValueError: If ``optimizer_type`` is not a supported optimizer.
    """
    # Fail before touching the graph; previously an unknown type left the
    # optimizer unbound and surfaced later as an unrelated error.
    if optimizer_type not in ('sgd', 'momentum'):
        raise ValueError(
            "Unsupported optimizer_type %r; expected 'sgd' or 'momentum'" %
            (optimizer_type,))

    train_step = tf.Variable(initial_value=0, trainable=False)
    self.train_step = train_step

    prob, logits = self.build_network(self.train_image_placeholder, True,
                                      False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.train_label_placeholder, logits=logits)
    weighted_loss = tf.multiply(cross_entropy, self.train_weight_placeholder)
    cross_entropy_mean = tf.reduce_mean(weighted_loss, name='cross_entropy')

    # Accuracy: 1.0 where the arg-max class equals the label, else 0.0.
    prediction = tf.equal(tf.cast(tf.argmax(prob, axis=1), tf.int32),
                          self.train_label_placeholder)
    prediction = tf.cast(prediction, tf.float32)

    # distance = sign * sqrt(variance), with sign = 2 * prediction - 1.
    _, variance = tf.nn.moments(prob, axes=[1])
    sign = tf.subtract(tf.scalar_mul(2.0, prediction), 1.0)
    distance = sign * tf.sqrt(variance)

    self.train_accuracy = tf.reduce_mean(prediction)
    self.learning_rate = tf.train.piecewise_constant(train_step,
                                                     lr_boundaries, lr_values)

    if optimizer_type == 'sgd':
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    else:
        opt = tf.train.MomentumOptimizer(self.learning_rate,
                                         FLAGS.momentum,
                                         use_nesterov=FLAGS.nesterov)

    # Sanity check: every trainable variable is expected to fall into
    # exactly one of the four name groups.
    trainable = tf.trainable_variables()
    weight = [i for i in trainable if 'weight' in i.name]
    bias = [i for i in trainable if 'bias' in i.name]
    beta = [i for i in trainable if 'beta' in i.name]
    gamma = [i for i in trainable if 'gamma' in i.name]
    assert len(weight) + len(bias) + len(beta) + len(gamma) == len(
        tf.trainable_variables())

    grads, total_loss, cross_entropy_loss = self.train_graph_model(
        opt, cross_entropy_mean)
    train_op = self.build_graph_train(opt, grads, optimizer_type, train_step)

    return (cross_entropy_loss, self.train_accuracy, train_op, cross_entropy,
            prob, distance)
def __init__(self,
             learning_rate,
             num_layers,
             size,
             size_layer,
             output_size,
             forget_bias=0.1,
             lambda_coeff=0.5):
    """Build a GRU encoder/decoder graph with a KL-regularised latent.

    Args:
        learning_rate: Learning rate for the Adam optimizer.
        num_layers: Number of stacked GRU cells in encoder and decoder.
        size: Feature dimension of the input ``X``; also the width of the
            latent mean / log-sigma layers.
        size_layer: Number of units in every GRU cell.
        output_size: Width of the output layer and of the target ``Y``.
        forget_bias: Despite its name, this value is used as the
            ``output_keep_prob`` of both dropout wrappers.
        lambda_coeff: Multiplier applied to the KL term.
    """

    def lstm_cell(size_layer):
        return tf.nn.rnn_cell.GRUCell(size_layer)

    rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    drop = tf.nn.rnn_cell.DropoutWrapper(rnn_cells,
                                         output_keep_prob=forget_bias)
    self.hidden_layer = tf.placeholder(tf.float32,
                                       (None, num_layers * size_layer))
    _, last_state = tf.nn.dynamic_rnn(drop,
                                      self.X,
                                      initial_state=self.hidden_layer,
                                      dtype=tf.float32)

    self.z_mean = tf.layers.dense(last_state, size)
    self.z_log_sigma = tf.layers.dense(last_state, size)
    # Reparameterisation: z = mean + sigma * eps. The noise was previously
    # sampled but never multiplied in, so z_vector was deterministic.
    epsilon = tf.random_normal(tf.shape(self.z_log_sigma))
    self.z_vector = self.z_mean + tf.exp(self.z_log_sigma) * epsilon

    with tf.variable_scope('decoder', reuse=False):
        rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False)
        drop_dec = tf.nn.rnn_cell.DropoutWrapper(
            rnn_cells_dec, output_keep_prob=forget_bias)
        # NOTE(review): the decoder consumes self.X and the encoder's last
        # state; z_vector is not fed into it. An unused concat of z_vector
        # with X was removed here - confirm whether the decoder was meant
        # to be conditioned on z_vector.
        self.outputs, self.last_state = tf.nn.dynamic_rnn(
            drop_dec, self.X, initial_state=last_state, dtype=tf.float32)

    # NOTE(review): outputs[-1] indexes the first axis of the RNN output;
    # presumably callers feed a single sequence - confirm.
    self.logits = tf.layers.dense(self.outputs[-1], output_size)
    self.lambda_coeff = lambda_coeff

    self.kl_loss = -0.5 * tf.reduce_sum(
        1.0 + 2 * self.z_log_sigma - self.z_mean**2 -
        tf.exp(2 * self.z_log_sigma), 1)
    self.kl_loss = tf.scalar_mul(self.lambda_coeff, self.kl_loss)
    self.cost = tf.reduce_mean(
        tf.square(self.Y - self.logits) + self.kl_loss)
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
        self.cost)
def __init__(self,
             x,
             y=None,
             teacherLogits=None,
             lr=1e-04,
             nClasses=8,
             imgXdim=84,
             imgYdim=84,
             batchSize=64,
             keepProb=1.0,
             temperature=8,
             lambda_=0.5):
    """Build the network and, depending on the inputs, its losses.

    Args:
        x: Input tensor.
        y: Optional label tensor; when given, accuracy is also defined.
        teacherLogits: Optional teacher logits. When given, the
            distillation loss and an Adam optimizer are created.
        lr: Learning rate for the Adam optimizer.
        nClasses: Stored as ``self.nClasses``.
        imgXdim: Stored as ``self.imgXdim``.
        imgYdim: Stored as ``self.imgYdim``.
        batchSize: Stored as ``self.batchSize``.
        keepProb: Stored as ``self.dropout``.
        temperature: Divisor applied to student and teacher logits in the
            distillation term.
        lambda_: Weight of the distillation term; the hard-label term gets
            ``1 - lambda_``.
    """
    self.x = x
    self.w = {}
    self.b = {}
    self.y = y
    self.teacherLogits = teacherLogits
    self.lambda_ = lambda_
    self.T = temperature
    self.imgXdim = imgXdim
    self.imgYdim = imgYdim
    self.nClasses = nClasses
    self.batchSize = batchSize
    self.learningRate = lr
    self.dropout = keepProb
    self.fcOutSize = 48

    # Initialize parameters randomly and run the forward pass.
    self.initParameters()
    self.output, self.layerInfo = self.run()

    # Identity tests are required here: comparing a tensor with ``!=`` may
    # be evaluated element-wise, which makes the ``if`` ambiguous.
    has_labels = self.y is not None

    if self.teacherLogits is not None:
        # Training with knowledge distillation.
        self.outputTeacher = tf.scalar_mul(1.0 / self.T, self.teacherLogits)
        self.outputTeacher = tf.nn.softmax(self.outputTeacher)
        self.cost_1 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.output,
                                                    labels=self.y))
        # Predictions come from the un-softened logits, so they must be
        # taken before self.output is divided by the temperature.
        self.pred = tf.nn.softmax(self.output)
        self.output = tf.scalar_mul(1.0 / self.T, self.output)
        self.cost_2 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.output, labels=self.outputTeacher))
        self.cost = ((1.0 - lambda_) * self.cost_1 + lambda_ * self.cost_2)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learningRate).minimize(self.cost)
    else:
        # Standalone testing.
        if has_labels:
            self.cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.output,
                                                        labels=self.y))
        self.pred = tf.nn.softmax(self.output)

    if has_labels:
        # Labeled images: evaluate the model.
        self.correct_pred = tf.equal(tf.argmax(self.pred, 1),
                                     tf.argmax(self.y, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(self.correct_pred, tf.float32))
def __init__(self,
             state_size,
             action_size,
             gamma_reg=0.001,
             minibatch_size=5,
             **kwargs):
    """Set up the initial solve and the minibatch update of the weights.

    Parameters
    ----------
    state_size, action_size : int
        Size of the environment state space and action space
    gamma_reg : float, optional
        LS-IELM regularisation parameter
    minibatch_size : int, optional
        Size of minibatches for updating
    **kwargs
        Additional keyword arguments passed to `SingleLayerNetwork`
    """
    super().__init__(state_size, action_size, **kwargs)
    self.k = int(minibatch_size)
    self.prep_state = self.act

    n_hid = self.N_hid
    self.H = tf.placeholder(shape=[self.k, n_hid], dtype=tf.float32)
    self.T = tf.placeholder(shape=[self.k, action_size], dtype=tf.float32)
    h_transposed = tf.transpose(self.H)
    a_inv = tf.Variable(tf.random_uniform([n_hid, n_hid], 0, 1))

    # Initial solve: A0 = I / gamma_reg + H^T H ;  W0 = A0^-1 H^T T
    regulariser = tf.scalar_mul(1.0 / gamma_reg, tf.eye(n_hid))
    gram = tf.matmul(h_transposed, self.H)
    a0_inv = tf.matrix_inverse(tf.add(regulariser, gram))
    w0 = tf.matmul(a0_inv, tf.matmul(h_transposed, self.T))
    self.initModel = (self.W.assign(w0), a_inv.assign(a0_inv))

    # Update step:
    #   K1  = H A^-1 H^T + I
    #   K_t = I - A^-1 H^T K1^-1 H
    #   W  <- K_t W + K_t A^-1 H^T T
    #   A^-1 <- K_t A^-1
    k1 = tf.add(tf.matmul(self.H, tf.matmul(a_inv, h_transposed)),
                tf.eye(self.k))
    identity_hid = tf.eye(n_hid)
    k1_inv_h = tf.matmul(tf.matrix_inverse(k1), self.H)
    correction = tf.matmul(a_inv, tf.matmul(h_transposed, k1_inv_h))
    k_t = tf.subtract(identity_hid, correction)

    carried_weights = tf.matmul(k_t, self.W)
    new_evidence = tf.matmul(tf.matmul(k_t, a_inv),
                             tf.matmul(h_transposed, self.T))
    w_new = tf.add(carried_weights, new_evidence)
    a_new = tf.matmul(k_t, a_inv)
    self.updateModel = (self.W.assign(w_new), a_inv.assign(a_new))

    self.first = True
    self.var_init()
def __init__(self):
    """Create placeholders, main/target networks, loss and training ops.

    All hyper-parameters are read from the module-level ``pd`` mapping
    (``batch_size``, ``rnn_max_len``, ``gamma``, ``grad_clip``, ``lr``,
    ``double_networks_sync_step``).
    """

    def scoped_variables(prefix):
        # Global variables under '<prefix>/value' followed by those under
        # '<prefix>/feat_embedding', as a fresh list.
        found = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=prefix + '/value')
        found.extend(
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                              scope=prefix + '/feat_embedding'))
        return found

    # Placeholders.
    self.sph_user = tf.sparse_placeholder(tf.int32, name='sph_user')
    self.sph_doc = tf.sparse_placeholder(tf.int32, name='sph_doc')
    self.sph_con = tf.sparse_placeholder(tf.int32, name='sph_con')
    self.ph_reward = tf.placeholder(tf.float32, name='ph_reward')
    self.ph_nq = tf.placeholder(
        tf.float32,
        shape=[pd['batch_size'], pd['rnn_max_len']],
        name='ph_nq')

    # Main network, then target network (only its second output is kept).
    self.dst_embed, self.mq = self.build_net('main')
    _, self.tq = self.build_net('target')

    # diff = reward + gamma * next_q - main_q, all flattened.
    flat_reward = tf.reshape(self.ph_reward, [-1])
    discounted_next_q = tf.scalar_mul(tf.constant(pd['gamma']),
                                      tf.reshape(self.ph_nq, [-1]))
    diff = flat_reward + discounted_next_q - tf.reshape(self.mq, [-1])
    self.loss = tf.reduce_mean(tf.square(diff))

    # Clipped gradient of the main output w.r.t. dst_embed.
    embed_grads = tf.gradients(self.mq, self.dst_embed)
    self.a_grads = tf.clip_by_global_norm(embed_grads, pd['grad_clip'])[0]

    # Clipped gradient of the loss w.r.t. the main network's variables.
    vs = scoped_variables('main')
    loss_grads = tf.gradients(self.loss, vs)
    self.grads = tf.clip_by_global_norm(loss_grads, pd['grad_clip'])[0]

    with tf.variable_scope('train_value'):
        optimizer = tf.train.AdamOptimizer(pd['lr'])
        self.opt = optimizer.apply_gradients(zip(self.grads, vs))

    self.m_params = scoped_variables('main')
    self.t_params = scoped_variables('target')

    # target <- (1 - alpha) * target + alpha * main
    alpha = pd['double_networks_sync_step']
    self.sync_op = [
        tf.assign(t, (1.0 - alpha) * t + alpha * m)
        for t, m in zip(self.t_params, self.m_params)
    ]
    self.total_loss, self.batch_counter = 0.0, 0
def build_test_op(self):
    """Build the evaluation graph.

    Returns:
        A tuple ``(test_loss, test_accuracy, loss, prob)`` where ``loss``
        is the per-example cross entropy and ``prob`` is the first output
        of ``build_network``.
    """
    prob, logits = self.build_network(self.test_image_placeholder, False,
                                      True)
    labels = self.test_label_placeholder

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                          logits=logits)

    # 1.0 where the arg-max class matches the label, 0.0 otherwise.
    predicted_class = tf.cast(tf.argmax(prob, axis=1), tf.int32)
    prediction = tf.cast(tf.equal(predicted_class, labels), tf.float32)

    self.test_loss = tf.reduce_mean(loss)
    self.test_accuracy = tf.reduce_mean(prediction)

    # distance = sign * sqrt(variance), with sign = 2 * prediction - 1.
    # NOTE(review): this value is built but neither stored nor returned
    # here - confirm whether callers expect it.
    mean, variance = tf.nn.moments(prob, axes=[1])
    sign = tf.subtract(tf.scalar_mul(2.0, prediction), 1.0)
    distance = sign * tf.sqrt(variance)

    return self.test_loss, self.test_accuracy, loss, prob
def build_train_op(self, lr_boundaries, lr_values, optimizer_type):
    """Build the training graph with L2 weight decay.

    Args:
        lr_boundaries: Step boundaries passed to
            ``tf.train.piecewise_constant``.
        lr_values: Learning-rate values passed to
            ``tf.train.piecewise_constant``.
        optimizer_type: ``"momentum"`` or ``"sgd"``.

    Returns:
        A tuple ``(train_loss, train_accuracy, train_op, loss, prob,
        distance)`` where ``loss`` is the per-example cross entropy and
        ``distance`` is the signed standard deviation of ``prob`` along
        axis 1 (positive for correct predictions).

    Raises:
        ValueError: If ``optimizer_type`` is not a supported optimizer.
    """
    # Fail before touching the graph; previously an unknown type left the
    # optimizer unbound and surfaced later as an unrelated error.
    if optimizer_type not in ("momentum", "sgd"):
        raise ValueError(
            "Unsupported optimizer_type %r; expected 'momentum' or 'sgd'" %
            (optimizer_type,))

    train_step = tf.Variable(initial_value=0, trainable=False)
    self.train_step = train_step

    prob, logits = self.build_network(self.train_image_placeholder, True,
                                      False)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.train_label_placeholder, logits=logits)

    # 1.0 where the arg-max class matches the label, 0.0 otherwise.
    prediction = tf.equal(tf.cast(tf.argmax(prob, axis=1), tf.int32),
                          self.train_label_placeholder)
    prediction = tf.cast(prediction, tf.float32)

    # distance = sign * sqrt(variance), with sign = 2 * prediction - 1.
    _, variance = tf.nn.moments(prob, axes=[1])
    sign = tf.subtract(tf.scalar_mul(2.0, prediction), 1.0)
    distance = sign * tf.sqrt(variance)

    l2_loss = tf.add_n(
        [tf.nn.l2_loss(var) for var in tf.trainable_variables()])
    weighted_loss = tf.multiply(loss, self.train_weight_placeholder)

    # ``weight_decay`` is read from the enclosing module.
    self.train_loss = tf.reduce_mean(weighted_loss) + l2_loss * weight_decay
    self.train_accuracy = tf.reduce_mean(prediction)
    self.learning_rate = tf.train.piecewise_constant(train_step,
                                                     lr_boundaries, lr_values)

    if optimizer_type == "momentum":
        optimizer = tf.train.MomentumOptimizer(self.learning_rate,
                                               0.9,
                                               use_nesterov=True)
    else:
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

    # Run the ops registered in UPDATE_OPS together with each step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(self.train_loss, global_step=train_step)

    return (self.train_loss, self.train_accuracy, train_op, loss, prob,
            distance)
def aggregate_single_gradient_using_copy(grad_and_vars, use_mean,
                                         check_inf_nan):
    """Combine one shared variable's gradients from all towers.

    Note that this function provides a synchronization point across all
    towers.

    Args:
      grad_and_vars: A list or tuple of (gradient, variable) tuples, one
        per tower, all for the same shared variable.
      use_mean: if True, the mean is taken, else the sum of gradients.
      check_inf_nan: if True, also report whether any gradient contains a
        nan or inf.

    Returns:
      The tuple ((combined_gradient, variable), has_nan_or_inf). The
      variable is taken from the first tower. has_nan_or_inf is a boolean
      tensor when check_inf_nan is set and None otherwise.
    """
    tower_grads = [g for g, _ in grad_and_vars]
    num_towers = len(tower_grads)

    contains_sparse = any(
        isinstance(g, tf.IndexedSlices) for g in tower_grads)
    if contains_sparse:
        # TODO(reedwm): All-reduce IndexedSlices more effectively.
        combined = aggregate_indexed_slices_gradients(tower_grads)
    else:
        combined = tf.add_n(tower_grads)

    if use_mean and num_towers > 1:
        combined = tf.scalar_mul(1.0 / num_towers, combined)

    shared_var = grad_and_vars[0][1]
    if not check_inf_nan:
        return (combined, shared_var), None

    with tf.name_scope('check_for_inf_and_nan'):
        # The check runs on the per-tower gradients, not on the combined
        # gradient.
        all_finite = tf.reduce_all(tf.is_finite(tower_grads))
        has_nan_or_inf = tf.logical_not(all_finite)
    return (combined, shared_var), has_nan_or_inf
import tensorflow.compat.v1 as tf

# Session.run() needs graph mode. Under TensorFlow 2.x the compat.v1 module
# still executes eagerly by default, so switch eager execution off first.
tf.disable_eager_execution()

# 2-D tensor used as the left operand of every operation below.
t1 = tf.constant([[0, 1, 2], [3, 4, 5]], tf.float32)
# Second tensor with the same dtype as t1.
# t2 = tf.constant([[5, 3, 1]], tf.float32)  # same column count as t1: applied row by row
t2 = tf.constant([[1], [2]], tf.float32)  # same row count as t1: applied column by column

# Element-wise sum of the two tensors.
result_add = tf.add(t1, t2)  # equivalent to t1 + t2
# Element-wise difference of the two tensors.
result_subtract = tf.subtract(t1, t2)  # equivalent to t1 - t2
# Element-wise product of the two tensors.
result_multiply = tf.multiply(t1, t2)  # equivalent to t1 * t2
# Product of a scalar and a tensor.
result_scalar_mul = tf.scalar_mul(2, t1)  # equivalent to 2 * t1
# Element-wise quotient of the two tensors.
result_div = tf.div(t1, t2)  # equivalent to t1 / t2

# Evaluate and print the results; the context manager closes the session
# even when an evaluation fails.
with tf.Session() as session:
    print("二维张量t1:\n", session.run(t1))
    print("二维张量t2:\n", session.run(t2))
    print("相加结果result_add:\n", session.run(result_add))
    print("相减结果result_subtract:\n", session.run(result_subtract))
    print("相乘结果result_multiply:\n", session.run(result_multiply))
    print("标量2与张量t1相乘结果result_scalar_mul:\n",
          session.run(result_scalar_mul))
    print("相除结果result_div:\n", session.run(result_div))
def _static_subsample(self, indicator, batch_size, labels):
    """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be
        sampled. N should be a compile time constant.
      batch_size: desired batch size. This scalar cannot be None.
      labels: boolean tensor of shape [N] denoting positive(=True) and
        negative (=False) examples. N should be a compile time constant.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries
        which are sampled. It ensures the length of output of the subsample
        is always batch_size, even when number of examples set to True in
        indicator is less than batch_size.

    Raises:
      ValueError: if the shape of indicator or labels is not fully defined,
        or if batch_size is not an int.
    """
    # Check if indicator and labels have a static size. The message pieces
    # are adjacent literals, so the separating space must be spelled out.
    if not indicator.shape.is_fully_defined():
        raise ValueError(
            'indicator must be static in shape when is_static is '
            'True')
    if not labels.shape.is_fully_defined():
        raise ValueError('labels must be static in shape when is_static is '
                         'True')
    if not isinstance(batch_size, int):
        raise ValueError(
            'batch_size has to be an integer when is_static is '
            'True.')

    input_length = tf.shape(indicator)[0]

    # Set the number of examples set True in indicator to be at least
    # batch_size.
    num_true_sampled = tf.reduce_sum(tf.cast(indicator, tf.float32))
    additional_false_sample = tf.less_equal(
        tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
        batch_size - num_true_sampled)
    indicator = tf.logical_or(indicator, additional_false_sample)

    # Shuffle indicator and label. Need to store the permutation to restore
    # the order post sampling.
    permutation = tf.random_shuffle(tf.range(input_length))
    indicator = ops.matmul_gather_on_zeroth_axis(
        tf.cast(indicator, tf.float32), permutation)
    labels = ops.matmul_gather_on_zeroth_axis(tf.cast(labels, tf.float32),
                                              permutation)

    # index (starting from 1) when indicator is True, 0 when False
    indicator_idx = tf.where(tf.cast(indicator, tf.bool),
                             tf.range(1, input_length + 1),
                             tf.zeros(input_length, tf.int32))

    # Replace -1 for negative, +1 for positive labels
    signed_label = tf.where(
        tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
        tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
    # negative of index for negative label, positive index for positive
    # label, 0 when indicator is False.
    signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
    sorted_signed_indicator_idx = tf.nn.top_k(signed_indicator_idx,
                                              input_length,
                                              sorted=True).values

    [num_positive_samples, num_negative_samples
     ] = self._get_num_pos_neg_samples(sorted_signed_indicator_idx,
                                       batch_size)

    sampled_idx = self._get_values_from_start_and_end(
        sorted_signed_indicator_idx, num_positive_samples,
        num_negative_samples, batch_size)

    # Shift the indices to start from 0 and remove any samples that are set
    # as False.
    sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
    sampled_idx = tf.multiply(
        tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
        sampled_idx)

    sampled_idx_indicator = tf.cast(
        tf.reduce_sum(tf.one_hot(sampled_idx, depth=input_length), axis=0),
        tf.bool)

    # project back the order based on stored permutations
    reprojections = tf.one_hot(permutation,
                               depth=input_length,
                               dtype=tf.float32)
    return tf.cast(
        tf.tensordot(tf.cast(sampled_idx_indicator, tf.float32),
                     reprojections,
                     axes=[0, 0]), tf.bool)
def last_value_quantize(self,
                        inputs,
                        per_channel=False,
                        init_min=-6.0,
                        init_max=6.0,
                        name_prefix='FixedValueQuant',
                        reuse=None,
                        is_training=False,
                        num_bits=8,
                        narrow_range=False,
                        relative_quantile=0,
                        freeze=False,
                        quant_delay=False):
    """Adds a layer that collects quantization ranges as last input ranges.

    Creates variables called 'min' and 'max' holding the interval used for
    quantization and clamping. In training mode they are overwritten from
    the current batch (symmetrically around zero) before quantizing.

    Args:
      inputs: a tensor containing values to be quantized.
      per_channel: (Optional) a boolean specifying whether to use different
        quantization ranges per output channel.
      init_min: a float scalar, the initial value for variable min.
      init_max: a float scalar, the initial value for variable max.
      name_prefix: name_prefix for created nodes.
      reuse: whether or not the layer and its variables should be reused. To
        be able to reuse the layer scope must be given.
      is_training: Whether the op is applied to a training or eval graph.
      num_bits: Number of bits to use for quantization, must be between 2
        and 8.
      narrow_range: Whether to use the narrow quantization range
        [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
      relative_quantile: Location of the quantization min and max.
        relative_quantile = 0 is equivalent to using min and max of input;
        relative_quantile = 1 sets min and max at the optimal location
        assuming the input distribution is uniform. Only used when
        num_bits is below 4.
      freeze: If True, the min and max variables are taken from the batch
        only while the quantization step is at most 1 and keep their stored
        value afterwards. If False, they are updated every cycle.
      quant_delay: Forwarded to `delayed_quant` in training mode; the eval
        path always passes None.

    Returns:
      a tensor containing quantized values.
    """
    with tf.variable_scope(None,
                           default_name=name_prefix,
                           values=[inputs],
                           reuse=reuse) as scope:
        scope.set_partitioner(None)
        input_shape = inputs.get_shape()
        input_dim = len(input_shape)

        min_max_shape = []
        if per_channel:
            # Only support quantizing 1-, 2- and 4-dimensional tensors.
            assert input_dim in [1, 2, 4]
            min_max_shape = [input_shape[-1]]

        min_var = tf.get_variable(
            'min',
            min_max_shape,
            tf.float32,
            initializer=tf.constant_initializer(init_min),
            trainable=False)
        max_var = tf.get_variable(
            'max',
            min_max_shape,
            tf.float32,
            initializer=tf.constant_initializer(init_max),
            trainable=False)

        if not is_training:
            # Eval graph: quantize with the stored range, no delay.
            return self.delayed_quant(inputs,
                                      min_var,
                                      max_var,
                                      per_channel=per_channel,
                                      num_bits=num_bits,
                                      narrow_range=narrow_range,
                                      quant_delay=None)

        if per_channel:
            if input_dim == 2:
                reduce_dims = [0]
            elif input_dim == 4:
                reduce_dims = [0, 1, 2]

        # With four or more bits the plain min/max of the batch is used.
        if num_bits >= 4:
            quantile = 0
        else:
            quantile = (1.0 / 2.0**(num_bits + 1.0)) * relative_quantile * 100

        if not per_channel:
            batch_min = tfp.stats.percentile(inputs,
                                             q=quantile,
                                             name='BatchMin')
            batch_max = tfp.stats.percentile(inputs,
                                             q=100 - quantile,
                                             name='BatchMax')
        elif input_dim >= 2:
            batch_min = tfp.stats.percentile(inputs,
                                             q=quantile,
                                             axis=reduce_dims,
                                             name='BatchMin')
            batch_max = tfp.stats.percentile(inputs,
                                             q=100 - quantile,
                                             axis=reduce_dims,
                                             name='BatchMax')
        else:
            # 1-D per-channel input: every element is its own channel.
            batch_min = inputs
            batch_max = inputs

        # Symmetric range: max is the largest magnitude; min is its
        # negative, stretched by ``multiplier`` for the wide range.
        batch_abs_max = tf.maximum(tf.abs(batch_min), tf.abs(batch_max))
        if narrow_range:
            batch_adjusted_min = 0 - batch_abs_max
        else:
            multiplier = 1.0 + 1.0 / (2.0**(num_bits - 1.0) - 1.0)
            batch_adjusted_min = 0 - tf.scalar_mul(multiplier, batch_abs_max)

        batch_abs_max = tf.cast(batch_abs_max, tf.float32)
        batch_adjusted_min = tf.cast(batch_adjusted_min, tf.float32)

        if freeze:
            quant_step = common.CreateOrGetQuantizationStep()
            min_max_assign = tf.less_equal(quant_step,
                                           1,
                                           name='MinMaxAssign')
            min_value = tf.cond(min_max_assign,
                                lambda: batch_adjusted_min,
                                lambda: min_var,
                                name='AssignMinCond')
            max_value = tf.cond(min_max_assign,
                                lambda: batch_abs_max,
                                lambda: max_var,
                                name='AssignMaxCond')
        else:
            min_value = batch_adjusted_min
            max_value = batch_abs_max

        assign_min = tf.assign(min_var, min_value)
        assign_max = tf.assign(max_var, max_value)

        return self.delayed_quant(inputs,
                                  assign_min,
                                  assign_max,
                                  per_channel=per_channel,
                                  num_bits=num_bits,
                                  narrow_range=narrow_range,
                                  quant_delay=quant_delay)
def train(model_path, learning_rate, epoch, noisy=False):
    """Train the student network against the teacher's outputs.

    Three loss variants are supported: a noisy-teacher L2 loss
    (``noisy=True``), a knowledge-distillation loss (module-level ``KD``
    set) and a plain L2 loss between teacher and student outputs.

    The first 80% of the epochs run at ``learning_rate`` and the remaining
    ones at ``learning_rate / 10``. A checkpoint is written every 10 epochs.

    Args:
        model_path: Directory that receives the checkpoints.
        learning_rate: Base learning rate for Adam.
        epoch: Total number of epochs to train.
        noisy: If True, use the noisy-teacher loss.
    """
    total_epoch = epoch
    teacher = nin()
    student = lenet()

    if noisy == True:
        drop_scale = 1 / Nratio
        noisy_mask = tf.nn.dropout(tf.constant(
            np.float32(np.ones((batch_size, 1))) / drop_scale),
                                   keep_prob=Nratio)  # (batchsize, 1)
        gaussian = tf.random_normal(shape=[batch_size, 1],
                                    mean=0.0,
                                    stddev=Nsigma)
        noisy = tf.mul(noisy_mask, gaussian)
        teacher = tf.mul(teacher, tf.tile(noisy, tf.constant(
            [1, 10])))  # (batchsize, 10)
        print(bcolors.G + "prepare for training, noisy mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size
    elif KD == True:
        print(bcolors.G + "prepare for training, knowledge distilling mode" +
              bcolors.END)
        one_hot = tf.one_hot(y, n_classes, 1.0, 0.0)
        teacher_tau = tf.scalar_mul(1.0 / tau, teacher)
        student_tau = tf.scalar_mul(1.0 / tau, student)
        objective1 = tf.nn.sigmoid_cross_entropy_with_logits(
            student_tau, one_hot)
        objective2 = tf.scalar_mul(0.5, tf.square(student_tau - teacher_tau))
        tf_loss = (lamda * tf.reduce_sum(objective1) +
                   (1 - lamda) * tf.reduce_sum(objective2)) / batch_size
    else:
        print(bcolors.G + "prepare for training, NIPS2014 mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size

    # One train op per learning-rate stage.
    optimizer1 = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(tf_loss)
    optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate /
                                        10).minimize(tf_loss)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options, allow_soft_placement=True))
    tf.initialize_all_variables().run()
    with tf.device('/cpu:0'):
        saver = tf.train.Saver(max_to_keep=100)

    data, label = read_cifar10('train')
    index = np.array(range(len(data)))  # shuffled once per epoch
    mean = cal_mean()
    begin = time.time()
    iterations = len(data) // batch_size
    # Guard against a zero threshold (total_epoch < 2), which used to
    # raise ZeroDivisionError in the stage test.
    decay_step = max(int(total_epoch * 0.8), 1)
    cnt = 0
    dropout_rate = dropout
    print(bcolors.G + "number of iterations (per epoch) =" + str(iterations) +
          bcolors.END)

    for i in range(total_epoch):
        np.random.shuffle(index)
        cost_sum = 0

        # Pick the stage by epoch count. The former ``cnt / decay_step == 0``
        # test relied on integer division; with true division it matched
        # only epoch 0, so later epochs trained nothing and re-added a
        # stale cost.
        if cnt < decay_step:
            lr, train_op = learning_rate, optimizer1
        else:
            lr, train_op = learning_rate / 10, optimizer2

        for j in range(iterations):
            batch_ids = index[j * batch_size:(j + 1) * batch_size]
            batch_x = np.float32(data[batch_ids]) - mean
            batch_y = np.squeeze(np.float32(label[batch_ids]))
            _, cost = sess.run([train_op, tf_loss],
                               feed_dict={
                                   x: batch_x,
                                   y: batch_y,
                                   keep_prob: 1 - dropout_rate
                               })
            cost_sum += cost

        cnt += 1
        avg_time = time.time() - begin
        print(
            "epoch %d - avg. %f seconds in each epoch, lr = %.0e, cost = %f , avg-cost-per-logits = %f"
            % (i, avg_time / cnt, lr, cost_sum,
               cost_sum / iterations / n_classes))

        if np.mod(i + 1, 10) == 0:
            print("Epoch ", i + 1, " is done. Saving the model ...")
            with tf.device('/cpu:0'):
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                saver.save(sess,
                           os.path.join(model_path, 'model'),
                           global_step=i)
        sys.stdout.flush()
def main():
    """Create the model and start the training.

    Validates the command-line arguments, snapshots the experiment meta
    data, builds the graph, runs ``train_task_sequence`` and stores the
    results (plus a cross-validation summary line when requested).

    Raises:
        ValueError: If the architecture, importance method or optimizer
            given on the command line is not supported.
    """
    # Get the CL arguments
    args = get_arguments()

    # Validate the requested configuration.
    if args.arch not in VALID_ARCHS:
        raise ValueError("Network architecture %s is not supported!"%(args.arch))
    if args.imp_method not in MODELS:
        raise ValueError("Importance measure %s is undefined!"%(args.imp_method))
    if args.optim not in VALID_OPTIMS:
        raise ValueError("Optimizer %s is undefined!"%(args.optim))

    # Create log directories to store the results
    if not os.path.exists(args.log_dir):
        print('Log directory %s created!'%(args.log_dir))
        os.makedirs(args.log_dir)

    # Generate the experiment key and store the meta data in a file
    exper_meta_data = {'DATASET': 'PERMUTE_MNIST',
                       'NUM_RUNS': args.num_runs,
                       'TRAIN_SINGLE_EPOCH': args.train_single_epoch,
                       'IMP_METHOD': args.imp_method,
                       'SYNAP_STGTH': args.synap_stgth,
                       'FISHER_EMA_DECAY': args.fisher_ema_decay,
                       'FISHER_UPDATE_AFTER': args.fisher_update_after,
                       'OPTIM': args.optim,
                       'LR': args.learning_rate,
                       'BATCH_SIZE': args.batch_size,
                       'MEM_SIZE': args.mem_size}
    experiment_id = "PERMUTE_MNIST_HERDING_%s_%s_%s_%s_%r_%s-"%(
        args.arch, args.train_single_epoch, args.imp_method,
        str(args.synap_stgth).replace('.', '_'), str(args.batch_size),
        str(args.mem_size)) + datetime.datetime.now().strftime("%y-%m-%d-%H-%M")
    snapshot_experiment_meta_data(args.log_dir, experiment_id, exper_meta_data)

    # Get the subset of data depending on training or cross-validation mode
    if args.online_cross_val:
        num_tasks = K_FOR_CROSS_VAL
    else:
        num_tasks = NUM_TASKS - K_FOR_CROSS_VAL

    # Variables to store the accuracies and standard deviations of the experiment
    acc_mean = dict()
    acc_std = dict()

    # Reset the default graph
    ops.reset_default_graph()
    graph = tf.Graph()
    with graph.as_default():
        # Set the random seed
        tf.set_random_seed(args.random_seed)

        # Define Input and Output of the model
        x = tf.placeholder(tf.float32, shape=[None, INPUT_FEATURE_SIZE])
        if args.imp_method == 'PNN':
            # One label placeholder per task.
            y_ = []
            for i in range(num_tasks):
                y_.append(tf.placeholder(tf.float32, shape=[None, TOTAL_CLASSES]))
        else:
            y_ = tf.placeholder(tf.float32, shape=[None, TOTAL_CLASSES])

        # Define the optimizer
        if args.optim == 'ADAM':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optim == 'SGD':
            opt = tf.train.GradientDescentOptimizer(learning_rate=args.learning_rate)
        elif args.optim == 'MOMENTUM':
            # The optimizer uses the constant learning rate. A decayed rate
            # used to be built here from names that are not defined in
            # this function and was never passed on, so it was removed.
            opt = tf.train.MomentumOptimizer(args.learning_rate, OPT_MOMENTUM)
        else:
            # Reached only when VALID_OPTIMS lists an optimizer that has no
            # branch above.
            raise ValueError("Optimizer %s is undefined!"%(args.optim))

        # Create the Model/ construct the graph
        model = Model(x, y_, num_tasks, opt, args.imp_method, args.synap_stgth,
                      args.fisher_update_after, args.fisher_ema_decay,
                      network_arch=args.arch)

        # Set up tf session and initialize variables.
        if USE_GPU:
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
        else:
            config = tf.ConfigProto(
                device_count = {'GPU': 0}
            )

        time_start = time.time()
        # The context manager closes the session on exit.
        with tf.Session(config=config, graph=graph) as sess:
            runs = train_task_sequence(model, sess, args)
        time_end = time.time()
        time_spent = time_end - time_start

    # Store all the results in one dictionary to process later
    exper_acc = dict(mean=runs)

    # If cross-validation flag is enabled, store the stuff in a text file
    if args.cross_validate_mode:
        acc_mean = runs.mean(0)
        acc_std = runs.std(0)
        cross_validate_dump_file = args.log_dir + '/' + 'PERMUTE_MNIST_%s_%s'%(args.imp_method, args.optim) + '.txt'
        with open(cross_validate_dump_file, 'a') as f:
            if MULTI_TASK:
                f.write('GPU:{} \t ARCH: {} \t LR:{} \t LAMBDA: {} \t ACC: {}\n'.format(
                    USE_GPU, args.arch, args.learning_rate, args.synap_stgth,
                    acc_mean[-1, :].mean()))
            else:
                f.write('GPU: {} \t ARCH: {} \t LR:{} \t LAMBDA: {} \t ACC: {} \t Fgt: {} \t Time: {}\n'.format(
                    USE_GPU, args.arch, args.learning_rate, args.synap_stgth,
                    acc_mean[-1, :].mean(), compute_fgt(acc_mean),
                    str(time_spent)))

    # Store the experiment output to a file
    snapshot_experiment_eval(args.log_dir, experiment_id, exper_acc)
# Scatter plot of the first feature column against the target.
plt.scatter(data['x'][:, 0], data['y'])
plt.xlabel('Date')
plt.ylabel('Number of newly infected')

# Linear model with nb_features inputs: hyp = X . w + bias
X = tf.placeholder(name='X', shape=(None, nb_features), dtype=tf.float32)
Y = tf.placeholder(name='Y', shape=(None), dtype=tf.float32)
w = tf.Variable(tf.zeros(nb_features), name='W')
bias = tf.Variable(0.0)
w_col = tf.reshape(w, (nb_features, 1), name='W_col')
hyp = tf.add(tf.matmul(X, w_col), bias, name='Hyp')
Y_col = tf.reshape(Y, (-1, 1), name='Y_col')

# Loss = mean squared error + lmbd * mean(w^2)
l2_reg = tf.scalar_mul(lmbd, tf.reduce_mean(tf.square(w)), name='L2_reg')
mse = tf.reduce_mean(tf.square(hyp - Y_col), name='Mse')
loss = tf.add(mse, l2_reg, name='loss')
opt_op = tf.train.AdamOptimizer(name="opt_op").minimize(loss)

with tf.Session() as sess:
    # Dump the graph to ./graphs.
    writer = tf.summary.FileWriter('./graphs', graph=sess.graph)
    sess.run(tf.global_variables_initializer())
    # Run 100 training epochs.
    nb_epochs = 100
    for epoch in range(nb_epochs):
        # Stochastic Gradient Descent.
        # NOTE(review): the loop body is not part of this excerpt.
def _define_desc_graph(self):
    """Build the description-pair encoder, its loss and its train op.

    Two placeholders (``self.desc1`` / ``self.desc2``) are pushed through the
    same stack of layers: GRU -> Conv1D -> attention re-weighting -> GRU ->
    attention re-weighting -> sum over axis 1 -> Dense -> L2 normalisation.
    The loss is the negative sum of ``log(sigmoid(sim * indicator))`` divided
    by ``self.desc_batch_size``, where ``sim`` is the matrix of dot products
    between the two embedding batches and ``indicator`` holds 1 on the
    diagonal and ``self.negative_indication_weight`` elsewhere.

    Sets ``desc1``, ``desc2``, ``_att1``, ``_att3``, ``desc_loss``,
    ``desc_embedding1``, ``desc_embedding2`` and ``desc_optimizer`` on self.
    """
    softmax = tf.keras.activations.softmax
    seq_len = self.default_desc_length
    dim = self.wv_dim

    with tf.variable_scope('desc'):
        # --- inputs -------------------------------------------------------
        left_in = tf.placeholder(dtype=tf.float32,
                                 shape=[None, seq_len, dim],
                                 name='desc1')
        self.desc1 = left_in
        right_in = tf.placeholder(dtype=tf.float32,
                                  shape=[None, seq_len, dim],
                                  name='desc2')
        self.desc2 = right_in

        # --- shared layers (construction order is kept on purpose) ---------
        first_gru = tf.keras.layers.GRU(units=dim, return_sequences=True)
        second_gru = tf.keras.layers.GRU(units=dim, return_sequences=True)
        conv = tf.keras.layers.Conv1D(filters=dim,
                                      kernel_size=3,
                                      strides=1,
                                      activation=tf.tanh,
                                      padding='valid',
                                      use_bias=True)
        projection = tf.keras.layers.Dense(units=dim,
                                           activation=tf.tanh,
                                           use_bias=True)
        first_att = tf.keras.layers.Dense(units=1,
                                          activation='tanh',
                                          use_bias=True)
        self._att1 = first_att
        second_att = tf.keras.layers.Dense(units=1,
                                           activation='tanh',
                                           use_bias=True)
        self._att3 = second_att

        # --- stage 1: GRU + conv, attention scaled by the sequence length --
        left = conv(first_gru(left_in))
        right = conv(first_gru(right_in))
        left_w = softmax(first_att(left), axis=-2)
        right_w = softmax(first_att(right), axis=-2)
        left = tf.multiply(left, tf.scalar_mul(seq_len, left_w))
        right = tf.multiply(right, tf.scalar_mul(seq_len, right_w))

        # --- stage 2: second GRU, plain attention weighting ---------------
        left = second_gru(left)
        right = second_gru(right)
        left_w = softmax(second_att(left), axis=-2)
        right_w = softmax(second_att(right), axis=-2)
        left = tf.multiply(left, left_w)
        right = tf.multiply(right, right_w)

        # --- pooling + final dense projection ------------------------------
        left_pooled = tf.reduce_sum(left, 1)
        right_pooled = tf.reduce_sum(right, 1)
        left_emb = tf.nn.l2_normalize(projection(left_pooled), dim=1)
        right_emb = tf.nn.l2_normalize(projection(right_pooled), dim=1)

        # 1 on the diagonal (matching pairs), negative weight everywhere else.
        pair_weights = np.full((self.desc_batch_size, self.desc_batch_size),
                               self.negative_indication_weight,
                               dtype=np.float32)
        np.fill_diagonal(pair_weights, 1.)
        pair_weights = tf.constant(pair_weights)

        right_emb_t = tf.transpose(right_emb)
        similarity = tf.matmul(left_emb, right_emb_t)
        weighted = tf.multiply(similarity, pair_weights)
        log_prob = tf.log(tf.sigmoid(weighted) + 0.)
        self.desc_loss = -tf.reduce_sum(log_prob) / self.desc_batch_size

        self.desc_embedding1 = left_emb
        self.desc_embedding2 = right_emb

        optimizer = get_optimizer(self.args.optimizer,
                                  self.args.learning_rate)
        self.desc_optimizer = optimizer.minimize(self.desc_loss)
def main(_):
    """Train the two-critic GAN on the gaussian dataset, then render test images.

    The function
      1. loads the training PNGs from ``./gaussian_dataset`` (sorted by the
         integer index embedded in each file name),
      2. builds a generator conditioned on (x, y, z) coordinates and two
         critics with linear (Wasserstein-style) losses, RMSProp optimizers
         and weight clipping of the critic variables to ``CLIP``,
      3. trains for ``a.epoch`` epochs, periodically evaluating and saving
         ``.npz`` checkpoints, and
      4. generates one image per row of ``./location_150.csv`` and writes it
         to ``a.sample_dir``.

    Args:
        _: unused (presumably the argv list handed over by the app runner).

    Fixes relative to the previous revision:
      * the second critic's loss referenced the undefined names
        ``d2x_logits1`` / ``d2g_logits1`` (now ``d2x_logits2`` /
        ``d2g_logits2``);
      * the clipping op was run through the misspelled name ``clio_d_op``;
      * the result of ``np.clip`` was discarded, so out-of-range pixel values
        wrapped around when cast to ``uint8``.
    """
    CLIP = [-0.01, 0.01]   # critic weights are clipped to this interval
    CRITIC_NUM = 5         # critic updates per generator update (steady state)
    NUM_TRAIN = 72         # number of training samples used
    NUM_TEST = 150         # number of test locations rendered at the end

    def _read_xyz(csv_path):
        """Return the three CSV columns as separate (N, 1) arrays."""
        frame = pd.read_csv(csv_path)
        frame.columns = ["COL1", "COL2", "COL3"]
        return (np.array(frame[["COL1"]]),
                np.array(frame[["COL2"]]),
                np.array(frame[["COL3"]]))

    # Configure checkpoint/samples dir
    tl.files.exists_or_mkdir(a.checkpoint_dir)
    tl.files.exists_or_mkdir(a.sample_dir)

    # Read the training images, ordered by the numeric index in the file name.
    frame_ids = sorted(
        int(name.split('.')[0].split('_')[2])
        for name in os.listdir("./gaussian_dataset"))
    data_files = [
        "./gaussian_dataset/gaussianheavy_blackaverage_" +
        str(frame_id).zfill(4) + ".png" for frame_id in frame_ids
    ]
    images = []
    for path in data_files:
        image = get_image(path,
                          a.image_size,
                          is_crop=a.is_crop,
                          resize_w=a.output_size,
                          is_grayscale=False)
        # Append a trailing channel axis.
        images.append(image[:, :, np.newaxis])

    # Construct graph on GPU
    with tf.device("/gpu:0"):
        # ---- inputs --------------------------------------------------------
        x_l = tf.placeholder(tf.float32, [None, 1], name='x_noise')
        y_l = tf.placeholder(tf.float32, [None, 1], name='y_noise')
        z_l = tf.placeholder(tf.float32, [None, 1], name='z_noise')
        z = tf.concat([x_l, y_l, z_l], axis=1)
        real_images = tf.placeholder(
            tf.float32, [None, a.output_size, a.output_size, a.c_dim],
            name='real_images')
        sess = tf.InteractiveSession()

        # ---- generator and first critic -------------------------------------
        net_g = generator(z, is_train=True, reuse=False)
        net_d1, d1g_logits1 = discriminator1(net_g.outputs,
                                             is_train=True,
                                             reuse=False)
        _, d1x_logits1 = discriminator1(real_images,
                                        is_train=True,
                                        reuse=True)
        # Evaluation copy of the generator (is_train=False so that batch
        # normalisation layers behave differently).
        net_g2 = generator(z, is_train=False, reuse=True)

        d1_loss_real = tf.reduce_mean(
            tf.scalar_mul(-1, d1x_logits1, name='d1real'))
        d1_loss_fake = tf.reduce_mean(d1g_logits1, name='d1fake')
        d1_loss = 0.5 * (d1_loss_real + d1_loss_fake)

        # ---- second critic ---------------------------------------------------
        # Note the reversed roles compared with discriminator1: here the
        # "g" logits come from real images and the "x" logits from generated
        # ones.
        net_d2, d2g_logits2 = discriminator2(real_images,
                                             is_train=True,
                                             reuse=False)
        _, d2x_logits2 = discriminator2(net_g.outputs,
                                        is_train=True,
                                        reuse=True)
        d2_loss_real = tf.reduce_mean(
            tf.scalar_mul(-1, d2x_logits2, name='d2real'))
        d2_loss_fake = tf.reduce_mean(d2g_logits2, name='d2fake')
        d2_loss = 0.5 * (d2_loss_real + d2_loss_fake)

        d_loss = d1_loss + d2_loss

        # L2 weight decay on the h4/h5 layers of the critic.
        h4_params = tl.layers.get_variables_with_name(
            name='discriminator/d/h4/lin_sigmoid', train_only=True)
        h5_params = tl.layers.get_variables_with_name(
            name='discriminator/d/h5/lin_sigmoid', train_only=True)
        l2_wl = 0.0002
        for p in h4_params + h5_params:
            d_loss += tf.multiply(tf.nn.l2_loss(p), l2_wl)

        # ---- generator loss --------------------------------------------------
        g1_loss = tf.reduce_mean(tf.scalar_mul(-1, d1g_logits1),
                                 name='g1fake')
        # NOTE(review): d2g_logits2 is computed from real_images only, so this
        # term does not appear to depend on the generator variables - confirm
        # that this is intended.
        g2_loss = tf.reduce_mean(tf.scalar_mul(-1, d2g_logits2),
                                 name='g2fake')
        g_loss = 0.5 * (g1_loss + g2_loss)

        g_vars = tl.layers.get_variables_with_name('generator', True, True)
        d_vars = tl.layers.get_variables_with_name('discriminator', True,
                                                   True)

        # ---- optimizers and weight clipping -----------------------------------
        d_optim = tf.train.RMSPropOptimizer(a.learning_rate) \
            .minimize(d_loss, var_list=d_vars)
        g_optim = tf.train.RMSPropOptimizer(a.learning_rate) \
            .minimize(g_loss, var_list=g_vars)
        clip_d_op = [
            var.assign(tf.clip_by_value(var, CLIP[0], CLIP[1]))
            for var in d_vars
        ]

    # ---- training data ---------------------------------------------------------
    x_label, y_label, z_label = _read_xyz('./plant6.csv')
    index2 = np.arange(0, NUM_TRAIN, 1)
    images = np.asarray(images)
    # The same NUM_TRAIN rows serve both as training batches and as the
    # periodic evaluation sample.
    batch_x_label = sample_x_label = x_label[index2]
    batch_y_label = sample_y_label = y_label[index2]
    batch_z_label = sample_z_label = z_label[index2]
    batch_images = sample_image = images[index2]

    # ---- summaries -------------------------------------------------------------
    with tf.name_scope('summary'):
        tf.summary.scalar('d1_loss', d1_loss)
        tf.summary.scalar('d2_loss', d2_loss)
        tf.summary.scalar('d_loss', d_loss)
        tf.summary.scalar('g1_loss', g1_loss)
        tf.summary.scalar('g2_loss', g2_loss)
        tf.summary.scalar('g_loss', g_loss)
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter('./logs', sess.graph)

    sess.run(tf.global_variables_initializer())

    model_dir = "%s_%s_%s" % (a.dataset, a.batch_size, a.output_size)
    save_dir = os.path.join(a.checkpoint_dir, model_dir)
    tl.files.exists_or_mkdir(save_dir)

    # Checkpoint file names
    net_g_name = os.path.join(save_dir, 'net_g.npz')
    net_d1_name = os.path.join(save_dir, 'net_d1.npz')
    net_d2_name = os.path.join(save_dir, 'net_d2.npz')

    # ---- training ----------------------------------------------------------------
    iter_counter = 0
    index = np.arange(NUM_TRAIN)
    for epoch in range(a.epoch):
        np.random.shuffle(index)
        for start_index in range(0, NUM_TRAIN, a.batch_size):
            end_index = start_index + a.batch_size
            start_time = time.time()

            batch_idx = index[start_index:end_index]
            noise_feed = {
                x_l: batch_x_label[batch_idx],
                y_l: batch_y_label[batch_idx],
                z_l: batch_z_label[batch_idx],
            }
            critic_feed = dict(noise_feed)
            critic_feed[real_images] = batch_images[batch_idx]

            # NOTE(review): this schedule is keyed on start_index (the offset
            # inside the epoch), not on iter_counter - confirm that is the
            # intended warm-up criterion.
            if start_index < 25 or start_index % 500 == 0:
                critic_num = 25
            else:
                critic_num = CRITIC_NUM

            # Update the critics, clipping their weights after every step.
            for _ in range(critic_num):
                summary, errD, _ = sess.run([merged, d_loss, d_optim],
                                            feed_dict=critic_feed)
                sess.run(clip_d_op)

            # Update the generator; run it twice to make sure that d_loss
            # does not go to zero (different from paper).
            for _ in range(2):
                errG, _ = sess.run([g_loss, g_optim], feed_dict=noise_feed)

            end_time = time.time() - start_time
            print("Epoch: [%2d/%2d] time: %4.4f, d_loss: %.8f, g_loss: %.8f" \
                    % (epoch, a.epoch, end_time, errD, errG))
            iter_counter += 1

            if np.mod(iter_counter, a.sample_step) == 0:
                # Evaluate the losses on the fixed sample set.
                _, errD, errG = sess.run(
                    [net_g2.outputs, d_loss, g_loss],
                    feed_dict={
                        x_l: sample_x_label,
                        y_l: sample_y_label,
                        z_l: sample_z_label,
                        real_images: sample_image
                    })
                print("[Sample] d_loss: %.8f, g_loss: %.8f" % (errD, errG))

            if np.mod(iter_counter, a.save_step) == 0:
                # Save current network parameters
                print("[*] Saving checkpoints...")
                tl.files.save_npz(net_g.all_params,
                                  name=net_g_name,
                                  sess=sess)
                tl.files.save_npz(net_d1.all_params,
                                  name=net_d1_name,
                                  sess=sess)
                tl.files.save_npz(net_d2.all_params,
                                  name=net_d2_name,
                                  sess=sess)
                print("[*] Saving checkpoints SUCCESS!")

            # Summary of the last critic step of this batch.
            # https://www.cnblogs.com/Charles-Wan/p/6501945.html
            writer.add_summary(summary, iter_counter)

    # ---- testing -------------------------------------------------------------------
    print("finish training, start testing...")
    x_test, y_test, z_test = _read_xyz('./location_150.csv')
    index = np.arange(0, NUM_TEST, 1)
    generated_images = sess.run(net_g2.outputs,
                                feed_dict={
                                    x_l: x_test[index],
                                    y_l: y_test[index],
                                    z_l: z_test[index],
                                })
    for i in range(NUM_TEST):
        # Rescale to pixel values and clip BEFORE the uint8 cast; np.clip
        # returns a new array, so its result has to be kept.
        pixels = np.clip(128 * generated_images[i] + 127, 0, 255)
        tl.visualize.save_image(
            pixels.astype(np.uint8),
            './{}/train_{:02d}.png'.format(a.sample_dir, i))
    print("testing is finished")
    writer.close()
    sess.close()
def build_one_phase(layerxk, layerzk, Phi, PhiT, Yinput, phase, lambdavalue):
    """Append the graph ops of one reconstruction phase.

    Args:
        layerxk: list of previous x estimates (last element is the newest).
        layerzk: list of previous z estimates (last element is the newest).
        Phi, PhiT: sensing tensor and the tensor used to project back.
        Yinput: measurement tensor subtracted from the projection.
        phase: index of the phase; the step-size check runs when it is >= 1.
        lambdavalue: initial value for this phase's step-size variable.

    Returns:
        (xk, zk, symmetric, Frk, lambdavalue): the new x estimate, the new z
        estimate, the symmetry residual, the forward-transform output and
        the (possibly rescaled) step-size value.
    """
    unit_strides = [1, 1, 1, 1]

    def conv_same(inputs, kernel):
        # Stride-1 convolution with 'SAME' padding, used by every layer below.
        return tf.nn.conv2d(inputs, kernel, strides=unit_strides,
                            padding='SAME')

    # Scalars of this phase (variables are created in the original order).
    lambdaStep = tf.Variable(lambdavalue, dtype=tf.float32)
    eta, xi = 0.95, 0.95
    softThr = tf.Variable(0.1, dtype=tf.float32)
    t = tf.Variable(1, dtype=tf.float32)

    n_channels = (64, 64, 64)    # output channels of the three conv layers
    kernel_sizes = (3, 3, 3)     # spatial size of the matching kernels

    # rk = z_k - lambdaStep * PhiT * (sum(Phi * z_k) - Y)
    latest_z = layerzk[-1]
    measured = tf.reduce_sum(tf.multiply(Phi, latest_z), axis=3)
    measured = tf.reshape(measured, shape=[-1, pixel, pixel, 1])
    residual = tf.subtract(measured, Yinput)
    back_projected = tf.multiply(PhiT, tf.tile(residual, [1, 1, 1, nFrame]))
    scaled_step = tf.scalar_mul(lambdaStep, back_projected)
    rk = tf.subtract(latest_z, scaled_step)

    # F(rk): conv -> conv -> relu -> conv
    weight0 = get_filter(
        [kernel_sizes[0], kernel_sizes[0], nFrame, n_channels[0]], 0)
    weight11 = get_filter(
        [kernel_sizes[1], kernel_sizes[1], n_channels[0], n_channels[1]], 11)
    weight12 = get_filter(
        [kernel_sizes[2], kernel_sizes[2], n_channels[1], n_channels[2]], 12)
    first_features = conv_same(rk, weight0)
    Frk = conv_same(first_features, weight11)
    Frk = tf.nn.relu(Frk)
    Frk = conv_same(Frk, weight12)

    # soft(F(rk), softThr) = sign(F) * relu(|F| - softThr)
    signs = tf.sign(Frk)
    shrunk = tf.nn.relu(tf.subtract(tf.abs(Frk), softThr))
    softFrk = tf.multiply(signs, shrunk)

    # ~F(soft(F(rk), softThr)): conv -> relu -> conv -> conv
    weight13 = get_filter(
        [kernel_sizes[2], kernel_sizes[2], n_channels[2], n_channels[1]], 53)
    weight14 = get_filter(
        [kernel_sizes[1], kernel_sizes[1], n_channels[1], n_channels[0]], 54)
    weight6 = get_filter(
        [kernel_sizes[0], kernel_sizes[0], n_channels[0], nFrame], 6)
    FFrk = conv_same(softFrk, weight13)
    FFrk = tf.nn.relu(FFrk)
    FFrk = conv_same(FFrk, weight14)
    FFrk = conv_same(FFrk, weight6)

    # xk = rk + ~F(soft(F(rk), softThr))
    xk = tf.add(rk, FFrk)
    print(t)
    zk = t * xk + (1 - t) * layerxk[-1]

    if phase >= 1:
        delta0 = eta * tf.norm(layerxk[-1] - layerxk[-2])
        delta1 = tf.norm(xk - layerxk[-1])
        larger = tf.math.less(delta0, delta1)
        # NOTE(review): `larger` is the output of tf.math.less, not a Python
        # str, so this comparison with "True" looks like it can never succeed
        # and lambdavalue is returned unchanged - confirm the intent.
        if larger == "True":
            lambdavalue = xi * lambdavalue

    # Symmetric constraint: the first two inverse layers applied to F(rk)
    # should reproduce the output of the first forward layer.
    sFFrk = conv_same(Frk, weight13)
    sFFrk = tf.nn.relu(sFFrk)
    sFFrk = conv_same(sFFrk, weight14)
    symmetric = sFFrk - first_features

    return xk, zk, symmetric, Frk, lambdavalue
def rnn_decoder(self,
                encode_embed,
                attention_states,
                initial_state,
                cell,
                num_heads=1,
                loop_function=None,
                dtype=dtypes.float32,
                scope=None,
                initial_state_attention=False):
    """Score every encoder position with a single attention read.

    Despite its name, this method runs no recurrent decoding loop: it
    queries the attention mechanism once with ``initial_state`` and returns
    the resulting per-position scores.

    Args:
        encode_embed: sequence of tensors; only ``encode_embed[0]`` is read,
            to obtain the dynamic batch size.
        attention_states: rank-3 tensor; axes 1 and 2 must have static sizes
            (they are read through ``get_shape()[i].value``).
        initial_state: rank-2 tensor whose axis 1 has a static size; used as
            the attention query and returned unchanged.
        cell: not used in this method body.
        num_heads: number of attention heads.
        loop_function: not used in this method body.
        dtype: dtype of the placeholder tensors created before the attention
            call (those tensors are overwritten afterwards).
        scope: variable scope name; defaults to "rnn_decoder".
        initial_state_attention: not used in this method body.

    Returns:
        ``(outputs, state)`` where ``outputs`` is a one-element list holding
        the head-averaged attention scores shifted so that the minimum along
        axis 1 is zero, and ``state`` is ``initial_state``.

    The scoring function is selected by ``self.hparams.att_strategy``:
    'multi', 'multi_add', 'NTN', 'elu', or (any other value) additive.
    """
    with tf.variable_scope(scope or "rnn_decoder"):
        batch_size = tf.shape(encode_embed[0])[0]  # Needed for reshaping.
        # cprint('batch_size: {}'.format(batch_size), 'green')
        # Tensor("ranking_model/ranking_model/embedding_rnn_decoder/rnn_decoder/strided_slice_1:0", shape=(), dtype=int32)
        # cprint('batch_size.get_shape(): {}'.format(batch_size.get_shape()), 'red')
        # ()

        # number of output vectors in the sequence
        attn_length = attention_states.get_shape()[1].value
        # the dimension size of each output vector
        attn_size = attention_states.get_shape()[2].value
        # the dimension size of the state vector
        state_size = initial_state.get_shape()[1].value
        print(batch_size, attn_length, attn_size, state_size,
              "batch_size, attn_length, attn_size, state_size")

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to
        # reshape before.
        print(attention_states.get_shape(),
              "attention_states.get_shape()")  # (?, 9, 186)
        hidden = tf.reshape(attention_states,
                            [-1, attn_length, 1, attn_size])

        # Per-head parameters, indexed by head number.
        hidden_features = []
        hidden_features2 = []  # filled below but not read in this method
        v = []
        u = []  # filled below but not read in this method
        linear_w = []
        linear_b = []
        abstract_w = []
        abstract_b = []
        # Layer widths for the 'elu' strategy: (attn_size + state_size) / 2,
        # (attn_size + state_size) / 4, then a single output unit.
        abstract_layers = [
            int((attn_size + state_size) / (2 + 2 * i)) for i in xrange(2)
        ] + [1]
        # Size of query vectors for attention.
        attention_vec_size = attn_size
        head_weights = []
        for a in xrange(num_heads):
            k = self.get_variable("AttnW_%d" % a,
                                  [1, 1, attn_size, attention_vec_size])
            hidden_features.append(
                nn_ops.conv2d(hidden, k, [1, 1, 1, 1],
                              "SAME"))  # [B,T,1,attn_vec_size]
            k2 = self.get_variable("AttnW2_%d" % a,
                                   [1, 1, attn_size, attention_vec_size])
            hidden_features2.append(
                nn_ops.conv2d(hidden, k2, [1, 1, 1, 1], "SAME"))
            v.append(
                self.get_variable("AttnV_%d" % a, [attention_vec_size]))
            u.append(
                self.get_variable("AttnU_%d" % a, [attention_vec_size]))
            head_weights.append(
                self.get_variable("head_weight_%d" % a, [1]))
            current_layer_size = attn_size + state_size
            linear_w.append(
                self.get_variable("linearW_%d" % a,
                                  [1, 1, current_layer_size, 1]))
            linear_b.append(self.get_variable("linearB_%d" % a, [1]))
            abstract_w.append([])
            abstract_b.append([])
            # Each layer's input width is the previous layer's output width.
            for i in xrange(len(abstract_layers)):
                layer_size = abstract_layers[i]
                abstract_w[a].append(
                    self.get_variable(
                        "Att_%d_layerW_%d" % (a, i),
                        [1, 1, current_layer_size, layer_size]))
                abstract_b[a].append(
                    self.get_variable("Att_%d_layerB_%d" % (a, i),
                                      [layer_size]))
                current_layer_size = layer_size

        def attention(query):
            """Put attention masks on hidden using hidden_features and query."""
            ds = []  # Results of attention reads will be stored here.
            aw = []  # Attention weights will be stored here
            # Repeat the query along the sequence axis so it can be
            # concatenated with `hidden` position by position.
            tiled_query = tf.tile(
                tf.reshape(query, [-1, 1, 1, state_size]),
                [1, attn_length, 1, 1])
            print(hidden.get_shape(), "hidden.get_shape()")  # (?, 9, 1, 186)
            print(tiled_query.get_shape(),
                  "tiled_query.get_shape()")  # (?, 9, 1, 186)
            concat_input = tf.concat(axis=3, values=[hidden, tiled_query])
            #concat_input = tf.concat(3, [hidden, hidden])
            for a in xrange(num_heads):
                with tf.variable_scope("Attention_%d" % a):
                    s = None
                    if self.hparams.att_strategy == 'multi':
                        print('Attention: multiply')
                        y = linear(
                            query, attention_vec_size, True
                        )  # The third argument is a boolean: whether to add a bias term or not.
                        y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
                        # s = math_ops.reduce_sum(
                        #    u[a] * math_ops.tanh(y * hidden_features[a]), [2,
                        #    3])
                        s = math_ops.reduce_sum(hidden * math_ops.tanh(y),
                                                [2, 3])
                        # hidden_features[a] * math_ops.tanh(y), [2, 3])
                    elif self.hparams.att_strategy == 'multi_add':
                        print('Attention: multiply_add')
                        y = linear(query,
                                   attention_vec_size,
                                   True,
                                   scope='y')
                        y2 = linear(query,
                                    attention_vec_size,
                                    True,
                                    scope='y2')
                        y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
                        y2 = tf.reshape(y2, [-1, 1, 1, attention_vec_size])
                        # s = math_ops.reduce_sum(
                        #    u[a] * math_ops.tanh(y * hidden_features[a]), [2,
                        #    3])
                        # Multiplicative term plus additive term.
                        s = math_ops.reduce_sum(hidden * math_ops.tanh(y2),
                                                [2, 3])
                        s = s + math_ops.reduce_sum(
                            v[a] * math_ops.tanh(hidden_features[a] + y),
                            [2, 3])
                    elif self.hparams.att_strategy == 'NTN':
                        print('Attention: NTN')
                        y = linear(query, attn_size, False)
                        y = tf.tile(tf.reshape(y, [-1, 1, 1, attn_size]),
                                    [1, attn_length, 1, 1])
                        s = math_ops.reduce_sum(hidden * y,
                                                [2, 3])  # bilinear
                        s = s + math_ops.reduce_sum(
                            nn_ops.conv2d(concat_input, linear_w[a],
                                          [1, 1, 1, 1], "SAME"),
                            [2, 3])  # linear
                        s = s + linear_b[a]  # bias
                        # print(s.get_shape())
                        # s = tf.tanh(s)  # non linear
                    elif self.hparams.att_strategy == 'elu':
                        print('Attention: elu')
                        cur_input = concat_input
                        # for i in xrange(len(abstract_layers)):
                        #    cur_input = tf.contrib.layers.fully_connected(cur_input, abstract_layers[i], activation_fn=tf.nn.elu)
                        # Stack of 1x1 convolutions with ELU activations.
                        for i in xrange(len(abstract_layers)):
                            cur_input = nn_ops.conv2d(
                                cur_input, abstract_w[a][i], [1, 1, 1, 1],
                                "SAME")
                            cur_input = cur_input + abstract_b[a][i]
                            cur_input = tf.nn.elu(cur_input)
                        s = math_ops.reduce_sum(cur_input, [2, 3])
                    else:
                        print('Attention: add')
                        y = linear(query, attention_vec_size, True)
                        y = tf.reshape(y, [-1, 1, 1, attention_vec_size])
                        s = math_ops.reduce_sum(
                            v[a] * math_ops.tanh(hidden_features[a] + y),
                            [2, 3])
                    # Scores are scaled by a learned per-head weight; no
                    # softmax is applied here.
                    att = s * head_weights[a]  # nn_ops.softmax(s)
                    aw.append(att)
                    # Now calculate the attention-weighted vector d.
                    d = math_ops.reduce_sum(
                        tf.reshape(att, [-1, attn_length, 1, 1]) * hidden,
                        [1, 2])
                    ds.append(tf.reshape(d, [-1, attn_size]))
            return aw, ds

        state = initial_state
        outputs = []
        prev = None  # not read in this method
        batch_attn_size = tf.stack([batch_size, attn_size])
        batch_attw_size = tf.stack([batch_size, attn_length])
        # NOTE(review): these initial `attns` / `attw` lists are replaced by
        # the result of attention(initial_state) a few lines below.
        attns = [
            tf.zeros(batch_attn_size, dtype=dtype)
            for _ in xrange(num_heads)
        ]
        attw = [
            1.0 / attn_length * tf.ones(batch_attw_size, dtype=dtype)
            for _ in xrange(num_heads)
        ]
        for a in attns:  # Ensure the second shape of attention vectors is set.
            a.set_shape([None, attn_size])

        # Directly use previous state
        attw, attns = attention(initial_state)
        # Average the per-head scores, then shift each row so its minimum
        # is zero.
        aw = math_ops.reduce_sum(attw, 0)
        output = tf.scalar_mul(1.0 / float(num_heads), aw)
        output = output - tf.reduce_min(output, 1, keep_dims=True)
        outputs.append(output)

    return outputs, state
# ReLU Layer 1 Gradient dLdZ_1 = tf.multiply(tf.sign(A_1), dLdA_1) # Linear Layer 1 Weight Gradients dLdW_1 = tf.matmul(A_0, tf.transpose(dLdZ_1)) dLdW0_1 = tf.reduce_sum(dLdZ_1, axis=1, keepdims=True) # Linear Layer 1 Gradient dLdA_0 = tf.matmul(W_1, dLdZ_1) ################################################################################ # Parameter Update # ################################################################################ # Linear Layer 1 Weight Updates W_1_sgd_step = W_1.assign_sub(tf.scalar_mul(0.005, dLdW_1)) W0_1_sgd_step = W0_1.assign_sub(tf.scalar_mul(0.005, dLdW0_1)) # Linear Layer 2 Weight Updates W_2_sgd_step = W_2.assign_sub(tf.scalar_mul(0.005, dLdW_2)) W0_2_sgd_step = W0_2.assign_sub(tf.scalar_mul(0.005, dLdW0_2)) # Linear Layer 3 Weight Updates W_3_sgd_step = W_3.assign_sub(tf.scalar_mul(0.005, dLdW_3)) W0_3_sgd_step = W0_3.assign_sub(tf.scalar_mul(0.005, dLdW0_3)) # Grouped sgd_step = tf.group(W_3_sgd_step, W0_3_sgd_step, W_2_sgd_step, W0_2_sgd_step, W_1_sgd_step, W0_1_sgd_step)