def generalised_dice_loss(prediction,
                          ground_truth,
                          weight_map=None,
                          type_weight='Square'):
    """
    Function to calculate the Generalised Dice Loss defined in
        Sudre, C. et. al. (2017) Generalised Dice overlap as a deep learning
        loss function for highly unbalanced segmentations. DLMIA 2017

    :param prediction: per-class scores, indexed as [voxel, class]; they are
        used as given (this variant applies no softmax)
    :param ground_truth: the segmentation ground truth, one label per voxel
    :param weight_map: optional per-voxel weights (one value per voxel);
        every class column of a voxel is scaled by that voxel's weight
    :param type_weight: type of weighting allowed between labels (choice
        between Square (square of inverse of volume),
        Simple (inverse of volume) and Uniform (no weighting))
    :return: the loss
    :raises ValueError: if ``type_weight`` is not one of the three choices
    """
    # Fail fast, before any graph ops are created.
    if type_weight not in ('Square', 'Simple', 'Uniform'):
        raise ValueError(
            'The variable type_weight "{}" is not defined.'.format(
                type_weight))

    ground_truth = tf.to_int64(ground_truth)
    n_voxels = ground_truth.shape[0].value
    n_classes = prediction.shape[1].value

    # Sparse one-hot encoding of the labels: entry (i, ground_truth[i]) = 1.
    ids = tf.constant(np.arange(n_voxels), dtype=tf.int64)
    ids = tf.stack([ids, ground_truth], axis=1)
    one_hot = tf.SparseTensor(indices=ids,
                              values=tf.ones([n_voxels], dtype=tf.float32),
                              dense_shape=[n_voxels, n_classes])

    if weight_map is not None:
        # Repeat each voxel's weight along the class axis.  Tiling the flat
        # vector and reshaping row-major would instead interleave weights of
        # different voxels within a row.
        weight_map_nclasses = tf.tile(
            tf.reshape(weight_map, [-1, 1]), [1, n_classes])
        ref_vol = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction, reduction_axes=[0])
        seg_vol = tf.reduce_sum(
            tf.multiply(weight_map_nclasses, prediction), 0)
    else:
        ref_vol = tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(one_hot * prediction,
                                         reduction_axes=[0])
        seg_vol = tf.reduce_sum(prediction, 0)

    if type_weight == 'Square':
        weights = tf.reciprocal(tf.square(ref_vol))
    elif type_weight == 'Simple':
        weights = tf.reciprocal(ref_vol)
    else:  # 'Uniform' (validated above)
        weights = tf.ones_like(ref_vol)

    # Classes absent from the reference have zero volume and therefore an
    # infinite weight; replace those by the largest finite weight.
    new_weights = tf.where(tf.is_inf(weights), tf.zeros_like(weights), weights)
    weights = tf.where(tf.is_inf(weights),
                       tf.ones_like(weights) * tf.reduce_max(new_weights),
                       weights)

    generalised_dice_numerator = \
        2 * tf.reduce_sum(tf.multiply(weights, intersect))
    generalised_dice_denominator = \
        tf.reduce_sum(tf.multiply(weights, seg_vol + ref_vol))
    generalised_dice_score = \
        generalised_dice_numerator / generalised_dice_denominator
    return 1 - generalised_dice_score
def integral(lower, upper):
    """Return ``upper - lower``, or the constant 1 if either bound is infinite.

    Both bounds are cast to ``config.dtype`` before being tested or
    subtracted, and the result has that dtype.
    """
    def _as_config_dtype(value):
        return tf.cast(value, config.dtype)

    def _unit():
        return tf.constant(1, dtype=config.dtype)

    def _width():
        return _as_config_dtype(upper) - _as_config_dtype(lower)

    lower_is_inf = tf.is_inf(_as_config_dtype(lower))
    upper_is_inf = tf.is_inf(_as_config_dtype(upper))
    return tf.cond(tf.logical_or(lower_is_inf, upper_is_inf), _unit, _width)
def integral(lower, upper):
    """Return the normal CDF difference between ``upper`` and ``lower``.

    A bound that is infinite (after casting to ``config.dtype``) is not passed
    to ``_normal_cdf``; an infinite upper bound contributes the constant 1 and
    an infinite lower bound contributes the constant 0.  ``mu`` and ``sigma``
    come from the enclosing scope.
    """
    def _cdf_or_constant(bound, value_if_infinite):
        # Substitute a fixed value when the bound itself is infinite.
        return tf.cond(
            tf.is_inf(tf.cast(bound, config.dtype)),
            lambda: tf.constant(value_if_infinite, dtype=config.dtype),
            lambda: _normal_cdf(bound, mu, sigma),
        )

    cdf_at_upper = _cdf_or_constant(upper, 1)
    cdf_at_lower = _cdf_or_constant(lower, 0)
    return cdf_at_upper - cdf_at_lower
def integral(lower, upper):
    """Return ``exp(-lambda_ * lower) - exp(-lambda_ * upper)``.

    ``lambda_`` comes from the enclosing scope.  Bounds that are infinite
    (after casting to ``config.dtype``) are replaced by the limit of
    ``exp(-lambda_ * x)`` instead of being evaluated:

    * an infinite upper bound contributes 0, the limit for x -> +inf;
    * an infinite lower bound contributes 1, so that the integral over the
      whole real line is 1.

    The previous constants (1 for the upper bound, 0 for the lower bound)
    were the ones that fit a CDF difference ``upper - lower``; with the
    ``lower - upper`` form used here they produced -1 for (-inf, +inf).

    NOTE(review): ``tf.is_inf`` does not distinguish +inf from -inf; this
    assumes an infinite upper bound is +inf, an infinite lower bound is -inf
    and ``lambda_`` is positive -- confirm against the callers.
    """
    def _decay_or_limit(bound, limit_if_infinite):
        # exp(-lambda_ * bound), or its limiting value for an infinite bound.
        return tf.cond(
            tf.is_inf(tf.cast(bound, config.dtype)),
            lambda: tf.constant(limit_if_infinite, dtype=config.dtype),
            lambda: tf.exp(-lambda_*bound)
        )

    upper_integrand = _decay_or_limit(upper, 0)
    lower_integrand = _decay_or_limit(lower, 1)
    return lower_integrand - upper_integrand
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients through the wrapped optimizer, then refresh fp16 copies.

    After the wrapped optimizer's update, every variable whose name is a key
    of ``self._fp32_to_fp16`` is saturate-cast to float16 and assigned to the
    mapped destination variable.  When ``self._loss_scaler`` is set, the loss
    scale is updated first and the whole update is skipped (a no-op is
    returned) if the gradients contain NaNs or their max is infinite.

    :param grads_and_vars: iterable of (gradient, variable) pairs; it is
        traversed more than once
    :param global_step: forwarded to the wrapped optimizer
    :param name: forwarded to the wrapped optimizer
    :return: the op that performs (or skips) the update
    """
    def apply_ops_wrapper():
        base_update = self._optimizer.apply_gradients(grads_and_vars,
                                                      global_step, name)
        # The half-precision copies must be written only after the update.
        with tf.control_dependencies([base_update]):
            cast_back_ops = [
                tf.assign(self._fp32_to_fp16[var.name],
                          tf.saturate_cast(var, tf.float16))
                for _, var in grads_and_vars
                if var.name in self._fp32_to_fp16
            ]
        return tf.group(cast_back_ops) if cast_back_ops else base_update

    if not self._loss_scaler:
        return apply_ops_wrapper()

    grad_has_nans, grad_amax = AutomaticLossScaler.check_grads(grads_and_vars)
    should_skip_update = tf.logical_or(tf.is_inf(grad_amax), grad_has_nans)
    loss_scale_update_op = self._loss_scaler.update_op(grad_has_nans,
                                                       grad_amax)
    with tf.control_dependencies([loss_scale_update_op]):
        return tf.cond(should_skip_update, tf.no_op, apply_ops_wrapper)
def update_op(self, has_nan, amax):
    """Build the op that adapts the loss scale and advances the iteration.

    On overflow (``has_nan`` is true or ``amax`` is infinite) the scale is
    divided by ``self.step_factor`` and the current iteration is recorded as
    the last overflow.  Otherwise, whenever the number of iterations since
    the last overflow is a multiple of ``self.step_window``, the scale is
    multiplied by ``self.step_factor``.  The scale is always clipped to
    ``[self.scale_min, self.scale_max]``.

    :param has_nan: boolean tensor, whether the gradients contain NaNs
    :param amax: tensor tested with ``tf.is_inf``
    :return: the incremented iteration counter, available only after the
        scale update has run
    """
    def _clip_to_limits(candidate):
        return tf.clip_by_value(candidate, self.scale_min, self.scale_max)

    def overflow_case():
        shrink = tf.assign(self.scale,
                           _clip_to_limits(self.scale / self.step_factor))
        remember_overflow = tf.assign(self.last_overflow_iteration,
                                      self.iteration)
        with tf.control_dependencies([shrink, remember_overflow]):
            return tf.identity(self.scale)

    def scale_case():
        def scale_update_fn():
            return tf.assign(self.scale,
                             _clip_to_limits(self.scale * self.step_factor))

        steps_since_overflow = self.iteration - self.last_overflow_iteration
        window_elapsed = tf.equal(steps_since_overflow % self.step_window, 0)
        return tf.cond(window_elapsed, scale_update_fn, lambda: self.scale)

    # The increment op is created outside the control-dependency block below;
    # only the returned identity is ordered after the scale update.
    iter_update = tf.assign_add(self.iteration, 1)
    overflow = tf.logical_or(has_nan, tf.is_inf(amax))
    scale_op = tf.cond(overflow, overflow_case, scale_case)
    with tf.control_dependencies([scale_op]):
        return tf.identity(iter_update)
def update_op(self, has_nan, amax):
    """Build the op that re-estimates the loss scale from ``log2(amax)``.

    Keeps bias-corrected exponential moving averages of ``x = log2(amax)``
    (decay ``beta1``) and of ``x`` and ``x**2`` (decay ``beta2``), derives a
    mean and standard deviation from them, and sets the scale to
    ``2 ** (16 - (mean + overflow_std_dev * std))`` clipped to
    ``[scale_min, scale_max]``.

    :param has_nan: boolean tensor, whether the gradients contain NaNs
    :param amax: tensor tested with ``tf.is_inf`` and passed to ``tf.log``
    :return: the incremented iteration counter, available only after the
        scale has been assigned
    """
    is_nonfinite = tf.logical_or(has_nan, tf.is_inf(amax))

    def _overflow_sample():
        # NOTE(review): this is 2**log_max while the other branch is a log2
        # value -- confirm the asymmetry is intended.
        return tf.pow(2., self.log_max)

    def _log2_of_amax():
        return tf.log(amax) / tf.log(tf.constant(2.))

    x = tf.cond(is_nonfinite, _overflow_sample, _log2_of_amax)

    # Fast moving average of x and its bias-correction factor.
    fast_updates = [
        tf.assign(self.x_hat,
                  self.beta1 * self.x_hat + (1 - self.beta1) * x),
        tf.assign(self.b1_correction, self.b1_correction * self.beta1),
    ]
    with tf.control_dependencies(fast_updates):
        mu = self.x_hat.read_value() / (1 - self.b1_correction.read_value())

    # Slow moving averages of x and x^2 and their bias-correction factor.
    slow_updates = [
        tf.assign(self.slow_x_hat,
                  self.beta2 * self.slow_x_hat + (1 - self.beta2) * x),
        tf.assign(self.xsquared_hat,
                  self.beta2 * self.xsquared_hat + (1 - self.beta2) * (x * x)),
        tf.assign(self.b2_correction, self.b2_correction * self.beta2),
    ]
    with tf.control_dependencies(slow_updates):
        e_xsquared = self.xsquared_hat.read_value() / (
            1 - self.b2_correction.read_value())
        slow_mu = self.slow_x_hat.read_value() / (
            1 - self.b2_correction.read_value())

    # Variance estimate, floored at zero before the square root.
    variance = e_xsquared - (slow_mu * slow_mu)
    sigma = tf.sqrt(tf.maximum(variance, tf.constant(0.)))

    log_cutoff = sigma * self.overflow_std_dev + mu
    proposed_scale = tf.pow(2., 16 - log_cutoff)
    scale_update = tf.assign(
        self.scale,
        tf.clip_by_value(proposed_scale, self.scale_min, self.scale_max))
    iter_update = tf.assign_add(self.iteration, 1)

    with tf.control_dependencies([scale_update]):
        return tf.identity(iter_update)
def scale(self, x):
    """Map x from [-0.5, 0.5] to uint8 pixel values.

    NaN and infinite entries are first replaced by 1, the input is clipped to
    [-0.5, 0.5], shifted to [0, 1], multiplied by
    ``2 ** self.hparams.n_bits_x`` and finally clipped to [0, 255] before the
    cast to ``tf.uint8``.
    """
    # Replace NaNs first, then infinities, with ones.
    for is_bad in (tf.is_nan, tf.is_inf):
        x = tf.where(is_bad(x), tf.ones_like(x), x)

    unit_range = tf.clip_by_value(x, -0.5, 0.5) + 0.5
    levels = 2**self.hparams.n_bits_x
    pixels = tf.clip_by_value(unit_range * levels, 0, 255)
    return tf.cast(pixels, dtype=tf.uint8)
def _log_prob(self, x): log_probs = self._log_unnormalized_prob(x) - self._log_normalization() if not self.interpolate_nondiscrete: # Ensure the gradient wrt `rate` is zero at non-integer points. neg_inf = tf.fill(tf.shape(log_probs), value=np.array( -np.inf, dtype=log_probs.dtype.as_numpy_dtype)) log_probs = tf.where(tf.is_inf(log_probs), neg_inf, log_probs) return log_probs
def set_zero_on_high_global_norm(self, grad, grad_norm_threshold, global_norm_tag=None):
    """
    Replaces the gradient by zeros if the global gradient norm is NaN,
    infinite, or greater than the threshold.

    :param tf.Tensor grad:
    :param float grad_norm_threshold:
    :param str|None global_norm_tag: passed as ``tag`` to ``self.get_global_grad_norm``
    :rtype: tf.Tensor
    """
    norm = self.get_global_grad_norm(tag=global_norm_tag)
    # A non-finite norm is handled exactly like a norm above the threshold.
    is_non_finite = tf.logical_or(tf.is_nan(norm), tf.is_inf(norm))
    exceeds_threshold = tf.greater(norm, grad_norm_threshold)
    should_zero = tf.logical_or(is_non_finite, exceeds_threshold)
    return tf.where(should_zero, tf.zeros_like(grad), grad)
def _compare(self, x, use_gpu):
    """Check tf.is_inf / tf.is_nan / tf.is_finite against their NumPy peers.

    Values are compared first (inf, nan, finite), then the static shapes of
    the TensorFlow ops are compared in the same order.
    """
    np_finite = np.isfinite(x)
    np_inf = np.isinf(x)
    np_nan = np.isnan(x)

    with self.test_session(use_gpu=use_gpu) as sess:
        inx = tf.convert_to_tensor(x)
        ofinite = tf.is_finite(inx)
        oinf = tf.is_inf(inx)
        onan = tf.is_nan(inx)
        tf_finite, tf_inf, tf_nan = sess.run([ofinite, oinf, onan])

    cases = (
        (np_inf, tf_inf, oinf),
        (np_nan, tf_nan, onan),
        (np_finite, tf_finite, ofinite),
    )
    for expected, actual, _ in cases:
        self.assertAllEqual(expected, actual)
    for expected, _, op in cases:
        self.assertShapeEqual(expected, op)
def _get_cubic_root(self):
    """Get the cubic root."""
    # The equation is x^2 D^2 + (1-x)^4 * C / h_min^2, with x = sqrt(mu).
    # Substituting x = y + 1 gives y^3 + py = q,
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # The root is computed with Vieta's substitution; there is only one real
    # solution y (which is in [0, 1] ).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    dist = self._dist_to_opt_avg
    h_min = self._h_min
    grad_var = self._grad_var

    # Every input must be neither NaN nor infinite (NaN checks come first).
    assert_array = [
        tf.Assert(tf.logical_not(is_bad(value)), [value,])
        for is_bad in (tf.is_nan, tf.is_inf)
        for value in (dist, h_min, grad_var)
    ]
    with tf.control_dependencies(assert_array):
        p = dist**2 * h_min**2 / 2 / grad_var
        w_cubed = (-tf.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
        # Sign-preserving real cube root.
        w = tf.sign(w_cubed) * tf.pow(tf.abs(w_cubed), 1.0/3.0)
        y = w - p / 3.0 / w
        root = y + 1
    return root
def loop_fn(loop_state, loop_args, unused_model_state, log_weights, resampled,
            mask, t):
    """Accumulate log_p_hat, the Bellman loss and log_r_diff for one timestep.

    Called with ``loop_state=None`` it returns the zero-initialised
    accumulators.  ``batch_size``, ``num_samples`` and ``max_seq_len`` come
    from the enclosing scope.

    Returns:
      A tuple (log_p_hat_acc, bellman_loss_acc, log_r_diff_acc).
    """
    if loop_state is None:
        return (tf.zeros([batch_size], dtype=tf.float32),
                tf.zeros([batch_size], dtype=tf.float32),
                tf.zeros([num_samples, batch_size], dtype=tf.float32))

    log_p_hat_acc, bellman_loss_acc, log_r_diff_acc = loop_state
    log_r, prev_log_r_tilde, log_p_x_given_z, log_r_diff = loop_args
    per_sample_shape = [num_samples, batch_size]

    # log_p_hat update: log-mean-exp of the weights over the sample axis.
    log_p_hat_update = tf.reduce_logsumexp(
        log_weights, axis=0) - tf.log(tf.to_float(num_samples))
    # On the last timestep the update is always added; before that it is
    # multiplied by `resampled`.
    log_p_hat_acc = log_p_hat_acc + tf.cond(
        t >= max_seq_len-1,
        lambda: log_p_hat_update,
        lambda: log_p_hat_update * resampled)

    # Bellman update.
    log_r = tf.reshape(log_r, per_sample_shape)
    prev_log_r_tilde = tf.reshape(prev_log_r_tilde, per_sample_shape)
    log_p_x_given_z = tf.reshape(log_p_x_given_z, per_sample_shape)
    shaped_mask = tf.reshape(mask, per_sample_shape)
    # There is no Bellman error on the first timestep because there is no
    # prev_log_r_tilde, so the mask is zeroed there.
    step_mask = tf.cond(tf.equal(t, 0),
                        lambda: tf.zeros_like(shaped_mask),
                        lambda: shaped_mask)
    # Also fix up prev_log_r_tilde, which will be -inf on the first timestep.
    prev_log_r_tilde = tf.where(
        tf.is_inf(prev_log_r_tilde),
        tf.zeros_like(prev_log_r_tilde),
        prev_log_r_tilde)
    # Mean over the sample axis, kept as a leading axis of size 1.
    log_lambda = tf.reduce_mean(prev_log_r_tilde - log_p_x_given_z - log_r,
                                axis=0, keepdims=True)
    bellman_target = tf.stop_gradient(log_lambda + log_p_x_given_z + log_r)
    bellman_error = step_mask * tf.square(prev_log_r_tilde - bellman_target)
    bellman_loss_acc = bellman_loss_acc + tf.reduce_mean(bellman_error, axis=0)

    # log_r_diff update.
    log_r_diff_acc = log_r_diff_acc + step_mask * tf.reshape(
        log_r_diff, per_sample_shape)
    return (log_p_hat_acc, bellman_loss_acc, log_r_diff_acc)
def generalised_dice_loss(prediction,
                          ground_truth,
                          weight_map=None,
                          type_weight='Square'):
    """
    Function to calculate the Generalised Dice Loss defined in
        Sudre, C. et. al. (2017) Generalised Dice overlap as a deep learning
        loss function for highly unbalanced segmentations. DLMIA 2017

    :param prediction: the logits (before softmax), indexed as [voxel, class]
    :param ground_truth: the segmentation ground truth, one label per voxel
    :param weight_map: optional per-voxel weights (one value per voxel);
        every class column of a voxel is scaled by that voxel's weight
    :param type_weight: type of weighting allowed between labels (choice
        between Square (square of inverse of volume),
        Simple (inverse of volume) and Uniform (no weighting))
    :return: the loss
    :raises ValueError: if ``type_weight`` is not one of the three choices
    """
    # Fail fast, before any graph ops are created.
    if type_weight not in ('Square', 'Simple', 'Uniform'):
        raise ValueError(
            'The variable type_weight "{}" is not defined.'.format(
                type_weight))

    ground_truth = tf.to_int64(ground_truth)
    n_voxels = ground_truth.get_shape()[0].value
    n_classes = prediction.get_shape()[1].value
    prediction = tf.nn.softmax(prediction)

    # Sparse one-hot encoding of the labels: entry (i, ground_truth[i]) = 1.
    ids = tf.constant(np.arange(n_voxels), dtype=tf.int64)
    ids = tf.stack([ids, ground_truth], axis=1)
    one_hot = tf.SparseTensor(indices=ids,
                              values=tf.ones([n_voxels], dtype=tf.float32),
                              dense_shape=[n_voxels, n_classes])

    if weight_map is not None:
        # Repeat each voxel's weight along the class axis.  Tiling the flat
        # vector and reshaping row-major would instead interleave weights of
        # different voxels within a row.
        weight_map_nclasses = tf.tile(
            tf.reshape(weight_map, [-1, 1]), [1, n_classes])
        ref_vol = tf.sparse_reduce_sum(weight_map_nclasses * one_hot,
                                       reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction, reduction_axes=[0])
        seg_vol = tf.reduce_sum(
            tf.multiply(weight_map_nclasses, prediction), 0)
    else:
        ref_vol = tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(one_hot * prediction,
                                         reduction_axes=[0])
        seg_vol = tf.reduce_sum(prediction, 0)

    if type_weight == 'Square':
        weights = tf.reciprocal(tf.square(ref_vol))
    elif type_weight == 'Simple':
        weights = tf.reciprocal(ref_vol)
    else:  # 'Uniform' (validated above)
        weights = tf.ones_like(ref_vol)

    # Classes absent from the reference have zero volume and therefore an
    # infinite weight; replace those by the largest finite weight.
    new_weights = tf.where(tf.is_inf(weights), tf.zeros_like(weights), weights)
    weights = tf.where(tf.is_inf(weights),
                       tf.ones_like(weights) * tf.reduce_max(new_weights),
                       weights)

    generalised_dice_numerator = \
        2 * tf.reduce_sum(tf.multiply(weights, intersect))
    generalised_dice_denominator = \
        tf.reduce_sum(tf.multiply(weights, seg_vol + ref_vol))
    generalised_dice_score = \
        generalised_dice_numerator / generalised_dice_denominator
    return 1 - generalised_dice_score
def build_net(self):
    """Build the model graph, create the session and initialise variables.

    Side effects visible in this method:
      * creates ``self.log_dir`` / ``self.ckpt_dir`` when they do not exist;
      * sets ``self.input_data``, ``self.res_data`` and ``self.loss_dict``
        from one batch of ``self.data`` passed through ``self.model.forward``;
      * creates ``self.sess``;
      * in training mode builds the learning rate, the optimizer,
        ``self.train_op``, ``self.total_loss``, ``self.variables_to_train``
        and ``self.saver``;
      * sets ``self.summary`` / ``self.summary_writer`` and runs the global
        variable initializer.
    """
    if not os.path.exists(self.log_dir):
        wmlu.create_empty_dir(self.log_dir)
    if not os.path.exists(self.ckpt_dir):
        wmlu.create_empty_dir(self.ckpt_dir)
    # NOTE(review): the device string starts with ":" -- presumably "/cpu:0"
    # was intended; confirm.
    with tf.device(":/cpu:0"):
        data = self.data.get_next()
    DataLoader.detection_image_summary(data, name="data_source")
    self.input_data = data
    # The string literal below is disabled debug code kept as a bare string.
    '''if self.cfg.GLOBAL.DEBUG: data[IMAGE] = tf.Print(data[IMAGE],[tf.shape(data[IMAGE]),data[ORG_HEIGHT],data[ORG_WIDTH],data[HEIGHT],data[WIDTH]],summarize=100, name="XXXXX")'''
    self.res_data, loss_dict = self.model.forward(data)
    if self.model.is_training:
        for k, v in loss_dict.items():
            # The summary records the raw loss; the value registered with
            # tf.losses is zeroed when it is NaN or infinite.
            tf.summary.scalar(f"loss/{k}", v)
            v = tf.cond(tf.logical_or(tf.is_nan(v), tf.is_inf(v)), lambda: tf.zeros_like(v), lambda: v)
            tf.losses.add_loss(v)
    elif self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.RESEARCH:
        # Research mode: optionally overwrite predicted labels / boxes with
        # the values returned by the odt.replace_with_gt* helpers.
        research = self.cfg.GLOBAL.RESEARCH
        if 'result_classes' in research:
            print("replace labels with gtlabels.")
            labels = odt.replace_with_gtlabels(
                bboxes=self.res_data[RD_BOXES],
                labels=self.res_data[RD_LABELS],
                length=self.res_data[RD_LENGTH],
                gtbboxes=data[GT_BOXES],
                gtlabels=data[GT_LABELS],
                gtlength=data[GT_LENGTH])
            self.res_data[RD_LABELS] = labels
        if 'result_bboxes' in research:
            print("replace bboxes with gtbboxes.")
            bboxes = odt.replace_with_gtbboxes(
                bboxes=self.res_data[RD_BOXES],
                labels=self.res_data[RD_LABELS],
                length=self.res_data[RD_LENGTH],
                gtbboxes=data[GT_BOXES],
                gtlabels=data[GT_LABELS],
                gtlength=data[GT_LENGTH])
            self.res_data[RD_BOXES] = bboxes
    self.loss_dict = loss_dict

    # Session configuration: outside training, a GPU memory fraction above
    # 0.1 is applied as a hard cap, a fraction of 0.1 or less enables
    # allow_growth instead; training uses the plain soft-placement config.
    if not self.model.is_training and self.cfg.GLOBAL.GPU_MEM_FRACTION > 0.1:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=self.cfg.GLOBAL.GPU_MEM_FRACTION)
        config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
    else:
        config = tf.ConfigProto(allow_soft_placement=True)
        if not self.model.is_training and self.cfg.GLOBAL.GPU_MEM_FRACTION <= 0.1:
            config.gpu_options.allow_growth = True

    self.sess = tf.Session(config=config)
    self.top_variable_name_scope = "Model"

    if self.model.is_training:
        steps = self.cfg.SOLVER.STEPS
        print("Train steps:", steps)
        # The last entry of STEPS is used as the total number of steps.
        lr = wnn.build_learning_rate(
            self.cfg.SOLVER.BASE_LR,
            global_step=self.global_step,
            lr_decay_type=self.cfg.SOLVER.LR_DECAY_TYPE,
            steps=steps,
            decay_factor=self.cfg.SOLVER.LR_DECAY_FACTOR,
            total_steps=steps[-1],
            warmup_steps=self.cfg.SOLVER.WARMUP_ITERS)
        tf.summary.scalar("lr", lr)
        opt = wnn.str2optimizer("Momentum", lr, momentum=0.9)
        self.max_train_step = steps[-1]
        self.train_op, self.total_loss, self.variables_to_train = wnn.nget_train_op(
            self.global_step,
            optimizer=opt,
            clip_norm=self.cfg.SOLVER.CLIP_NORM)
        print("variables to train:")
        wmlu.show_list(self.variables_to_train)
        for v in self.variables_to_train:
            # v.name[:-2] drops the last two characters of the variable name
            # (presumably the ":0" output suffix).
            wsummary.histogram_or_scalar(v, v.name[:-2])
        wnn.log_moving_variable()

        self.saver = tf.train.Saver(max_to_keep=100)
        tf.summary.scalar(self.cfg.GLOBAL.PROJ_NAME + "_total_loss", self.total_loss)

    self.summary = tf.summary.merge_all()
    self.summary_writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)
    init = tf.global_variables_initializer()
    self.sess.run(init)
    print("batch_norm_ops.")
    wmlu.show_list(
        [x.name for x in tf.get_collection(tf.GraphKeys.UPDATE_OPS)])
def build_graph(self):
    # Builds the whole training graph: placeholders, generator,
    # discriminator and encoder outputs, the discriminator / generator /
    # encoder losses with a gradient penalty, weight statistics, the Adam
    # optimizers, summaries, and the policy-gradient update of `pg_var`.
    # Results are stored as attributes on `self`.
    # NOTE: this block uses Python 2 print statements.
    with tf.name_scope('ganist'):
        ### define placeholders for image and label inputs **g_num** **mt**
        self.im_input = tf.placeholder(tf_dtype, [None]+self.data_dim, name='im_input')
        #self.z_input = tf.placeholder(tf_dtype, [None, self.z_dim], name='z_input')
        #self.z_input = tf.placeholder(tf_dtype, [None, 1, 1, 1], name='z_input')
        self.z_input = tf.placeholder(tf.int32, [None], name='z_input')
        self.zi_input = tf.placeholder(tf_dtype, [None, self.z_dim], name='zi_input')
        self.e_input = tf.placeholder(tf_dtype, [None, 1, 1, 1], name='e_input')
        self.train_phase = tf.placeholder(tf.bool, name='phase')

        ### build generator **mt**
        self.g_layer = self.build_gen(self.z_input, self.zi_input, self.g_act, self.train_phase)
        #self.g_layer = self.build_gen_mt(self.im_input, self.z_input, self.g_act, self.train_phase)

        ### build discriminator
        # The same discriminator / encoder is applied to real and generated
        # inputs (the second call of each passes reuse=True).
        self.r_logits, self.r_hidden = self.build_dis(self.im_input, self.d_act, self.train_phase)
        self.g_logits, self.g_hidden = self.build_dis(self.g_layer, self.d_act, self.train_phase, reuse=True)
        self.r_en_logits = self.build_encoder(self.r_hidden, self.d_act, self.train_phase)
        self.g_en_logits = self.build_encoder(self.g_hidden, self.d_act, self.train_phase, reuse=True)

        ### real gen manifold interpolation
        # Convex combination of generated and real inputs, weighted by e_input.
        rg_layer = (1.0 - self.e_input) * self.g_layer + self.e_input * self.im_input
        self.rg_logits, _ = self.build_dis(rg_layer, self.d_act, self.train_phase, reuse=True)

        ### build d losses
        if self.d_loss_type == 'log':
            self.d_r_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.r_logits, labels=tf.ones_like(self.r_logits, tf_dtype))
            self.d_g_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.g_logits, labels=tf.zeros_like(self.g_logits, tf_dtype))
            self.d_rg_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.rg_logits, labels=tf.ones_like(self.rg_logits, tf_dtype))
        elif self.d_loss_type == 'was':
            self.d_r_loss = -self.r_logits
            self.d_g_loss = self.g_logits
            self.d_rg_loss = -self.rg_logits
        else:
            raise ValueError('>>> d_loss_type: %s is not defined!' % self.d_loss_type)

        ### gradient penalty
        ### NaN free norm gradient
        # Rows whose squared gradient norm is not above 1 are swapped for
        # ones before tf.norm is taken, and their norm is then replaced by
        # the row sum of `rg_grad_abs`, which is 0. * rg_grad_flat.
        rg_grad = tf.gradients(self.rg_logits, rg_layer)
        rg_grad_flat = tf.reshape(rg_grad, [-1, np.prod(self.data_dim)])
        rg_grad_ok = tf.reduce_sum(tf.square(rg_grad_flat), axis=1) > 1.
        rg_grad_safe = tf.where(rg_grad_ok, rg_grad_flat, tf.ones_like(rg_grad_flat))
        #rg_grad_abs = tf.where(rg_grad_flat >= 0., rg_grad_flat, -rg_grad_flat)
        rg_grad_abs = 0. * rg_grad_flat
        rg_grad_norm = tf.where(rg_grad_ok,
            tf.norm(rg_grad_safe, axis=1), tf.reduce_sum(rg_grad_abs, axis=1))
        gp_loss = tf.square(rg_grad_norm - 1.0)
        ### for logging
        self.rg_grad_norm_output = tf.norm(rg_grad_flat, axis=1)

        ### d loss combination **g_num**
        self.d_loss_mean = tf.reduce_mean(self.d_r_loss + self.d_g_loss)
        self.d_loss_total = self.d_loss_mean + self.gp_loss_weight * tf.reduce_mean(gp_loss)

        ### build g loss
        if self.g_loss_type == 'log':
            self.g_loss = -tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.g_logits, labels=tf.zeros_like(self.g_logits, tf_dtype))
        elif self.g_loss_type == 'mod':
            self.g_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.g_logits, labels=tf.ones_like(self.g_logits, tf_dtype))
        elif self.g_loss_type == 'was':
            self.g_loss = -self.g_logits
        else:
            raise ValueError('>>> g_loss_type: %s is not defined!' % self.g_loss_type)

        self.g_loss_mean = tf.reduce_mean(self.g_loss, axis=None)
        self.g_grad_norm = tf.norm(tf.reshape(
            tf.gradients(self.g_loss, self.g_layer), [-1, np.prod(self.data_dim)]), axis=1)

        ### mean matching
        # mm_loss and rec_penalty are computed but only referenced by the
        # commented-out g loss line further down.
        mm_loss = tf.reduce_mean(tf.square(tf.reduce_mean(self.g_layer, axis=0) - tf.reduce_mean(self.im_input, axis=0)), axis=None)

        ### reconstruction penalty
        rec_penalty = tf.reduce_mean(tf.minimum(tf.log(tf.reduce_sum(
            tf.square(self.g_layer - self.im_input), axis=[1, 2, 3])+1e-6), 6.)) \
            + tf.reduce_mean(tf.minimum(tf.log(tf.reduce_sum(
            tf.square(self.g_layer - tf.reverse(self.im_input, axis=[0])), axis=[1, 2, 3])+1e-6), 6.))

        ### generated encoder loss: lower bound on mutual_info(z_input, generator id) **g_num**
        self.g_en_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(tf.reshape(self.z_input, [-1]), self.g_num, dtype=tf_dtype),
            logits=self.g_en_logits)

        ### real encoder entropy: entropy of g_id given real image, marginal entropy of g_id **g_num**
        self.r_en_h = -tf.reduce_mean(tf.reduce_sum(tf.nn.softmax(self.r_en_logits) * tf.nn.log_softmax(self.r_en_logits), axis=1))
        r_en_marg_pr = tf.reduce_mean(tf.nn.softmax(self.r_en_logits), axis=0)
        self.r_en_marg_hlb = -tf.reduce_sum(r_en_marg_pr * tf.log(r_en_marg_pr + 1e-8))
        print 'e_en_logits_shape: ', self.r_en_logits.shape

        ### discounter
        self.rl_counter = tf.get_variable('rl_counter', dtype=tf_dtype, initializer=1.0)

        ### g loss combination **g_num**
        #self.g_loss_mean += self.mm_loss_weight * mm_loss - self.rec_penalty_weight * rec_penalty
        self.g_loss_total = self.g_loss_mean + self.en_loss_weight * tf.reduce_mean(self.g_en_loss)

        ### e loss combination
        # Both entropy terms are multiplied by 0., so only the mean of
        # g_en_loss contributes to the value.
        self.en_loss_total = tf.reduce_mean(self.g_en_loss) + \
            0. * self.r_en_h + 0.* -self.r_en_marg_hlb

        ### collect params
        self.g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "g_net")
        self.d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "d_net")
        self.e_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "e_net")

        ### compute stat of weights
        # nan_vars / inf_vars stay raw counts; zero_vars / big_vars are
        # divided by the total element count below.
        self.nan_vars = 0.
        self.inf_vars = 0.
        self.zero_vars = 0.
        self.big_vars = 0.
        self.count_vars = 0
        for v in self.g_vars + self.d_vars:
            self.nan_vars += tf.reduce_sum(tf.cast(tf.is_nan(v), tf_dtype))
            self.inf_vars += tf.reduce_sum(tf.cast(tf.is_inf(v), tf_dtype))
            self.zero_vars += tf.reduce_sum(tf.cast(tf.square(v) < 1e-6, tf_dtype))
            self.big_vars += tf.reduce_sum(tf.cast(tf.square(v) > 1., tf_dtype))
            self.count_vars += tf.reduce_prod(v.get_shape())
        self.count_vars = tf.cast(self.count_vars, tf_dtype)
        #self.nan_vars /= self.count_vars
        #self.inf_vars /= self.count_vars
        self.zero_vars /= self.count_vars
        self.big_vars /= self.count_vars

        # Python-side parameter counts per sub-network.
        self.g_vars_count = 0
        self.d_vars_count = 0
        self.e_vars_count = 0
        for v in self.g_vars:
            self.g_vars_count += int(np.prod(v.get_shape()))
        for v in self.d_vars:
            self.d_vars_count += int(np.prod(v.get_shape()))
        for v in self.e_vars:
            self.e_vars_count += int(np.prod(v.get_shape()))

        ### build optimizers
        # The train ops are created under a control dependency on everything
        # in the UPDATE_OPS collection.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print '>>> update_ops list: ', update_ops
        with tf.control_dependencies(update_ops):
            self.g_opt = tf.train.AdamOptimizer(
                self.g_lr, beta1=self.g_beta1, beta2=self.g_beta2).minimize(
                self.g_loss_total, var_list=self.g_vars)
            self.d_opt = tf.train.AdamOptimizer(
                self.d_lr, beta1=self.d_beta1, beta2=self.d_beta2).minimize(
                self.d_loss_total, var_list=self.d_vars)
            self.e_opt = tf.train.AdamOptimizer(
                self.e_lr, beta1=self.e_beta1, beta2=self.e_beta2).minimize(
                self.en_loss_total, var_list=self.e_vars)

        ### summaries **g_num**
        g_loss_sum = tf.summary.scalar("g_loss", self.g_loss_mean)
        d_loss_sum = tf.summary.scalar("d_loss", self.d_loss_mean)
        e_loss_sum = tf.summary.scalar("e_loss", self.en_loss_total)
        self.summary = tf.summary.merge([g_loss_sum, d_loss_sum, e_loss_sum])

        ### Policy gradient updates **g_num**
        self.pg_var = tf.get_variable('pg_var', dtype=tf_dtype, initializer=self.g_rl_vals)
        self.pg_q = tf.get_variable('pg_q', dtype=tf_dtype, initializer=self.g_rl_vals)
        self.pg_base = tf.get_variable('pg_base', dtype=tf_dtype, initializer=0.0)
        self.pg_var_flat = self.pg_temp * tf.reshape(self.pg_var, [1, -1])

        ### log p(x) for the selected policy at each batch location
        # log_soft_policy is only referenced by the commented-out actor
        # update below.
        log_soft_policy = -tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(tf.reshape(self.z_input, [-1]), self.g_num, dtype=tf_dtype),
            logits=tf.tile(self.pg_var_flat, tf.shape(tf.reshape(self.z_input, [-1, 1]))))
        self.gi_h = -tf.reduce_sum(tf.nn.softmax(self.pg_var) * tf.nn.log_softmax(self.pg_var))

        ### policy gradient reward
        #pg_reward = tf.reshape(self.d_g_loss, [-1]) - 0.*self.en_loss_weight * tf.reshape(self.g_en_loss, [-1])
        pg_reward = tf.reduce_mean(self.r_en_logits, axis=0)

        ### critic update (q values update)
        #pg_q_z = tf.gather(self.pg_q, tf.reshape(self.z_input, [-1]))
        #pg_q_opt = tf.scatter_update(self.pg_q, tf.reshape(self.z_input, [-1]),
        #    self.pg_q_lr*pg_q_z + (1-self.pg_q_lr) * pg_reward)
        # Each run of this op multiplies rl_counter by 0.999.
        rl_counter_opt = tf.assign(self.rl_counter, self.rl_counter * 0.999)

        ### r_en_logits as q values
        pg_q_opt = tf.assign(self.pg_q, (1-self.pg_q_lr)*self.pg_q + \
            self.pg_q_lr * pg_reward)

        ### cross entropy E_x H(p(c|x)||q(c))
        # The loss is built under a control dependency on the two assign ops
        # above.
        with tf.control_dependencies([pg_q_opt, rl_counter_opt]):
            en_pr = tf.nn.softmax(self.r_en_logits)
            pg_loss_total = -tf.reduce_mean(en_pr * tf.nn.log_softmax(self.pg_var)) \
                - 1000. * self.rl_counter * self.gi_h

        ### actor update (p values update)
        #with tf.control_dependencies([pg_q_opt, rl_counter_opt]):
        #    pg_q_zu = tf.gather(self.pg_q, tf.reshape(self.z_input, [-1]))
        #    pg_loss_total = -tf.reduce_mean(log_soft_policy * pg_q_zu) + \
        #        1000. * self.rl_counter * -self.gi_h

        self.pg_opt = tf.train.AdamOptimizer(
            self.pg_lr, beta1=self.pg_beta1, beta2=self.pg_beta2).minimize(
            pg_loss_total, var_list=[self.pg_var])
def f(x, init, ema, dropout_p, verbose, context):
    """Map x to elementwise scale/shift and mixture-of-logistics parameters.

    The input must have a fully defined 4-D static shape ``[B, H, W, C]``.
    ``filters``, ``blocks``, ``components``, ``use_nin``, ``use_ln``,
    ``use_final_nin``, ``init_scale`` and ``self`` come from the enclosing
    scope.

    Returns:
      A tuple ``(s, t, ml_logits, ml_means, ml_logscales)`` where ``s`` (after
      tanh) and ``t`` have shape ``[B, H, W, C]`` and the three mixture
      tensors have shape ``[B, H, W, C, components]``; ``ml_logscales`` is
      bounded below by -7.
    """
    # if verbose and context is not None:
    #     print('got context')
    if init and verbose:
        # debug stuff
        # Prints summary statistics of the input, including NaN / inf flags.
        with tf.variable_scope('debug'):
            xmean, xvar = tf.nn.moments(x, axes=list(
                range(len(x.get_shape()))))
            x = tf.Print(
                x,
                [
                    tf.shape(x), xmean,
                    tf.sqrt(xvar),
                    tf.reduce_min(x),
                    tf.reduce_max(x),
                    tf.reduce_any(tf.is_nan(x)),
                    tf.reduce_any(tf.is_inf(x))
                ],
                message='{} (shape/mean/std/min/max/nan/inf) '.format(
                    self.template.variable_scope.name),
                summarize=10,
            )
    B, H, W, C = x.shape.as_list()

    # pos_emb is obtained here but not used further in this function.
    pos_emb = to_default_floatx(
        get_var(
            'pos_emb',
            ema=ema,
            shape=[H, W, filters],
            initializer=tf.random_normal_initializer(stddev=0.01),
        ))
    x = conv2d(x, name='c1', num_units=filters, init=init, ema=ema)
    for i_block in range(blocks):
        with tf.variable_scope('block{}'.format(i_block)):
            x = gated_resnet(x,
                             name='conv',
                             a=context,
                             use_nin=use_nin,
                             init=init,
                             ema=ema,
                             dropout_p=dropout_p)
            if use_ln:
                x = norm(x, name='ln1', ema=ema)
    x = nonlinearity(x)
    # Final projection to 2 + 3 * components values per input channel.
    x = (nin if use_final_nin else conv2d)(x,
                                           name='c2',
                                           num_units=C * (2 + 3 * components),
                                           init_scale=init_scale,
                                           init=init,
                                           ema=ema)
    assert x.shape == [B, H, W, C * (2 + 3 * components)]
    x = tf.reshape(x, [B, H, W, C, 2 + 3 * components])

    x = at_least_float32(x)  # do mix-logistics in tf.float32

    # Index 0 -> s (tanh-squashed), index 1 -> t, the rest is split evenly
    # into logits, means and log-scales.
    s, t = tf.tanh(x[:, :, :, :, 0]), x[:, :, :, :, 1]
    ml_logits, ml_means, ml_logscales = tf.split(x[:, :, :, :, 2:],
                                                 3,
                                                 axis=4)
    ml_logscales = tf.maximum(ml_logscales, -7.)

    assert s.shape == t.shape == [B, H, W, C]
    assert ml_logits.shape == ml_means.shape == ml_logscales.shape == [
        B, H, W, C, components
    ]
    return s, t, ml_logits, ml_means, ml_logscales
def _object_masks_to_attention(self, object_masks, object_translation):
    """Combine per-object masks into a single attention target per image.

    Objects whose mask sums to 2.0 or less over the two spatial axes are
    dropped.  The remaining masks are weighted per object and reduced over
    the object axis with ``tf.reduce_max`` (if hparam
    ``max_attention_truth`` is truthy) or ``tf.reduce_sum``.

    Weighting, chosen from ``self.hparams``:
      * ``small_object_prior``: inverse of the mean mask-weighted squared
        distance of the pixels to the mask's mean coordinate (NaN / inf
        weights become 0);
      * ``object_flow_weighting``: share of each object in the summed
        absolute translation;
      * otherwise: uniform weights of 1.

    Args:
      object_masks: rank-4 tensor of static shape
        ``[batch_size, img_h, img_w, k_obj]`` (asserted / implied by the
        reshape below).
      object_translation: per-object translation; only used with
        ``object_flow_weighting``, where its last axis is summed over.

    Returns:
      ``(summed_masks, teach_mask)``: the reduced masks (object axis kept
      with size 1) and a float flag per image that is 1 when at least one
      object was kept.
    """
    use_mask = tf.cast(
        tf.reduce_sum(object_masks, axis=[1, 2]) > 2.0, tf.float32)
    teach_mask = tf.cast(tf.reduce_sum(use_mask, axis=-1) > 0, tf.float32)
    object_masks *= tf.expand_dims(tf.expand_dims(use_mask, axis=1), axis=1)

    reduce_op = tf.reduce_max if self.hparams.get(
        'max_attention_truth') else tf.reduce_sum

    img_w = self.hparams['img_w']
    img_h = self.hparams['img_h']
    batch_size = self.hparams['batch_size']
    k_obj = self.hparams['k_obj']

    assert object_masks.get_shape()[0] == batch_size
    assert object_masks.get_shape()[3] == k_obj
    assert len(object_masks.get_shape()) == 4

    if self.hparams.get('small_object_prior'):
        border = 2
        cutoff = 0.1
        num_masks = batch_size * k_obj

        object_masks_flat = tf.transpose(object_masks, [0, 3, 1, 2])
        object_masks_flat = tf.reshape(object_masks_flat,
                                       [num_masks, img_h, img_w])

        # Sized from the hparams instead of a fixed 84x84 so that it always
        # matches the reshaped masks.
        # NOTE(review): only the first `border` rows and columns are zeroed,
        # as before; the last rows/columns are left untouched -- confirm.
        border_mask = np.ones([img_h, img_w])
        border_mask[:border, :] = 0
        border_mask[:, :border] = 0

        mask = object_masks_flat * border_mask
        # Shift and rescale so that values at or below the cutoff become 0.
        mask = (mask - cutoff) / (1. - cutoff)
        mask *= tf.cast(mask > 0, tf.float32)
        mask = tf.reshape(mask, [num_masks, -1])
        mask_sum = tf.reduce_sum(mask, axis=-1)
        # Assume mask_sum > epsilon.
        mask /= tf.expand_dims(mask_sum, axis=-1)

        # Pixel coordinate grid of shape [1, img_h * img_w, 2] holding (y, x).
        x_linspace = tf.linspace(0., img_w - 1., img_w)
        y_linspace = tf.linspace(0., img_h - 1., img_h)
        x_coord, y_coord = tf.meshgrid(x_linspace, y_linspace)
        x_coord = tf.reshape(x_coord, [1, -1])
        y_coord = tf.reshape(y_coord, [1, -1])
        mesh_grid = tf.concat([y_coord, x_coord], axis=0)
        mesh_grid = tf.expand_dims(tf.transpose(mesh_grid), axis=0)

        mean_coord = tf.reduce_sum(mesh_grid * tf.expand_dims(mask, -1),
                                   axis=1)
        mean_coord = tf.expand_dims(mean_coord, 1)
        assert (mean_coord.get_shape() == [num_masks, 1, 2])

        diffs = mask * tf.reduce_sum(tf.square(mesh_grid - mean_coord),
                                     axis=-1)
        diffs = tf.reduce_mean(diffs, axis=-1)

        weight = (1.0 / diffs)
        # Empty or degenerate masks give NaN / inf weights; zero them.
        weight = tf.where(condition=tf.is_nan(weight),
                          x=tf.zeros_like(weight), y=weight)
        weight = tf.where(condition=tf.is_inf(weight),
                          x=tf.zeros_like(weight), y=weight)
        weight = tf.reshape(weight, [batch_size, 1, 1, k_obj])
    elif self.hparams.get('object_flow_weighting'):
        # The 1e-6 keeps the total strictly positive.
        flow_norms = tf.reduce_sum(tf.abs(object_translation), axis=-1) + 1e-6
        flow_totals = tf.reduce_sum(flow_norms, axis=-1, keep_dims=True)
        weight = tf.expand_dims(
            tf.expand_dims(flow_norms / flow_totals, axis=1), axis=1)
    else:
        weight = np.ones([batch_size, 1, 1, k_obj])

    summed_masks = reduce_op(object_masks * weight, axis=-1, keep_dims=True)
    return summed_masks, teach_mask
lambda: _lambda_z[0][n]) k = k + 1 _lambda_z[0][n] = tf.cond( tf.logical_or( tf.less_equal(tf.reduce_sum(_lambda_z[0][n]), 0.9999999), tf.greater_equal(tf.reduce_sum(_lambda_z[0][n]), 1.0000001)), lambda: PGMethod(_lambda_z[0][n], [1, K]), lambda: _lambda_z[0][n]) n = n + 1 if (n == N): lambda_z = tf.concat(_lambda_z[0], 0) del _lambda_z[:] gc.collect() # Deal with nan and inf for i in range(K): inf_pi_msg = tf.cond(tf.equal(tf.is_inf(lambda_pi)[i], True), lambda: True, lambda: False) if inf_pi_msg == True: print("lambda_pi is inf") lambda_pi = tf.cond(tf.equal(tf.is_inf(lambda_pi)[i], True), lambda: prev_lambda_pi, lambda: lambda_pi) nan_pi_msg = tf.cond(tf.equal(tf.is_nan(lambda_pi)[i], True), lambda: True, lambda: False) if nan_pi_msg == True: print("lambda_pi is nan") lambda_pi = tf.cond(tf.equal(tf.is_nan(lambda_pi)[i], True), lambda: prev_lambda_pi, lambda: lambda_pi) inf_mu_msg = tf.cond(tf.equal(tf.is_inf(lambda_mu)[0][i], True), lambda: True, lambda: False) if inf_mu_msg == True: print("lambda_mu is inf")
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients through the wrapped optimizer with an added ``ewc_loss`` term.

    The incoming pairs are split into discriminator and generator groups.
    Two slots are kept per variable: ``"f"`` (a decayed running sum of the
    squared gradients) and ``"v1"`` (a stored copy of the variable).  From
    them ``ewc_loss = lam / 2 * sum(f * (var - v1) ** 2)`` is built, the
    gradients are recomputed with that term, and the result is handed to
    ``self.optimizer.apply_gradients``.

    :param grads_and_vars: iterable of ``(gradient, variable)`` pairs; each
        variable must be in ``self.gan.d_vars()`` or ``self.gan.g_vars()``
    :param global_step: forwarded unchanged to the wrapped optimizer
    :param name: forwarded unchanged to the wrapped optimizer
    :return: a ``tf.no_op`` created under control dependencies on the "f"
        slot update, the wrapped optimizer step and the variable snapshot
    :raises ValueError: if a variable belongs to neither group, or if a
        recomputed gradient has a different shape than the incoming one
    """
    discriminator_vars = self.gan.d_vars()
    generator_vars = self.gan.g_vars()

    d_vars, d_grads = [], []
    g_vars, g_grads = [], []
    for grad, var in grads_and_vars:
        if var in discriminator_vars:
            d_vars.append(var)
            d_grads.append(grad)
        elif var in generator_vars:
            g_vars.append(var)
            g_grads.append(grad)
        else:
            # The original raised a bare string, which Python turns into
            # an unrelated TypeError.
            raise ValueError(
                "Couldn't find var in g_vars or d_vars: {}".format(var))
    all_grads = d_grads + g_grads
    var_list = d_vars + g_vars

    # Create the slots outside of any control-flow / function scope.
    with ops.init_scope():
        for v in var_list:
            self._zeros_slot(v, "f", self._name)
        for v in var_list:
            self._get_or_make_slot(v, v, "v1", self._name)
        if self.config.include_slots:
            # The loop variable must not be called `name`: that would
            # overwrite the `name` argument forwarded further down.
            for _slot_name in self.optimizer.get_slot_names():
                for opt_var in self.optimizer.variables():
                    self._zeros_slot(opt_var, "pm", "pm")
    self._prepare()

    f1 = [self.get_slot(v, "f") for v in var_list]
    v1 = [self.get_slot(v, "v1") for v in var_list]
    slots_list = []
    slots_vars = []
    if self.config.include_slots:
        # NOTE(review): every optimizer variable is appended once per slot
        # name, exactly as before -- confirm the duplication is intended.
        for _slot_name in self.optimizer.get_slot_names():
            for opt_var in self.optimizer.variables():
                slots_vars.append(opt_var)
                slots_list.append(self._zeros_slot(opt_var, "pm", "pm"))

    current_vars = var_list + slots_vars
    tmp_vars = v1 + slots_list
    diff = [tf.square(v - t) for v, t in zip(current_vars, tmp_vars)]

    f_accum = [(self.config.f_decay or 0.95) * f + tf.square(g)
               for f, g in zip(f1, all_grads)]
    self.gan.add_metric('f1', tf.reduce_sum([tf.reduce_sum(f) for f in f1]))
    # Non-finite accumulator entries are reset to zero.
    f_accum = [tf.where(tf.is_nan(_f), tf.zeros_like(_f), _f) for _f in f_accum]
    f_accum = [tf.where(tf.is_inf(_f), tf.zeros_like(_f), _f) for _f in f_accum]

    reg = [tf.multiply(f, d) for f, d in zip(f1, diff)]
    reg = [tf.where(tf.is_nan(_f), tf.zeros_like(_f), _f) for _f in reg]
    ewc_loss = (self.config.lam or 17.5) / 2.0 * tf.reduce_sum(
        [tf.reduce_sum(r) for r in reg])
    self.gan.add_metric('ewc', ewc_loss)

    # Snapshot the current variables into their stored copies.
    save_weights = tf.group(
        *[tf.assign(w, v) for w, v in zip(tmp_vars, current_vars)])

    if isinstance(self.loss, list):
        if self.config.add_ewc_loss_gradients:
            newloss = [ewc_loss, ewc_loss]
        else:
            newloss = [self.loss[0] + ewc_loss, self.loss[1] + ewc_loss]
        new_grads = (tf.gradients(newloss[0], d_vars)
                     + tf.gradients(newloss[1], g_vars))
        self.optimizer.loss = [ewc_loss + self.loss[0],
                               ewc_loss + self.loss[1]]
    else:
        if self.config.add_ewc_loss_gradients:
            newloss = ewc_loss
        else:
            newloss = self.loss + ewc_loss
        new_grads = tf.gradients(newloss, current_vars)
        self.optimizer.loss = ewc_loss + self.loss

    if self.config.add_ewc_loss_gradients:
        new_grads = [_g + _ng for _g, _ng in zip(all_grads, new_grads)]

    for g, oldg, v in zip(new_grads, all_grads, current_vars):
        if self.gan.ops.shape(g) != self.gan.ops.shape(oldg):
            print("[ERROR] Shape change on gradients for", v, g, "old g", oldg)
            raise ValueError("Gradient change error")

    step = self.optimizer.apply_gradients(
        list(zip(new_grads, current_vars)),
        global_step=global_step, name=name)

    store_f = tf.group(*[tf.assign(w, v) for w, v in zip(f1, f_accum)])
    graph = tf.get_default_graph()
    with graph.control_dependencies([store_f]), \
            graph.control_dependencies([step]), \
            graph.control_dependencies([save_weights]):
        return tf.no_op()
def inverse(depth):
    """Return the element-wise reciprocal of ``depth`` with non-finite results zeroed.

    Entries whose reciprocal is NaN or +/-inf (for example where ``depth``
    is zero) are replaced by 0.
    """
    reciprocal = tf.divide(tf.ones_like(depth), depth)
    # Same order as before: NaN entries are cleared first, then infinities.
    for is_non_finite in (tf.is_nan, tf.is_inf):
        reciprocal = tf.where(is_non_finite(reciprocal),
                              tf.zeros_like(reciprocal),
                              reciprocal)
    return reciprocal
k=0 while(k < K): #tf.less_equal( lambda_z[0][n][0][k], 0.0 ) _lambda_z[0][n] = tf.cond( tf.less_equal( _lambda_z[0][n][0][k], 0.0 ), lambda: PGMethod(_lambda_z[0][n], [1, K]), lambda: _lambda_z[0][n] ) k = k + 1 _lambda_z[0][n] = tf.cond( tf.logical_or( tf.less_equal( tf.reduce_sum( _lambda_z[0][n] ), 0.9999999 ), tf.greater_equal( tf.reduce_sum( _lambda_z[0][n] ), 1.0000001 ) ), lambda: PGMethod( _lambda_z[0][n], [1, K] ), lambda: _lambda_z[0][n] ) n = n + 1 if(n == N): lambda_z = tf.concat(_lambda_z[0], 0) del _lambda_z[:] gc.collect() # Deal with nan and inf for i in range(K): inf_pi_msg = tf.cond( tf.equal( tf.is_inf(lambda_pi)[i], True ), lambda: True, lambda: False ) if inf_pi_msg == True: print("lambda_pi is inf") lambda_pi = tf.cond( tf.equal( tf.is_inf(lambda_pi)[i], True ), lambda: prev_lambda_pi, lambda: lambda_pi ) nan_pi_msg = tf.cond( tf.equal( tf.is_nan(lambda_pi)[i], True ), lambda: True, lambda: False ) if nan_pi_msg == True: print("lambda_pi is nan") lambda_pi = tf.cond( tf.equal( tf.is_nan(lambda_pi)[i], True ), lambda: prev_lambda_pi, lambda: lambda_pi ) inf_mu_msg = tf.cond( tf.equal( tf.is_inf(lambda_mu)[0][i], True ), lambda: True, lambda: False ) if inf_mu_msg == True: print("lambda_mu is inf") lambda_mu = tf.cond( tf.equal( tf.is_inf(lambda_mu)[0][i], True ), lambda: prev_lambda_mu, lambda: lambda_mu ) nan_mu_msg = tf.cond( tf.equal( tf.is_nan(lambda_mu)[0][i], True ), lambda: True, lambda: False ) if nan_mu_msg == True: print("lambda_mu is nan") lambda_mu = tf.cond( tf.equal( tf.is_nan(lambda_mu)[0][i], True ), lambda: prev_lambda_mu, lambda: lambda_mu )
def deep_er_model_generator(data_dict,
                            embedding_file,
                            padding_limit=100,
                            mask_zero=True,
                            embedding_trainable=False,
                            text_columns=list(),
                            numeric_columns=list(),
                            make_isna=True,
                            text_nan_idx=None,
                            num_nan_val=None,
                            text_compositions=['average'],
                            text_sim_metrics=['cosine'],
                            numeric_sim_metrics=['unscaled_inverse_lp'],
                            dense_nodes=[10],
                            lstm_args=dict(units=50),
                            document_frequencies=None,
                            idf_smoothing=2,
                            batch_norm=False,
                            dropout=0,
                            shared_lstm=True,
                            debug=False):
    """
    Takes a dictionary of paired split DataFrames and returns a DeepER model
    with data formatted for said model.

    Parameters
    ----------
    data_dict : dict
        A dictionary of dataframes (pd.DataFrame) stored with the following
        keys: train_1, val_1, test_1, train_2, val_2, test_2.  The keys
        train_y, val_y and test_y are read for the labels.
    embedding_file : str
        The location and name of numpy matrix containing word vector
        embeddings.
    padding_limit : int, optional
        The maximum length of any text sequence. For any text attribute whose
        max length is below padding_limit, the max length will be used.
        Otherwise, padding_limit will be used to both pad and truncate
        text sequences for that attribute.
    mask_zero : bool, optional
        Whether to ignore text sequence indices with value of 0. Useful for
        LSTM's and variable length inputs.
    embedding_trainable : bool, optional
        Whether to allow the embedding layer to be fine tuned.
    text_columns : list of strings, optional
        A list of names of text-based attributes
    numeric_columns : list of strings, optional
        A list of names of numeric attributes
    make_isna : bool, optional
        Whether to create new attributes indicating the presence of null
        values for each original attribute.
    text_nan_idx : int, optional
        The index corresponding to NaN values in text-based attributes.
    num_nan_val : int, optional
        The value corresponding to NaN values in numeric attributes.
    text_compositions : list of strings, optional
        List of composition methods to be applied to embedded text attributes.
        Valid options are :
            - average : a simple average of all embedded vectors
            - idf : an average of all embedded vectors weighted by normalized
                    inverse document frequency
            - lstm : output of an LSTM layer
            - bi_lstm : output of a bidirectional LSTM layer
    text_sim_metrics : list of strings, optional
        List of similarity metrics to be computed for each text-based
        attribute.
        Valid options are :
            - cosine
            - inverse_l1 : e^-[l1_distance]
            - inverse_l2 : e^-[l2_distance]
    numeric_sim_metrics : list of strings, optional
        List of similarity metrics to be computed for each numeric attribute.
        Valid options are :
            - scaled_inverse_lp : e^[-2(abs_diff)/sum]
            - unscaled_inverse_lp : e^[-abs_diff]
            - min_max_ratio : min / max
    dense_nodes : list of ints, optional
        Specifies topology of hidden dense layers.  The first hidden layer is
        named 'output'; further layers are named 'output_1', 'output_2', ...
    lstm_args : dict, optional
        Keyword arguments for LSTM layer
    document_frequencies : tuple of length 2, optional
        Tuple of two lists of document frequencies, left side then right
    idf_smoothing : int, optional
        un-normalized idf = 1 / df ^ (1 / idf_smoothing)
        Higher values means that high document frequency words are penalized
        less.
    batch_norm : bool, optional
        Batch normalization followed by dropout is inserted after each hidden
        dense layer only when both batch_norm and dropout are truthy.
    dropout : float, optional
        Dropout rate used together with batch_norm (see above).
    shared_lstm : bool, optional
        Whether one (bi-)LSTM layer is shared by all text attributes and
        sides, or a new one is created for each.
    debug : bool, optional
        If set, the model output is the embedded tensor of the first text
        column on the left side instead of the softmax output.

    Returns
    -------
    tuple
        (model, X_train, X_val, X_test, y_train, y_val, y_test) where each X
        is a list of arrays ordered like the model inputs.

    Notes
    -----
    The list/dict default arguments are shared between calls; this function
    only reads them.
    """
    # Imported at the very top of the body.  The original imported numpy in
    # the middle of the function, which made `np` a local name and caused an
    # UnboundLocalError at the earlier `np.float32` uses.
    import numpy as np

    def _is_missing(value):
        # NaN never compares equal to itself, so `value == np.nan` cannot
        # detect it; missing text cells show up as float NaN.
        return isinstance(value, float) and np.isnan(value)

    ### DATA PROCESSING ###

    # dictionary structure will be data[split][side][column]
    sides = ['left', 'right']
    splits = ['train', 'val', 'test']
    side_suffix = {'left': '1', 'right': '2'}
    data = {split: {side: dict() for side in sides} for split in splits}

    columns = text_columns + numeric_columns

    # separate each feature into its own dictionary entry
    for column in columns:
        for split, side in it.product(splits, sides):
            frame = data_dict['{}_{}'.format(split, side_suffix[side])]
            data[split][side][column] = frame[column]

    # if enabled, create a binary column for each feature indicating whether
    # it contains a missing value. for text data, a missing value is a list
    # with a single index representing the 'NaN' token; for numeric data it
    # is the value num_nan_val.
    if make_isna:
        for split, side, column in it.product(splits, sides, text_columns):
            isna = data[split][side][column].apply(lambda x: x == [text_nan_idx])
            isna = isna.values.astype(np.float32).reshape(-1, 1)
            data[split][side][column + '_isna'] = isna
        for split, side, column in it.product(splits, sides, numeric_columns):
            isna = data[split][side][column].apply(lambda x: x == num_nan_val)
            isna = isna.values.astype(np.float32).reshape(-1, 1)
            data[split][side][column + '_isna'] = isna

    # pad each text column according to the length of its longest entry in
    # both training sets (a float cell, i.e. NaN, counts as length 1)
    maxlen = dict()
    for column in text_columns:
        print(data['train']['left'][column][:20])
        maxlen_left = data['train']['left'][column].apply(
            lambda x: 1 if isinstance(x, float) else len(x)).max()
        maxlen_right = data['train']['right'][column].apply(
            lambda x: 1 if isinstance(x, float) else len(x)).max()
        print(maxlen_left, maxlen_right)
        maxlength = min(padding_limit, max(maxlen_left, maxlen_right))
        for split, side in it.product(splits, sides):
            cleaned = data[split][side][column].apply(
                lambda x: [] if _is_missing(x) else x)
            data[split][side][column] = pad_sequences(cleaned,
                                                      maxlen=maxlength,
                                                      padding='post',
                                                      truncating='post')
        maxlen[column] = maxlength

    # convert all numeric features to float and reshape to be 2-dimensional
    for split, side, column in it.product(splits, sides, numeric_columns):
        feature = data[split][side][column]
        data[split][side][column] = feature.values.astype(np.float32).reshape(-1, 1)

    # format X values for each split as a list of 2-dimensional arrays
    packaged_data = OrderedDict()
    for split in splits:
        packaged_data[split] = list()
        for side, column in it.product(sides, columns):
            packaged_data[split].append(data[split][side][column])
        if make_isna:
            for side, column in it.product(sides, columns):
                packaged_data[split].append(data[split][side][column + '_isna'])

    # convert y-values
    y_train = to_categorical(data_dict['train_y'])
    y_val = to_categorical(data_dict['val_y'])
    y_test = to_categorical(data_dict['test_y'])

    ### MODEL BUILDING ###

    # each attribute of each side is its own input tensor
    # text input tensors for both sides are created before numeric input tensors
    input_tensors = dict(left=dict(), right=dict())
    for side, column in it.product(sides, text_columns):
        input_tensors[side][column] = Input(shape=(maxlen[column],))
    for side, column in it.product(sides, numeric_columns):
        input_tensors[side][column] = Input(shape=(1,))

    # create a single embedding layer for text input tensors
    embedding_matrix = np.load(embedding_file)
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                weights=[embedding_matrix],
                                trainable=embedding_trainable,
                                mask_zero=mask_zero)

    # use embedding_layer to convert text input tensors to embedded tensors
    # and store in dictionary.
    # an embedding tensor will have shape n_words x n_embedding_dimensions
    embedded_tensors = dict(left=dict(), right=dict())
    for side, column in it.product(sides, text_columns):
        embedded_tensors[side][column] = embedding_layer(input_tensors[side][column])

    # initialize dictionary for storing a composition tensor for each embedding tensor
    composed_tensors = dict()
    for composition in text_compositions:
        composed_tensors[composition] = dict()
        for side in sides:
            composed_tensors[composition][side] = dict()

    # if enabled, reduce each embedding tensor to a quasi-1-dimensional tensor
    # with shape 1 x n_embedding_dimensions by averaging all embeddings
    if 'average' in text_compositions:
        # Averaging over the word axis leaves the embedding dimension.  The
        # original passed maxlen[column] using a loop variable leaked from an
        # earlier loop.
        averaging_layer = Lambda(lambda x: K.mean(x, axis=1),
                                 output_shape=(embedding_matrix.shape[1],))
        for side, column in it.product(sides, text_columns):
            composed_tensors['average'][side][column] = averaging_layer(
                embedded_tensors[side][column])

    # if enabled, reduce each embedding tensor to a quasi-1-dimensional tensor
    # with shape 1 x n_embedding_dimensions by taking a weighted average of all
    # embeddings.
    if 'idf' in text_compositions:
        # store document frequency constants for each side
        dfs_constant = dict()
        dfs_constant['left'] = K.constant(document_frequencies[0])
        dfs_constant['right'] = K.constant(document_frequencies[1])

        # a selection layer uses an input tensor as indices to select
        # document frequencies from dfs_constant
        dfs_selection_layer = dict()

        # a conversion layer converts a tensor of selected document frequencies
        # to a tensor of inverse document frequencies. the larger the DF,
        # the smaller the inverse, the smallness of which is controlled by
        # idf_smoothing (float division so the exponent is not truncated)
        idf_conversion_layer = Lambda(
            lambda x: 1 / (K.pow(x, 1.0 / idf_smoothing)))

        # document frequencies of 0 will result in IDF's of inf. these should
        # be converted back to 0's.
        idf_fix_layer = Lambda(
            lambda x: tf.where(tf.is_inf(x), tf.zeros_like(x), x))

        # for each IDF tensor, scale its values so they sum to 1
        idf_normalization_layer = Lambda(
            lambda x: x / K.expand_dims(K.sum(x, axis=1), axis=1))

        # take dot product between embedding tensor vectors and IDF weights
        dot_layer = Dot(axes=1)

        for side in sides:
            # Bind this side's constant as a default argument so the layer
            # does not pick up the last loop value if it is called again.
            dfs_selection_layer[side] = Lambda(
                lambda x, dfs=dfs_constant[side]: K.gather(dfs, K.cast(x, tf.int32)))
            for column in text_columns:
                dfs_tensor = dfs_selection_layer[side](input_tensors[side][column])
                idfs_tensor = idf_conversion_layer(dfs_tensor)
                idfs_tensor_fixed = idf_fix_layer(idfs_tensor)
                idfs_tensor_normalized = idf_normalization_layer(idfs_tensor_fixed)
                composed_tensors['idf'][side][column] = dot_layer(
                    [embedded_tensors[side][column], idfs_tensor_normalized])

    # if enabled, compose embedding tensor using (optionally shared) LSTM
    if 'lstm' in text_compositions:
        if shared_lstm:
            lstm_layer = LSTM(**lstm_args)
        for side, column in it.product(sides, text_columns):
            if not shared_lstm:
                lstm_layer = LSTM(**lstm_args)
            composed_tensors['lstm'][side][column] = lstm_layer(
                embedded_tensors[side][column])

    # if enabled, compose embedding tensor using bi-directional LSTM
    if 'bi_lstm' in text_compositions:
        if shared_lstm:
            lstm_layer = Bidirectional(LSTM(**lstm_args), merge_mode='concat')
        for side, column in it.product(sides, text_columns):
            if not shared_lstm:
                lstm_layer = Bidirectional(LSTM(**lstm_args), merge_mode='concat')
            composed_tensors['bi_lstm'][side][column] = lstm_layer(
                embedded_tensors[side][column])

    # maintain list of text-based similarities to calculate
    similarity_layers = list()
    if 'cosine' in text_sim_metrics:
        similarity_layers.append(Dot(axes=1, normalize=True))
    if 'inverse_l1' in text_sim_metrics:
        similarity_layers.append(Lambda(
            lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True))))
    if 'inverse_l2' in text_sim_metrics:
        similarity_layers.append(Lambda(
            lambda x: K.exp(-K.sqrt(K.sum(K.pow(x[0] - x[1], 2),
                                          axis=1, keepdims=True)))))

    # for each attribute, calculate similarities between left and right sides
    similarity_tensors = list()
    for composition, column, similarity_layer in \
            it.product(text_compositions, text_columns, similarity_layers):
        similarity_tensors.append(similarity_layer(
            [composed_tensors[composition]['left'][column],
             composed_tensors[composition]['right'][column]]))

    if 'bi_lstm' in text_compositions:
        difference_layer = Lambda(lambda x: K.abs(x[0] - x[1]))
        hadamard_layer = Lambda(lambda x: x[0] * x[1])
        for column in text_columns:
            pair = [composed_tensors['bi_lstm']['left'][column],
                    composed_tensors['bi_lstm']['right'][column]]
            difference_tensor = difference_layer(pair)
            hadamard_tensor = hadamard_layer(pair)
            similarity_tensors.extend([difference_tensor, hadamard_tensor])

    # reset similarity layer to empty so only numeric-based similarities are used
    similarity_layers = list()
    if 'scaled_inverse_lp' in numeric_sim_metrics:
        similarity_layers.append(Lambda(
            lambda x: K.exp(-2 * K.abs(x[0] - x[1]) / (x[0] + x[1] + 1e-5))))
    if 'unscaled_inverse_lp' in numeric_sim_metrics:
        similarity_layers.append(Lambda(lambda x: K.exp(-K.abs(x[0] - x[1]))))

    for column, similarity_layer in it.product(numeric_columns, similarity_layers):
        similarity_tensors.append(similarity_layer(
            [input_tensors['left'][column], input_tensors['right'][column]]))
    if 'min_max_ratio' in numeric_sim_metrics:
        for column in numeric_columns:
            num_concat = Concatenate(axis=-1)([input_tensors['left'][column],
                                               input_tensors['right'][column]])
            similarity_layer = Lambda(
                lambda x: K.min(x, axis=1, keepdims=True) /
                (K.max(x, axis=1, keepdims=True) + 1e-5))
            similarity_tensors.append(similarity_layer(num_concat))

    # create input tensors from _isna attributes
    input_isna_tensors = list()
    if make_isna:
        for _ in it.product(sides, columns):
            input_isna_tensors.append(Input(shape=(1,)))

    num_dense_inputs = len(similarity_tensors) + len(input_isna_tensors)
    # 'lstm' was previously spelled with a trailing space and never matched.
    if 'lstm' in text_compositions or 'bi_lstm' in text_compositions:
        num_dense_inputs += lstm_args['units'] * len(text_columns)
    print('Number of inputs to dense layer: {}'.format(num_dense_inputs))

    # concatenate similarity tensors with isna_tensors.
    concatenated_tensors = Concatenate(axis=-1)(similarity_tensors +
                                                input_isna_tensors)

    # create dense layers starting with concatenated tensors.  With an empty
    # dense_nodes the softmax layer is fed the concatenated tensors directly
    # (the original raised IndexError in that case).
    last_tensor = concatenated_tensors
    for layer_idx, n_nodes in enumerate(dense_nodes):
        # Layer names must be unique within a model: only the first hidden
        # layer keeps the historical names, later ones get an index suffix.
        suffix = '' if layer_idx == 0 else '_{}'.format(layer_idx)
        fc = Dense(n_nodes, activation='relu', name='output' + suffix)
        print(type(fc))
        dense_tensor = fc(last_tensor)
        if batch_norm and dropout:
            dense_tensor_bn = BatchNormalization(
                name='batchnormal' + suffix)(dense_tensor)
            last_tensor = Dropout(dropout)(dense_tensor_bn)
        else:
            last_tensor = dense_tensor

    output_tensors = Dense(2, activation='softmax')(last_tensor)

    model_inputs = [input_tensors[s][tc] for s, tc in it.product(sides, columns)]
    model_inputs = model_inputs + input_isna_tensors
    if not debug:
        model = Model(model_inputs, [output_tensors])
    else:
        model = Model(model_inputs,
                      [embedded_tensors['left'][text_columns[0]]])

    return tuple([model] + list(packaged_data.values()) + [y_train, y_val, y_test])
def generalised_dice_loss(prediction, ground_truth, weight_map=None, type_weight='Square'):
    """
    Function to calculate the Generalised Dice Loss defined in
        Sudre, C. et. al. (2017) Generalised Dice overlap as a deep learning
        loss function for highly unbalanced segmentations. DLMIA 2017

    Classes are taken from the last axis of ``prediction``; all leading axes
    are treated as voxels and summed over.

    :param prediction: per-class scores, cast to float32, classes on the
        last axis
    :param ground_truth: the segmentation ground truth (label indices); if it
        has the same rank as ``prediction`` its last channel is used
    :param weight_map: optional per-voxel weights with as many elements as
        ``prediction`` has voxels, in the same order (reshaped to the leading
        axes of ``prediction``)
    :param type_weight: type of weighting allowed between labels (choice
        between Square (square of inverse of volume),
        Simple (inverse of volume), Uniform (no weighting) and
        Fixed (a hard-coded vector of 9 class weights))
    :return: the loss, ``1 - generalised dice score``
    :raises ValueError: if ``type_weight`` is not one of the choices above
    """
    prediction = tf.cast(prediction, tf.float32)
    n_dims = len(prediction.shape)
    if len(ground_truth.shape) == n_dims:
        ground_truth = ground_truth[..., -1]
    one_hot = labels_to_one_hot(ground_truth, tf.shape(prediction)[-1])

    # Every axis except the class axis; [0, 1, 2] for a 4-D prediction.
    voxel_axes = list(range(n_dims - 1))

    if weight_map is not None:
        # Broadcast each voxel's weight over the class axis.  The previous
        # tile + reshape interleaved the weights so they no longer lined up
        # with their voxels, and it read the class count from axis 1.
        voxel_weights = tf.reshape(tf.cast(weight_map, tf.float32),
                                   tf.shape(prediction)[:-1])
        weight_map_nclasses = (tf.expand_dims(voxel_weights, -1)
                               * tf.ones_like(prediction))
        ref_vol = tf.sparse_reduce_sum(weight_map_nclasses * one_hot,
                                       reduction_axes=voxel_axes)
        intersect = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction,
            reduction_axes=voxel_axes)
        seg_vol = tf.reduce_sum(
            tf.multiply(weight_map_nclasses, prediction), voxel_axes)
    else:
        ref_vol = tf.sparse_reduce_sum(one_hot, voxel_axes)
        intersect = tf.sparse_reduce_sum(one_hot * prediction, voxel_axes)
        seg_vol = tf.reduce_sum(prediction, voxel_axes)

    if type_weight == 'Square':
        weights = tf.reciprocal(tf.square(ref_vol))
    elif type_weight == 'Simple':
        weights = tf.reciprocal(ref_vol)
    elif type_weight == 'Uniform':
        weights = tf.ones_like(ref_vol)
    elif type_weight == 'Fixed':
        # Original note: W3 = 1/sqrt(freq).  Only valid for 9 classes.
        weights = tf.constant([
            0.0006, 0.0006, 0.1656, 0.1058, 0.0532, 0.0709, 0.1131, 0.3155,
            0.1748
        ])
    else:
        raise ValueError("The variable type_weight \"{}\" "
                         "is not defined.".format(type_weight))

    # Classes absent from the reference get an infinite weight; replace it
    # with the largest finite weight.
    new_weights = tf.where(tf.is_inf(weights), tf.zeros_like(weights), weights)
    weights = tf.where(tf.is_inf(weights),
                       tf.ones_like(weights) * tf.reduce_max(new_weights),
                       weights)

    generalised_dice_numerator = 2 * tf.reduce_sum(
        tf.multiply(weights, intersect))
    # The per-class volume is floored at 1 to keep the denominator away
    # from zero.
    generalised_dice_denominator = tf.reduce_sum(
        tf.multiply(weights, tf.maximum(seg_vol + ref_vol, 1)))
    generalised_dice_score = \
        generalised_dice_numerator / generalised_dice_denominator
    # A NaN score (0 / 0) is treated as a perfect score.
    generalised_dice_score = tf.where(tf.is_nan(generalised_dice_score),
                                      tf.ones_like(generalised_dice_score),
                                      generalised_dice_score)
    return 1 - generalised_dice_score
def build_net_run_on_multi_gpus_nccl(self):
    """Build the multi-GPU training graph, create the session and initialise variables.

    One model replica is built per entry of ``self.gpus``; the per-tower
    gradients are averaged with ``wnn.average_grads_nccl`` and applied with
    ``wnn.apply_gradientsv3``.  The method sets ``self.train_op``,
    ``self.total_loss``, ``self.variables_to_train``, ``self.loss_dict``,
    ``self.sess``, ``self.saver``, ``self.summary`` and
    ``self.summary_writer`` (among others) and runs the global variable
    initializer.

    :raises Exception: if replacing non-finite gradient entries fails for a
        gradient (the underlying error is chained as the cause)
    """
    if not os.path.exists(self.log_dir):
        wmlu.create_empty_dir(self.log_dir)
    if not os.path.exists(self.ckpt_dir):
        wmlu.create_empty_dir(self.ckpt_dir)

    all_loss_dict = {}
    steps = self.cfg.SOLVER.STEPS
    print("Train steps:", steps)
    lr = wnn.build_learning_rate(
        self.cfg.SOLVER.BASE_LR,
        global_step=self.global_step,
        lr_decay_type=self.cfg.SOLVER.LR_DECAY_TYPE,
        steps=steps,
        decay_factor=self.cfg.SOLVER.LR_DECAY_FACTOR,
        total_steps=steps[-1],
        min_lr=1e-6,
        warmup_steps=self.cfg.SOLVER.WARMUP_ITERS)
    tf.summary.scalar("lr", lr)
    self.max_train_step = steps[-1]

    if self.cfg.SOLVER.OPTIMIZER == "Momentum":
        opt = wnn.str2optimizer(
            "Momentum", lr, momentum=self.cfg.SOLVER.OPTIMIZER_momentum)
    else:
        opt = wnn.str2optimizer(self.cfg.SOLVER.OPTIMIZER, lr)

    tower_grads = []
    if len(self.gpus) == 0:
        self.gpus = [0]

    if len(self.cfg.SOLVER.TRAIN_SCOPES) > 1:
        train_scopes = self.cfg.SOLVER.TRAIN_SCOPES
    else:
        train_scopes = None
    if len(self.cfg.SOLVER.TRAIN_REPATTERN) > 1:
        train_repattern = self.cfg.SOLVER.TRAIN_REPATTERN
    else:
        train_repattern = None

    for i in range(len(self.gpus)):
        scope = tf.get_variable_scope()
        if i > 0:
            # Later towers reuse the variables created by the first one.
            scope.reuse_variables()
        with tf.device(f"/gpu:{i}"):
            with tf.device(":/cpu:0"):
                data = self.data.get_next()
            self.input_data = data
            with tf.name_scope(f"GPU{self.gpus[i]}"):
                with tf.device(":/cpu:0"):
                    DataLoader.detection_image_summary(
                        data, name=f"data_source{i}")
                self.res_data, loss_dict = self.model.forward(data)

            loss_values = []
            for k, v in loss_dict.items():
                all_loss_dict[k + f"_stage{i}"] = v
                tf.summary.scalar(f"loss/{k}", v)
                # A NaN / inf loss term is replaced by zero.
                v = tf.cond(tf.logical_or(tf.is_nan(v), tf.is_inf(v)),
                            lambda: tf.zeros_like(v),
                            lambda: v)
                loss_values.append(v)

            scope._reuse = tf.AUTO_REUSE

            grads, total_loss, variables_to_train = wnn.nget_train_opv3(
                optimizer=opt,
                loss=loss_values,
                scopes=train_scopes,
                re_pattern=train_repattern)

            if self.cfg.SOLVER.FILTER_NAN_AND_INF_GRADS:
                grads = [list(x) for x in grads]
                # A separate index name keeps the GPU index `i` intact.
                for grad_idx, (g, v) in enumerate(grads):
                    if g is None:
                        continue
                    try:
                        # Non-finite entries become small random noise.
                        g = tf.where(
                            tf.logical_or(tf.is_nan(g), tf.is_inf(g)),
                            tf.random_normal(
                                shape=wmlt.combined_static_and_dynamic_shape(g),
                                stddev=1e-5),
                            g)
                    except Exception as err:
                        print(f"Error {g}/{v}")
                        raise Exception("Error") from err
                    grads[grad_idx][0] = g

            tower_grads.append(grads)

    if self.cfg.SOLVER.CLIP_NORM > 1:
        avg_grads = wnn.average_grads_nccl(
            tower_grads, clip_norm=self.cfg.SOLVER.CLIP_NORM)
    else:
        avg_grads = wnn.average_grads_nccl(tower_grads, clip_norm=None)

    opt0 = wnn.apply_gradientsv3(avg_grads, self.global_step, opt)
    opt1 = wnn.get_batch_norm_ops()
    self.train_op = tf.group(opt0, opt1)

    self.total_loss, self.variables_to_train = total_loss, variables_to_train
    self.loss_dict = all_loss_dict

    config = tf.ConfigProto(allow_soft_placement=True)
    self.sess = tf.Session(config=config)
    if self.debug_tf:
        self.sess = tfdbg.LocalCLIDebugWrapperSession(self.sess)

    print("variables to train:")
    wmlu.show_list(self.variables_to_train)
    for v in self.variables_to_train:
        # v.name[:-2] drops the last two characters of the variable name.
        wsummary.histogram_or_scalar(v, v.name[:-2])
    wnn.log_moving_variable()

    self.saver = tf.train.Saver(max_to_keep=100)
    tf.summary.scalar("total_loss", self.total_loss)

    self.summary = tf.summary.merge_all()
    self.summary_writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)
    init = tf.global_variables_initializer()
    self.sess.run(init)
    print("batch_norm_ops.")
    wmlu.show_list(
        [x.name for x in tf.get_collection(tf.GraphKeys.UPDATE_OPS)])
def prepare_model(self):
    """Build the TF1 graph: placeholders, variables, predictions, cost and train op.

    Sets ``self.input_R``, ``self.input_mask_R``, ``self.r_hat`` (rating
    prediction), ``self.t_hat`` (trust prediction), ``self.cost`` and
    ``self.optimizer`` (the op that applies gradients clipped to [-5, 5]).

    The per-user loop indexes ``self.input_mask_R`` for every user in
    ``range(self.num_users)``, so the fed batch is expected to contain all
    users.

    :raises ValueError: if ``self.optimizer_method`` is not a supported key
    """
    # ---------------- variables and constants ----------------
    self.input_R = tf.placeholder(dtype=tf.float32,
                                  shape=[None, self.num_items],
                                  name="input_R")
    self.input_mask_R = tf.placeholder(dtype=tf.float32,
                                       shape=[None, self.num_items],
                                       name="input_mask_R")

    def _small_normal(rows):
        # Shared initializer for the latent factor matrices.
        return tf.truncated_normal(shape=[rows, self.hidden_neuron],
                                   mean=0, stddev=0.03)

    b_u = tf.get_variable(name="b_u",
                          initializer=tf.zeros(shape=[self.num_users, 1]),
                          dtype=tf.float32)
    b_j = tf.get_variable(name="b_j",
                          initializer=tf.zeros(shape=[self.num_items, 1]),
                          dtype=tf.float32)
    p_u = tf.get_variable(name="p_u",
                          initializer=_small_normal(self.num_users),
                          dtype=tf.float32)
    q_j = tf.get_variable(name="q_j",
                          initializer=_small_normal(self.num_items),
                          dtype=tf.float32)
    y_i = tf.get_variable(name="y_i",
                          initializer=_small_normal(self.num_items),
                          dtype=tf.float32)
    w_v = tf.get_variable(name="w_v",
                          initializer=_small_normal(self.num_users),
                          dtype=tf.float32)

    # Mean training rating.
    mu = tf.cast(
        np.sum(self.train_R) / float(self.num_train_ratings), tf.float32)

    # Row / column sums of the rating mask and of the trust matrix.
    I_u = tf.reduce_sum(self.input_mask_R, 1)
    U_j = tf.reduce_sum(self.input_mask_R, 0)
    T_u = tf.reduce_sum(self.trust_matrix, 1)
    T_v = tf.reduce_sum(self.trust_matrix, 0)

    def _sqrt_inverse(counts, rows):
        # sqrt(1 / counts) as a column vector; 1 / 0 = inf is mapped to 0.
        inverse = tf.div(tf.constant(1, tf.float32), counts)
        finite = tf.where(tf.is_inf(inverse),
                          tf.ones_like(inverse) * 0,
                          inverse)
        return tf.sqrt(tf.reshape(finite, [rows, 1]))

    sqrt_inverse_I_u = _sqrt_inverse(I_u, self.num_users)
    sqrt_inverse_U_j = _sqrt_inverse(U_j, self.num_items)
    sqrt_inverse_T_u = _sqrt_inverse(T_u, self.num_users)
    sqrt_inverse_T_v = _sqrt_inverse(T_v, self.num_users)

    # ---------------- make r_hat ----------------
    pre_r_hat1 = tf.matmul(b_u, tf.ones([1, self.num_items], dtype=tf.float32)) \
        + tf.matmul(tf.ones([self.num_users, 1], dtype=tf.float32), tf.transpose(b_j)) \
        + mu * tf.ones([self.num_users, self.num_items], dtype=tf.float32)
    pre_r_hat2 = tf.matmul(p_u, tf.transpose(q_j))

    zero = tf.constant(0, dtype=tf.float32)
    temp_r_hat3_1 = []
    temp_r_hat3_2 = []
    for user in range(self.num_users):
        # The original guarded both parts with `if I_u[user] == 0` /
        # `if T_u[user] == 0`.  Those compare symbolic tensors in Python,
        # which is never true in TF1 graph mode, so the guards were dead
        # code.  A user without entries gathers nothing and sums to zeros.
        rated = tf.not_equal(self.input_mask_R[user, :], zero)
        rated_idx = tf.cast(tf.where(rated), tf.int32)
        sum_y_i = tf.reduce_sum(tf.gather_nd(y_i, rated_idx), 0) \
            * sqrt_inverse_I_u[user]
        temp_r_hat3_1.append(sum_y_i)

        trusted = tf.not_equal(self.trust_matrix[user, :], zero)
        trusted_idx = tf.cast(tf.where(trusted), tf.int32)
        sum_w_v = tf.reduce_sum(tf.gather_nd(w_v, trusted_idx), 0) \
            * sqrt_inverse_T_u[user]
        temp_r_hat3_2.append(sum_w_v)

    temp_r_hat3_1 = tf.stack(temp_r_hat3_1)
    temp_r_hat3_2 = tf.stack(temp_r_hat3_2)
    pre_r_hat3 = tf.matmul(temp_r_hat3_1, tf.transpose(q_j)) + tf.matmul(
        temp_r_hat3_2, tf.transpose(q_j))

    self.r_hat = pre_r_hat1 + pre_r_hat2 + pre_r_hat3

    # ---------------- make t_hat ----------------
    self.t_hat = tf.matmul(p_u, tf.transpose(w_v))

    # ---------------- make cost ----------------
    # cost1: squared errors on observed ratings and on trust entries.
    cost1 = 0.5 * tf.reduce_sum(tf.multiply(tf.square(self.r_hat - self.input_R), self.input_mask_R)) \
        + 0.5 * self.lambda_t_value * tf.reduce_sum(tf.multiply(tf.square(self.t_hat - self.trust_matrix), self.trust_matrix))
    # cost2..cost5: regularisation terms weighted by the sqrt-inverse counts.
    cost2 = 0.5 * self.lambda_value * tf.matmul(tf.transpose(sqrt_inverse_I_u), tf.square(b_u)) \
        + 0.5 * self.lambda_value * tf.matmul(tf.transpose(sqrt_inverse_U_j), tf.square(b_j))
    pre_cost3 = tf.transpose(0.5 * self.lambda_value * sqrt_inverse_I_u +
                             0.5 * self.lambda_t_value * sqrt_inverse_T_u)
    frob_p_u = tf.reshape(tf.reduce_sum(tf.square(p_u), 1),
                          [self.num_users, 1])
    cost3 = tf.matmul(pre_cost3, frob_p_u)
    frob_q_j = tf.reshape(tf.reduce_sum(tf.square(q_j), 1),
                          [self.num_items, 1])
    frob_y_i = tf.reshape(tf.reduce_sum(tf.square(y_i), 1),
                          [self.num_items, 1])
    cost4 = 0.5 * self.lambda_value * tf.matmul(tf.transpose(sqrt_inverse_U_j), frob_q_j) \
        + 0.5 * self.lambda_value * tf.matmul(tf.transpose(sqrt_inverse_U_j), frob_y_i)
    frob_w_v = tf.reshape(tf.reduce_sum(tf.square(w_v), 1),
                          [self.num_users, 1])
    cost5 = 0.5 * self.lambda_value * tf.matmul(
        tf.transpose(sqrt_inverse_T_v), frob_w_v)

    self.cost = tf.squeeze(cost1 + cost2 + cost3 + cost4 + cost5)

    # ---------------- optimizer ----------------
    if self.optimizer_method == "Adam":
        optimizer = tf.train.AdamOptimizer(self.lr)
    elif self.optimizer_method == "Adadelta":
        optimizer = tf.train.AdadeltaOptimizer(self.lr)
    elif self.optimizer_method == "Adagrad":
        # Previously built an AdadeltaOptimizer by mistake.
        optimizer = tf.train.AdagradOptimizer(self.lr)
    elif self.optimizer_method == "RMSProp":
        optimizer = tf.train.RMSPropOptimizer(self.lr)
    elif self.optimizer_method == "GradientDescent":
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
    elif self.optimizer_method == "Momentum":
        optimizer = tf.train.MomentumOptimizer(self.lr, 0.9)
    else:
        raise ValueError("Optimizer Key ERROR")

    gvs = optimizer.compute_gradients(self.cost)
    # compute_gradients yields None for variables the cost does not depend
    # on; those cannot be clipped and are skipped.
    capped_gvs = [(tf.clip_by_value(grad, -5., 5.), var)
                  for grad, var in gvs if grad is not None]
    self.optimizer = optimizer.apply_gradients(
        capped_gvs, global_step=self.global_step)
def network(xraw, labels, valid_mask, out_label, x_st, y_st, w_st, h_st):
    """Build a two-stage detection graph: backbone, RPN heads, ROI classifier.

    Five conv / batch-norm / ReLU stages form the backbone (the first four are
    each followed by 2x2 max pooling).  An RPN-style head predicts one
    objectness logit and four box values per cell.  The cell with the highest
    objectness per image selects the box used to build an affine ``theta``
    for two ``transformer`` crops (input image and backbone features); the
    feature crop goes through two more conv stages and a 2-way dense layer.

    NOTE(review): the original indentation was lost; the nesting of the
    variable scopes below is a reconstruction.  Confirm it against existing
    checkpoints before relying on the resulting variable names.
    NOTE(review): ``batch_size`` is not a parameter; it is read from the
    enclosing module scope.
    NOTE(review): ``tf.layers.batch_normalization`` is called without
    ``training=...``, so it uses its default mode -- confirm this is intended.

    Args:
        xraw: input image batch fed to the first convolution.
        labels: per-cell objectness targets, compared against ``clsx``.
        valid_mask: boolean mask applied to the per-cell RPN losses;
            presumably shaped like ``clsx`` (batch, h, w, 1) -- TODO confirm.
        out_label: per-cell class labels; the entry at the best-scoring cell
            becomes the target of the final classifier.
        x_st, y_st, w_st, h_st: per-cell reference values that the four
            channels of ``regx`` are regressed against.

    Returns:
        Tuple ``(clsx, tf_loss, accuracy_tr, sig_clsx, loss_reg, regx,
        cropped_images, accuracy_final, loss_cls_final, sptx, label_cls)``.
    """

    def conv_bn_relu(inputs, filters, tag):
        # 3x3 'SAME' conv -> batch norm -> ReLU, named conv<tag>/bn<tag>/r<tag>.
        conv = tf.layers.conv2d(inputs=inputs, filters=filters,
                                kernel_size=[3, 3], padding='SAME',
                                name='conv' + tag)
        bn = tf.layers.batch_normalization(conv, name='bn' + tag)
        return tf.nn.relu(bn, name='r' + tag)

    def smooth_l1(x):
        # Quadratic below 1, linear above.
        ab = tf.abs(x)
        return tf.where(ab < 1, 0.5 * tf.square(x), ab - 0.5)

    def finite_or_zero(t):
        # Replace NaN and +/-inf entries by 0 so they add nothing to the loss.
        t = tf.where(tf.is_nan(t), tf.zeros_like(t), t)
        return tf.where(tf.is_inf(t), tf.zeros_like(t), t)

    with tf.variable_scope('base_net'):
        net = xraw
        for stage, filters in enumerate((8, 16, 32, 64), start=1):
            tag = str(stage)
            net = conv_bn_relu(net, filters, tag)
            net = tf.layers.max_pooling2d(net, pool_size=2, strides=2,
                                          padding='valid', name='mp' + tag)
        r5 = conv_bn_relu(net, 128, '5')
        # Pin the batch dimension statically for the feature crop below.
        sptin = tf.reshape(
            r5, [batch_size, r5.shape[1], r5.shape[2], r5.shape[3]])

    with tf.variable_scope('rpn'):
        # Intermediate layer shared by both RPN heads.
        rinter = conv_bn_relu(r5, 128, 'inter')

        with tf.variable_scope('cls'):
            clsx = tf.layers.conv2d(inputs=rinter, filters=1,
                                    kernel_size=[1, 1], padding='SAME',
                                    name='clsx')
            loss_cl = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=clsx, labels=tf.cast(labels, tf.float32))
            tf_loss = tf.reduce_mean(tf.boolean_mask(loss_cl, valid_mask))
            sig_clsx = tf.sigmoid(clsx)
            # Flat index of the highest-scoring cell per image, shape [B, 1].
            amax = tf.argmax(
                tf.reshape(sig_clsx, [tf.shape(sig_clsx)[0], -1, 1]),
                axis=1, output_type=tf.int32)
            acc = tf.round(sig_clsx)
            acc1 = tf.cast(tf.equal(tf.cast(acc, tf.float32), labels),
                           tf.float32)
            accuracy_tr = tf.reduce_mean(tf.boolean_mask(acc1, valid_mask))

        with tf.variable_scope('reg'):
            regx = tf.layers.conv2d(
                inputs=rinter, filters=4, kernel_size=[1, 1],
                bias_initializer=tf.constant_initializer([4, 4, 8, 8]),
                padding='SAME', name='regx')

            # Box values at the best-scoring cell of every image, shape [B, 4].
            regx_oc = tf.reshape(regx, [tf.shape(regx)[0], -1, 4])
            idx = tf.concat(
                [tf.expand_dims(
                    tf.range(tf.shape(regx)[0], dtype=tf.int32), 1),
                 amax[:, 0:1]],
                axis=1)
            regx_oc = tf.gather_nd(regx_oc, idx)

            w_a = float(48)
            t_x = finite_or_zero((regx[:, :, :, 0] - x_st) / w_a)
            t_y = finite_or_zero((regx[:, :, :, 1] - y_st) / w_a)
            t_w = finite_or_zero(
                tf.log((regx[:, :, :, 2] / (w_st + 1e-5)) + 1e-5))
            t_h = finite_or_zero(
                tf.log((regx[:, :, :, 3] / (h_st + 1e-5)) + 1e-5))

            loss_reg = (smooth_l1(t_x) + smooth_l1(t_y)
                        + smooth_l1(t_w) + smooth_l1(t_h)) / 4.0
            # Fix: mask with the function's own parameter.  The original read
            # an undefined/global ``valid_m`` here.
            loss_reg = tf.boolean_mask(loss_reg, valid_mask[:, :, :, 0])
            loss_reg = tf.reduce_mean(loss_reg)

    # Faster RCNN: affine parameters (6 per image) built from the selected box.
    zeros = tf.zeros(shape=[tf.shape(regx_oc)[0]], dtype=tf.float32)
    theta = tf.transpose(
        tf.convert_to_tensor([
            regx_oc[:, 2] * 16 / 128.0,
            zeros,
            (regx_oc[:, 0] * 16 - 64) / 64.0,
            zeros,
            regx_oc[:, 3] * 16 / 128.0,
            (regx_oc[:, 1] * 16 - 64) / 64.0,
        ]))
    cropped_images = transformer(U=xraw, theta=theta, out_size=(22, 22),
                                 name='sp1')
    sptx = transformer(U=sptin, theta=theta, out_size=(4, 4), name='sp2')
    print(sptx)

    r6 = conv_bn_relu(sptx, 128, '6')
    r7 = conv_bn_relu(r6, 128, '7')
    fc1 = tf.layers.dense(
        tf.reshape(r7, [-1, r7.shape[1] * r7.shape[2] * r7.shape[3]]),
        2, name='fc1')

    # Class label at the best-scoring cell of every image, shape [B, 1].
    out_label = tf.reshape(out_label, [tf.shape(out_label)[0], -1, 1])
    label_cls = tf.gather_nd(
        out_label,
        tf.concat(
            [tf.expand_dims(
                tf.range(tf.shape(out_label)[0], dtype=tf.int32), 1),
             amax[:, 0:1]],
            axis=1))
    print(label_cls)
    label_cls = tf.cast(label_cls, tf.int32)

    # Softmax
    with tf.variable_scope('sm'):
        loss_cls_final = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=fc1, labels=tf.one_hot(label_cls, 2))
        loss_cls_final = tf.reduce_mean(loss_cls_final)
        y_hat = tf.cast(tf.argmax(tf.nn.softmax(fc1), axis=1), tf.int32)
        # Fix: ``y_hat`` is [B] while ``label_cls`` is [B, 1]; comparing them
        # directly broadcasts to [B, B] and averages over every pair.
        # Flatten the labels for the comparison only; the returned
        # ``label_cls`` keeps its original shape.
        correct_prediction = tf.cast(
            tf.equal(y_hat, tf.reshape(label_cls, [-1])), tf.float32)
        accuracy_final = tf.reduce_mean(correct_prediction)

    return (clsx, tf_loss, accuracy_tr, sig_clsx, loss_reg, regx,
            cropped_images, accuracy_final, loss_cls_final, sptx, label_cls)
def SanitizedAutoCorrelation(x, axis, *args, **kwargs):
    """Auto-correlation of ``x`` with non-finite entries replaced by 1.

    All arguments are forwarded unchanged to
    ``tfp.stats.auto_correlation``; every NaN or +/-inf element of its result
    is set to one.
    """
    acf = tfp.stats.auto_correlation(x, axis, *args, **kwargs)
    non_finite = tf.logical_or(tf.is_nan(acf), tf.is_inf(acf))
    return tf.where(non_finite, tf.ones_like(acf), acf)
# Element-wise ratio of x and y, standardised by its mean and variance.
x_tf = tf.constant(x)
y_tf = tf.constant(y)
t_tf = x_tf / y_tf
m_tf = tf.reduce_mean(t_tf)
centered_tf = t_tf - m_tf
v_tf = tf.reduce_mean(centered_tf**2)
final = centered_tf / tf.sqrt(v_tf)


# In[36]:

print(sess.run(final))


# So now we want to go nan/inf hunting again in TensorFlow.

# In[41]:

# Boolean mask of the entries of the ratio that are inf or NaN.
bad_tf = tf.logical_or(tf.is_inf(t_tf), tf.is_nan(t_tf))
print(sess.run(tf.reduce_any(bad_tf)))
print(sess.run(bad_tf))
# Or using the shorthand for tf.logical_or
# print(sess.run(tf.is_inf(t_tf) | tf.is_nan(t_tf)))


# I can still print known elements of Tensors, but conditionals will be
# challenging mid-way through the computation graph.

# In[42]:

print(sess.run(t_tf[1]))


# What we did in NumPy is not strictly possible in TensorFlow (this will
# throw a lot of errors). However, we can still use things like `tf.cond` and
# `tf.where` along with any of the `tf.reduce_*` operations.

# In[57]:

# sess.run(t_tf[tf.where(tf.is_inf(t_tf) | tf.is_nan(t_tf))])
def project_and_occlude_particles(particles,
                                  im_size,
                                  projection_matrix,
                                  particles_mask=None,
                                  p_radius=None,
                                  xyz_dims=(0, 3),
                                  particles_agent_centered=True,
                                  camera_matrix=None,
                                  coord_max=1000.0):
    '''
    Project particles into image coordinates and mark the occluded ones.

    particles: [B,T,N,D] where the last axis contains both xyz_dims and
        color_dims (slices)
    im_size: [H,W] ints
    projection_matrix: [B,T,4,4] the matrix for projecting from euclidean
        space into canonical image coordinates
    particles_mask: forwarded unchanged to occlude_particles_in_camera_volume
    p_radius: float that determines how particles will occlude one another;
        defaults to 3.0 * min(H, W) / 256.0
    xyz_dims: [xdim, zdim+1] must have a difference of 3
    particles_agent_centered: if false, use camera_matrix ("V") to put
        particles in camera-relative coordinates
    camera_matrix: [B,T,4,4] the matrix "V" for converting global to
        camera-relative coordinates
    coord_max: projected coordinates are clipped to [-coord_max, coord_max];
        NaN/inf coordinates are replaced by coord_max

    returns
    particles_im_indices: [B,T,N,2] of int32 indices into H and W dimensions
        of an image of size H,W
    not_occluded_mask: [B,T,N,1] float32 where 1.0 indicates the particle
        wasn't occluded at radius p_radius, 0.0 otherwise
    particles_im_coordinates: the clipped float image coordinates the indices
        were rounded from
    '''
    # Unpacking doubles as a check that the static rank of `particles` is 4.
    B, T, N, D = particles.shape.as_list()
    H, W = im_size
    if p_radius is None:
        p_radius = 3.0 * (np.minimum(H, W).astype(float) / 256.0)
    print("p radius", p_radius)

    # Bring the xyz slice into camera-relative ("agent") coordinates.
    particles_xyz = particles[..., xyz_dims[0]:xyz_dims[1]]
    if not particles_agent_centered:
        assert camera_matrix is not None, "need a camera matrix to put into agent coordinates"
        particles_agent = raw_to_agent_particles(
            particles_xyz, Vmat=camera_matrix)
    else:
        particles_agent = particles_xyz

    # The last agent coordinate is used as depth when resolving occlusions.
    particles_depths = particles_agent[..., -1:]

    # [B,T,N,2] <tf.float32> values in range [0,H]x[0,W]
    particles_im_coordinates = agent_particles_to_image_coordinates(
        particles_agent, Pmat=projection_matrix, H_out=H, W_out=W,
        to_integers=False)

    # Send non-finite coordinates to coord_max, then clip to +/- coord_max.
    non_finite = tf.logical_or(tf.is_nan(particles_im_coordinates),
                               tf.is_inf(particles_im_coordinates))
    particles_im_coordinates = tf.where(
        non_finite,
        coord_max * tf.ones_like(particles_im_coordinates),
        particles_im_coordinates)
    particles_im_coordinates = tf.maximum(
        tf.minimum(particles_im_coordinates, coord_max), -coord_max)

    # resolve occlusions
    not_occluded_mask = occlude_particles_in_camera_volume(
        particles_im_coordinates,
        particles_depths,
        p_radius=p_radius,
        particles_mask=particles_mask)

    # Round to pixel indices and clamp them into the image bounds.
    particles_im_indices = tf.cast(
        tf.round(particles_im_coordinates), dtype=tf.int32)
    max_index = tf.reshape(
        tf.constant(np.array([H - 1, W - 1]), dtype=tf.int32), [1, 1, 1, 2])
    particles_im_indices = tf.minimum(
        tf.maximum(particles_im_indices, 0), max_index)

    # last is float
    return particles_im_indices, not_occluded_mask, particles_im_coordinates
def count_inf(tensor):
    """Return the number of +/-inf entries of ``tensor`` as a float32 scalar.

    The input is cast to float32 before the check, so the count refers to the
    float32 representation of the values.
    """
    as_float = tf.cast(tensor, tf.float32)
    inf_flags = tf.is_inf(as_float)
    return tf.reduce_sum(tf.cast(inf_flags, tf.float32))
def generalised_dice_loss(prediction,
                          ground_truth,
                          weight_map=None,
                          type_weight='Custom',
                          pos_weight=1,
                          **kwargs):
    """
    Function to calculate the Generalised Dice Loss defined in
        Sudre, C. et. al. (2017) Generalised Dice overlap as a deep learning
        loss function for highly unbalanced segmentations. DLMIA 2017

    :param prediction: foreground values for a binary segmentation; the
        background channel is built as ``1 - prediction`` and concatenated on
        axis 3.  NOTE(review): nothing here applies a sigmoid, so these are
        presumably probabilities already -- confirm against the caller.
    :param ground_truth: the segmentation ground truth; if it has the same
        rank as the two-channel prediction, its last channel is used
    :param weight_map: optional per-voxel weights
    :param type_weight: type of weighting allowed between labels (choice
        between Square (square of inverse of volume),
        Simple (inverse of volume), Uniform (no weighting) and
        Custom (reference volume scaled by ``[1, pos_weight]``))
    :param pos_weight: factor applied to the foreground class weight when
        ``type_weight`` is Custom
    :param kwargs: accepted and ignored
    :return: the loss
    :raises ValueError: if ``type_weight`` is not one of the choices above
    """
    # convert binary label probabilities to categorical probabilities
    prediction = tf.concat([1 - prediction, prediction], axis=3)
    prediction = tf.cast(prediction, tf.float32)
    if len(ground_truth.shape) == len(prediction.shape):
        ground_truth = ground_truth[..., -1]
    one_hot = labels_to_one_hot(ground_truth, tf.shape(prediction)[-1])

    if weight_map is not None:
        # NOTE(review): the class count is read from axis 1 although the
        # class channels were concatenated on axis 3 -- left unchanged,
        # confirm which axis is intended.
        n_classes = prediction.shape[1].value
        weight_map_nclasses = tf.tile(
            tf.expand_dims(tf.reshape(weight_map, [-1]), 1), [1, n_classes])
        ref_vol = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction, reduction_axes=[0])
        seg_vol = tf.reduce_sum(
            tf.multiply(weight_map_nclasses, prediction), 0)
    else:
        ref_vol = tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(
            one_hot * prediction, reduction_axes=[0])
        seg_vol = tf.reduce_sum(prediction, 0)

    if type_weight == 'Square':
        weights = tf.reciprocal(tf.square(ref_vol))
    elif type_weight == 'Simple':
        weights = tf.reciprocal(ref_vol)
    elif type_weight == 'Uniform':
        weights = tf.ones_like(ref_vol)
    elif type_weight == "Custom":
        # TODO: why reduce the sum by batch? applicable above as well
        # Fix: build the class factors in ref_vol's dtype.  With the default
        # integer pos_weight the constant was int32, and TF1 refuses to
        # multiply tensors of different dtypes.
        weights = ref_vol * tf.constant([1, pos_weight], dtype=ref_vol.dtype)
    else:
        raise ValueError("The variable type_weight \"{}\" "
                         "is not defined.".format(type_weight))

    # Classes with zero reference volume get an infinite reciprocal weight;
    # give them the largest finite weight instead.
    new_weights = tf.where(tf.is_inf(weights), tf.zeros_like(weights), weights)
    weights = tf.where(tf.is_inf(weights),
                       tf.ones_like(weights) * tf.reduce_max(new_weights),
                       weights)

    generalised_dice_numerator = \
        2 * tf.reduce_sum(tf.multiply(weights, intersect))
    # The volume sum is floored at 1 before weighting.
    generalised_dice_denominator = tf.reduce_sum(
        tf.multiply(weights, tf.maximum(seg_vol + ref_vol, 1)))
    generalised_dice_score = \
        generalised_dice_numerator / generalised_dice_denominator
    # A NaN score (0 / 0) is treated as a perfect score, i.e. zero loss.
    generalised_dice_score = tf.where(tf.is_nan(generalised_dice_score), 1.0,
                                      generalised_dice_score)
    return 1 - generalised_dice_score
def tf_safe_log(value, replacement_value=-100.0):
    """Return ``log(value + 1e-9)`` with NaN/inf results replaced.

    :param value: tensor to take the logarithm of
    :param replacement_value: substituted wherever the logarithm is NaN or
        +/-inf
    :return: tensor shaped like ``value``
    """
    raw_log = tf.log(value + 1e-9)
    is_bad = tf.logical_or(tf.is_nan(raw_log), tf.is_inf(raw_log))
    fallback = replacement_value * tf.ones_like(raw_log)
    return tf.where(is_bad, fallback, raw_log)
def ScaleGradients(self, var_grads, gradient_adjuster=None):
    """Scales or clips gradients according to the training params.

    Args:
      var_grads: a `.NestedMap` whose values are (var, grad) pairs.
      gradient_adjuster: if not None, a function that is applied to
        var_grads after the global gradient norm has been computed.

    Returns:
      A `.NestedMap` containing:

      - has_nan_or_inf: a scalar of 0 or 1, indicating whether there is any
        NaN or Inf in input gradients.
      - final_var_grads: a `.NestedMap` whose values are (var, grad) pairs,
        where gradients have already been scaled.
      - grad_scale: the gradient scale. 0 if gradient updates should be
        skipped for the step. (Optional, only returned in case global norm
        clipping is used.)

    Raises:
      ValueError: if both clip_gradient_single_norm_to_value and
        clip_gradient_norm_to_value are set.
    """
    params = self.params

    # Per-variable norm summaries first, then the global norms. Note that
    # all_grad_norm may be nan, which may cause grad_scale to be nan.
    for key, var_grad in var_grads.FlattenItems():
        summary_utils.AddNormSummary(key + '/' + params.name,
                                     py_utils.NestedMap(s=var_grad))

    grads = [g for (_, g) in py_utils.NestedMap(child=var_grads).Flatten()]
    all_grad_norm = tf.sqrt(py_utils.SumSquared(grads))
    variables = [
        v for (v, _) in py_utils.NestedMap(child=var_grads).Flatten()
    ]
    all_var_norm = tf.sqrt(py_utils.SumSquared(variables))
    norm_not_finite = tf.logical_or(
        tf.is_nan(all_grad_norm), tf.is_inf(all_grad_norm))

    # The optional adjustment deliberately runs after all_grad_norm was
    # computed.
    if gradient_adjuster is not None:
        tf.logging.info('gradient_adjuster=%s', gradient_adjuster)
        var_grads = gradient_adjuster(var_grads)

    # The norm can still be inf even if no individual gradient is, so both
    # checks are combined.
    has_nan_or_inf = tf.logical_or(
        py_utils.HasNanOrInfGradient(var_grads), norm_not_finite)

    result = py_utils.NestedMap()
    single_norm_clip = params.clip_gradient_single_norm_to_value
    if not single_norm_clip:
        # Global scaling path; only this path reports grad_scale.
        grad_scale = self._GetGlobalGradScale(all_grad_norm, has_nan_or_inf)
        self._AddEvalMetric('grad_norm/all', all_grad_norm, tf.constant(1.0))
        self._AddEvalMetric('var_norm/all', all_var_norm, tf.constant(1.0))
        self._AddEvalMetric('grad_scale_all', grad_scale, tf.constant(1.0))
        final_var_grads = py_utils.ApplyGradMultiplier(var_grads, grad_scale)
        result.grad_scale = grad_scale
    else:
        # Currently using both types of clipping simultaneously is
        # unsupported.
        global_norm_clip = params.clip_gradient_norm_to_value
        if global_norm_clip:
            raise ValueError(
                'Cannot use clip_gradient_single_norm_to_value=%f and '
                'clip_gradient_norm_to_value=%f.' %
                (single_norm_clip, global_norm_clip))
        final_var_grads = py_utils.ApplyGradNormCliping(
            var_grads, single_norm_clip)

    result.has_nan_or_inf = has_nan_or_inf
    result.final_var_grads = final_var_grads
    return result
def ScaleGradients(self, var_grads):
    """Scales gradients according to training params.

    Args:
      var_grads: a `.NestedMap` whose values are (var, grad) pairs.

    Returns:
      (has_nan_or_inf, grad_scale, final_var_grads).

      - has_nan_or_inf: a scalar of 0 or 1, indicating whether there is any
        NaN or Inf in input gradients.
      - grad_scale: the gradient scale. 0 if gradient updates should be
        skipped for the step.
      - final_var_grads: a `.NestedMap` whose values are (var, grad) pairs,
        where gradients have already been scaled.
    """
    params = self.params
    train_params = params.train

    # Per-variable norm summaries, then the global norm. Note that
    # all_grad_norm may be nan, which may cause grad_scale to be nan.
    for key, var_grad in var_grads.FlattenItems():
        summary_utils.AddNormSummary(key, py_utils.NestedMap(s=var_grad))
    _, all_grad_norm = summary_utils.AddNormSummary('all', var_grads)
    norm_not_finite = tf.logical_or(
        tf.is_nan(all_grad_norm), tf.is_inf(all_grad_norm))

    # Optional gradient adjustment; deliberately done after all_grad_norm
    # was computed.
    var_grads = self.AdjustGradients(var_grads)

    # The norm can still be inf even if no individual gradient is, so both
    # checks are combined.
    has_nan_or_inf = tf.logical_or(
        self._HasNanOrInf(var_grads), norm_not_finite)

    grad_scale = tf.constant(1.0)
    clip_norm = train_params.clip_gradient_norm_to_value
    if clip_norm:
        # If all_grad_norm exceeds the clip value, scale the gradients down
        # so that their global norm equals the clip value.
        grad_scale = tf.minimum(1.0, clip_norm / all_grad_norm)

    zero_norm = train_params.grad_norm_to_clip_to_zero
    if zero_norm:
        # A norm at or above this threshold zeroes the scale, i.e. the step
        # is ignored.
        keep_step = tf.cast(all_grad_norm < zero_norm, params.dtype)
        grad_scale = grad_scale * keep_step

    if train_params.grad_norm_tracker:
        tracker_scale = self.grad_norm_tracker.FPropDefaultTheta(
            all_grad_norm, has_nan_or_inf)
        grad_scale = grad_scale * tracker_scale

    # Force grad_scale to be 0 if there is any NaN or Inf in gradients.
    grad_scale = tf.where(has_nan_or_inf, 0.0, grad_scale)

    summary_utils.scalar('grad_scale_all', grad_scale)
    final_var_grads = py_utils.ApplyGradMultiplier(var_grads, grad_scale)
    return has_nan_or_inf, grad_scale, final_var_grads
def checkForNan(tensor):
    """Return the number of NaN plus +/-inf entries of ``tensor``.

    The result is a float scalar; it is zero when every entry is finite.
    """
    nan_hits = tf.to_float(tf.is_nan(tensor))
    inf_hits = tf.to_float(tf.is_inf(tensor))
    return tf.reduce_sum(tf.add(nan_hits, inf_hits))
train_writer = tf.summary.FileWriter("./train_graph", g) saver = tf.train.Saver() tf.summary.histogram("loss", loss) merge = tf.summary.merge_all() hm_steps = 25000 sess.run(tf.global_variables_initializer()) input_size = height for batch in shuffle(batch_size, input_size): step, Xp, Y1p, Y2p = batch if step == 0: time.sleep(1) continue debugger = tf.logical_or(tf.is_nan(loss), tf.is_inf(loss)) while (1): d, l = sess.run([debugger, loss], feed_dict={ X: Xp, Y1: Y1p, Y2: Y2p }) if (not d): break else: print("Re-random variables!") sess.run(tf.global_variables_initializer()) summary, _, lossp, lxy, lwh, lobj, lnoobj, lp = sess.run([ merge, trainer, loss, loss_xy, loss_wh, loss_obj, loss_noobj,
def __loss_per_scale(self, name, conv, pred, label, bboxes, anchors, stride):
    '''
    Compute the YOLO loss of one detection scale.

    The arguments actually passed to this function look like:
    ('loss_sbbox', conv_sbbox, pred_sbbox, label_sbbox, sbboxes,
     self.__anchors[0], self.__strides[0])

    :param name: name of the loss (used as the name scope)
    :param conv: raw output of the YOLO convolution layer, shape
        (batch_size, output_size, output_size,
         anchor_per_scale * (5 + num_class))
    :param pred: predicted bbox information (x, y, w, h, conf, prob) output
        by YOLO, where (x, y, w, h) is expressed relative to input_size,
        e.g. for input_size=416, (x, y, w, h) = (120, 200, 50, 70); shape
        (batch_size, output_size, output_size, anchor_per_scale,
         5 + num_class)
    :param label: shape (batch_size, output_size, output_size,
        anchor_per_scale, 5 + num_classes); only the positions of the best
        anchor hold (x, y, w, h, 1, classes), with (x, y, w, h) being the
        original size after bbox correction
    :param bboxes: shape (batch_size, max_bbox_per_scale, 4), coordinates
        stored as (x, y, w, h) in the original size after bbox correction;
        used to compute the IOU between this detector's predicted boxes and
        all the boxes this detector is responsible for
    :param anchors: anchors of this detector
    :param stride: stride of this detector
    :return: scalar loss, summed over all cells/anchors/channels and
        averaged over the batch
    '''
    with tf.name_scope(name):
        raw_shape = tf.shape(conv)
        batch_size = raw_shape[0]
        output_size = raw_shape[1]
        input_size = tf.cast(stride * output_size, tf.float32)

        # Split the raw network output per anchor:
        # (dx, dy, dw, dh, conf, prob).
        conv = tf.reshape(conv, (batch_size, output_size, output_size,
                                 self.__gt_per_grid, 5 + self.__num_classes))
        raw_dxdy = conv[..., 0:2]
        raw_dwdh = conv[..., 2:4]
        raw_conf = conv[..., 4:5]
        raw_prob = conv[..., 5:]

        # Decoded predictions: (x, y, w, h, conf, prob).
        pred_xywh = pred[..., 0:4]
        pred_conf = pred[..., 4:5]

        # Labels: (x, y, w, h, 1, classes).
        label_xy = label[..., 0:2]
        label_wh = label[..., 2:4]
        respond_bbox = label[..., 4:5]
        label_prob = label[..., 5:]

        # (1) Box loss.  Build the (x, y) index of every grid cell, repeated
        # for each image and anchor.
        cells = tf.range(output_size, dtype=tf.int32)
        rows = tf.tile(cells[:, tf.newaxis], [1, output_size])
        cols = tf.tile(cells[tf.newaxis, :], [output_size, 1])
        xy_grid = tf.stack([cols, rows], axis=-1)
        xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :],
                          [batch_size, 1, 1, self.__gt_per_grid, 1])
        xy_grid = tf.cast(xy_grid, tf.float32)

        # Express the labels in the same raw space as the conv outputs.
        label_txty = 1.0 * label_xy / stride - xy_grid
        label_raw_twth = tf.log((1.0 * label_wh / stride) / anchors)
        # Infinite log values (e.g. from zero width/height) are zeroed.
        label_raw_twth = tf.where(tf.is_inf(label_raw_twth),
                                  tf.zeros_like(label_raw_twth),
                                  label_raw_twth)

        # Smaller boxes get a larger weight.
        bbox_loss_scale = 2.0 - 1.0 * label_wh[:, :, :, :, 0:1] * label_wh[:, :, :, :, 1:2] / (input_size ** 2)
        xy_loss = respond_bbox * bbox_loss_scale * \
            tf.nn.sigmoid_cross_entropy_with_logits(labels=label_txty,
                                                    logits=raw_dxdy)
        wh_loss = 0.5 * respond_bbox * bbox_loss_scale * tf.square(
            label_raw_twth - raw_dwdh)
        # (A GIOU-based box loss was sketched here in commented-out code and
        # is not used.)

        # (2) Confidence loss.  Cells without an object count as negative
        # samples only if their best IOU with any ground-truth box stays
        # below the threshold.
        iou = utils.iou_calc4(
            pred_xywh[:, :, :, :, np.newaxis, :],
            bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
        max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
        # negative samples
        respond_bgd = (1.0 - respond_bbox) * tf.cast(
            max_iou < self.__iou_loss_thresh, tf.float32)

        conf_focal = self.__focal_loss(pred_conf, gamma=2, alph=0.25)
        conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=respond_bbox, logits=raw_conf)
        conf_loss = conf_focal * (respond_bbox * conf_ce
                                  + respond_bgd * conf_ce)

        # (3) Class loss, only where an object is present.
        prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=label_prob, logits=raw_prob)

        per_cell = tf.concat([xy_loss, wh_loss, conf_loss, prob_loss],
                             axis=-1)
        return tf.reduce_mean(tf.reduce_sum(per_cell, axis=[1, 2, 3, 4]))
def preprocess_wave(file, transformation, normalize, sr, len_sec,
                    frame_length, frame_step, fft_length, n_mels, n_mfcc,
                    mel_lower_edge, mel_upper_edge):
    """Decode a WAV, take a random fixed-length crop and build a representation.

    The decoded mono signal is zero-padded to at least ``sr * len_sec``
    samples, a random window of exactly that length is cut out and divided by
    its peak absolute value.  The window is then returned as-is or converted
    to a spectral representation.

    NOTE(review): a completely silent window has a peak of 0, so the peak
    normalisation divides by zero -- confirm callers never pass such input.
    NOTE(review): the original indentation was lost; the NaN replacement in
    the 'mel' branch is assumed to belong to the ``normalize`` block.

    Args:
        file: encoded WAV contents handed to the TensorFlow decoder.
        transformation: one of 'wave', 'mag', 'mel', 'mfcc'.
        normalize: if truthy, rescale 'mag' / 'mel' output to [-1, 1].
        sr: sample rate; with ``len_sec`` it defines the crop length.
        len_sec: crop length in seconds.
        frame_length, frame_step, fft_length: STFT parameters.
        n_mels: number of mel bins.
        n_mfcc: number of MFCC coefficients kept.
        mel_lower_edge, mel_upper_edge: mel filter-bank range in Hz.

    Returns:
        The representation tensor selected by ``transformation``.

    Raises:
        ValueError: if ``transformation`` is not a supported value.
        RuntimeError: if the installed TensorFlow version has no decoding
            branch here (only 1.12.0 and 1.14.0 are handled).
    """
    # Fail early with a clear message; the original ended in an unbound
    # ``rep`` for unknown values.
    if transformation not in ('wave', 'mag', 'mel', 'mfcc'):
        raise ValueError(
            "Unsupported transformation: {!r}".format(transformation))

    def log10(x):
        numerator = tf.log(x)
        denominator = tf.log(tf.constant(10, dtype=numerator.dtype))
        return numerator / denominator

    def log_mel(stft):
        # dB magnitude spectrogram projected onto the mel filter bank.
        mel_matrix = tf.contrib.signal.linear_to_mel_weight_matrix(
            num_mel_bins=n_mels,
            num_spectrogram_bins=int(fft_length / 2 + 1),
            sample_rate=sr,
            lower_edge_hertz=mel_lower_edge,
            upper_edge_hertz=mel_upper_edge)
        mX = twenty * log10(tf.abs(stft))
        return tf.matmul(tf.squeeze(mX, axis=0), mel_matrix)

    twenty = tf.constant(20, dtype=tf.float32)

    if tf.__version__ == '1.12.0':
        wave = tf.contrib.ffmpeg.decode_audio(
            file, file_format='wav', samples_per_second=sr, channel_count=1)
    elif tf.__version__ == '1.14.0':
        wave, _ = tf.audio.decode_wav(
            file, desired_channels=1, desired_samples=-1)
    else:
        # The original left ``wave`` unbound for any other version.
        raise RuntimeError(
            "preprocess_wave supports TensorFlow 1.12.0 and 1.14.0 only, "
            "got {}".format(tf.__version__))

    # Zero-pad short signals up to the crop length.
    duration = tf.constant(sr * len_sec, tf.int32)
    size1 = tf.shape(wave)[0]
    diff = tf.maximum(duration - size1, 0)
    pad = tf.zeros((diff, 1))
    wave = tf.concat((wave, pad), axis=0)

    # Random crop of exactly `duration` samples.
    size2 = tf.shape(wave)[0]
    max_start = size2 - duration + 1
    start = tf.random_uniform((), minval=0, maxval=max_start,
                              dtype=tf.int32, seed=None, name=None)
    x = wave[start:start + duration, :]
    x = tf.expand_dims(x, axis=0)
    x = tf.squeeze(x, axis=-1)
    # Peak-normalise the window.
    x = x / tf.reduce_max(tf.abs(x), axis=1)

    if transformation == 'wave':
        return x

    X = tf.contrib.signal.stft(x, frame_length=frame_length,
                               frame_step=frame_step, fft_length=fft_length)

    if transformation == 'mag':
        rep = twenty * log10(tf.abs(X))
        if normalize:
            mXmax = tf.reduce_max(rep)
            # log10(0) gives -inf.  Flip it to +inf so the minimum below is
            # the smallest finite value, then overwrite the infinities with
            # that minimum.
            rep = tf.where(tf.is_inf(rep), -rep, rep)
            mXmin = tf.reduce_min(rep)
            rep = tf.where(tf.is_inf(rep), tf.ones_like(rep) * mXmin, rep)
            # Centre on the mid-range and scale to [-1, 1].
            mX_half_len = (mXmax - mXmin) / 2
            mXmid = mXmin + mX_half_len
            rep -= mXmid
            rep /= tf.reduce_max(tf.abs(rep))
    elif transformation == 'mel':
        rep = log_mel(X)
        if normalize:
            melmin = tf.reduce_min(rep)
            melmax = tf.reduce_max(rep)
            half_len = (melmax - melmin) / 2
            melmid = melmin + half_len
            rep -= melmid
            rep /= tf.reduce_max(tf.abs(rep))
            # NaNs are mapped to the bottom of the range.
            rep = tf.where(tf.is_nan(rep), tf.ones_like(rep) * -1, rep)
    else:  # 'mfcc'
        mel_S = log_mel(X)
        rep = tf.contrib.signal.mfccs_from_log_mel_spectrograms(mel_S)[
            ..., :n_mfcc]
    return rep
def __init__(self,
             batch_size=32,
             length_data=3000,
             n_channel=3,
             is_training=True,
             model_name="wavenet"):
    """Build the graph, placeholders, losses and optimizer.

    A backbone chosen by ``model_name`` feeds four 1-D convolution heads: two
    2-class logit heads (``logit_p`` / ``logit_s``) and two single-channel
    regression heads (``times_p`` / ``times_s``).  The total loss is the sum
    of the two weighted sequence losses, the two weighted squared-error
    losses and ``1e-6`` times the regularisation loss; it is minimised with
    Adam.

    Args:
        batch_size: static batch dimension of every placeholder.
        length_data: number of time steps per example.
        n_channel: number of input channels.
        is_training: forwarded to the backbone builder.
        model_name: one of "wavenet", "unet", "brnn", "inception".

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    self.graph = tf.Graph()
    self.model_name = model_name
    with self.graph.as_default():
        self.is_training = is_training

        seq_shape = [batch_size, length_data]
        self.input_data = tf.placeholder(
            dtype=tf.float32, shape=[batch_size, length_data, n_channel])
        self.label_p = tf.placeholder(dtype=tf.int32, shape=seq_shape)
        self.label_s = tf.placeholder(dtype=tf.int32, shape=seq_shape)
        self.label_pt = tf.placeholder(
            dtype=tf.float32, shape=[batch_size, length_data, 1])
        self.label_st = tf.placeholder(
            dtype=tf.float32, shape=[batch_size, length_data, 1])
        self.weight_p = tf.placeholder(dtype=tf.float32, shape=seq_shape)
        self.weight_s = tf.placeholder(dtype=tf.float32, shape=seq_shape)
        self.weight_pt = tf.placeholder(dtype=tf.float32, shape=seq_shape)
        self.weight_st = tf.placeholder(dtype=tf.float32, shape=seq_shape)

        if model_name not in ("wavenet", "unet", "brnn", "inception"):
            # Fix: the original raised a bare string, which is itself a
            # TypeError in Python 3.
            raise ValueError("Model name error")
        # The builder in `model` has the same name as the model.
        build_backbone = getattr(model, model_name)
        net = build_backbone(self.input_data, is_training=is_training)

        with tf.variable_scope('logit_p'):
            self.logit_p = tf.layers.conv1d(
                net, 2, 3, activation=None, padding="same")
        with tf.variable_scope('logit_s'):
            self.logit_s = tf.layers.conv1d(
                net, 2, 3, activation=None, padding="same")
        with tf.variable_scope('time_p'):
            self.times_p = tf.layers.conv1d(
                net, 1, 3, activation=None, padding="same")
        with tf.variable_scope('time_s'):
            self.times_s = tf.layers.conv1d(
                net, 1, 3, activation=None, padding="same")

        loss_p = tf.contrib.seq2seq.sequence_loss(
            self.logit_p, self.label_p, self.weight_p)
        loss_s = tf.contrib.seq2seq.sequence_loss(
            self.logit_s, self.label_s, self.weight_s)
        # Weighted squared error, summed over time and averaged over batch.
        loss_tp = tf.reduce_mean(
            tf.reduce_sum(
                tf.squeeze((self.label_pt - self.times_p)**2)
                * self.weight_pt, axis=1))
        loss_ts = tf.reduce_mean(
            tf.reduce_sum(
                tf.squeeze((self.label_st - self.times_s)**2)
                * self.weight_st, axis=1))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        reg_loss = tf.losses.get_regularization_loss()
        # Tie the collected update ops to the loss so they run whenever the
        # loss is evaluated.
        with tf.control_dependencies(update_ops):
            self.loss = loss_p * 1 + loss_s * 1 + loss_tp * 1 + loss_ts * 1 + 1e-6 * reg_loss
            # optimizer
            optimizer = tf.train.AdamOptimizer()
            self.optimize = optimizer.minimize(self.loss)

        self.logit_loss = loss_p + loss_s
        self.times_loss = loss_tp + loss_ts
        # Flags for monitoring a diverging loss.
        self.nan = tf.is_nan(self.loss)
        self.inf = tf.is_inf(self.loss)

        self.all_var = tf.trainable_variables()
        self.init = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
        for itr in self.all_var:
            print(itr.name, itr.get_shape())
        self.summary = tf.summary.FileWriter("logdir", graph=self.graph)
def network(xraw, labels, valid_mask, x_st, y_st, w_st, h_st, batch_size):
    """Build the backbone and RPN heads with their losses.

    Five conv / batch-norm / ReLU stages form the backbone (the first four
    are each followed by 2x2 max pooling).  An RPN-style head then predicts
    one objectness logit and four box values per cell; both heads are trained
    only on the cells selected by ``valid_mask``.

    NOTE(review): the original indentation was lost; the nesting of the
    variable scopes below is a reconstruction.  Confirm it against existing
    checkpoints before relying on the resulting variable names.
    NOTE(review): ``tf.layers.batch_normalization`` is called without
    ``training=...``, so it uses its default mode -- confirm this is intended.

    Args:
        xraw: input image batch fed to the first convolution.
        labels: per-cell objectness targets, compared against ``clsx``.
        valid_mask: boolean mask applied to the per-cell losses; presumably
            shaped like ``clsx`` (batch, h, w, 1) -- TODO confirm.
        x_st, y_st, w_st, h_st: per-cell reference values that the four
            channels of ``regx`` are regressed against.
        batch_size: accepted but not used by this function.

    Returns:
        Tuple ``(clsx, tf_loss, accuracy_tr, sig_clsx, loss_reg, regx)``.
    """

    def conv_bn_relu(inputs, filters, tag):
        # 3x3 'SAME' conv -> batch norm -> ReLU, named conv<tag>/bn<tag>/r<tag>.
        conv = tf.layers.conv2d(inputs=inputs, filters=filters,
                                kernel_size=[3, 3], padding='SAME',
                                name='conv' + tag)
        bn = tf.layers.batch_normalization(conv, name='bn' + tag)
        return tf.nn.relu(bn, name='r' + tag)

    def smooth_l1(x):
        # Quadratic below 1, linear above.
        ab = tf.abs(x)
        return tf.where(ab < 1, 0.5 * tf.square(x), ab - 0.5)

    def finite_or_zero(t):
        # Replace NaN and +/-inf entries by 0 so they add nothing to the loss.
        t = tf.where(tf.is_nan(t), tf.zeros_like(t), t)
        return tf.where(tf.is_inf(t), tf.zeros_like(t), t)

    with tf.variable_scope('base_net'):
        net = xraw
        for stage, filters in enumerate((8, 16, 32, 64), start=1):
            tag = str(stage)
            net = conv_bn_relu(net, filters, tag)
            net = tf.layers.max_pooling2d(net, pool_size=2, strides=2,
                                          padding='valid', name='mp' + tag)
        r5 = conv_bn_relu(net, 128, '5')

    with tf.variable_scope('rpn'):
        # Intermediate layer shared by both RPN heads.
        rinter = conv_bn_relu(r5, 128, 'inter')

        with tf.variable_scope('cls'):
            clsx = tf.layers.conv2d(inputs=rinter, filters=1,
                                    kernel_size=[1, 1], padding='SAME',
                                    name='clsx')
            loss_cl = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=clsx, labels=tf.cast(labels, tf.float32))
            tf_loss = tf.reduce_mean(tf.boolean_mask(loss_cl, valid_mask))
            sig_clsx = tf.sigmoid(clsx)
            # A cell counts as correct when thresholding at 0.5 matches the
            # label.
            acc = tf.cast(
                tf.equal(tf.cast(tf.greater(sig_clsx, 0.5), tf.float32),
                         labels),
                tf.float32)
            accuracy_tr = tf.reduce_mean(tf.boolean_mask(acc, valid_mask))

        with tf.variable_scope('reg'):
            regx = tf.layers.conv2d(
                inputs=rinter, filters=4, kernel_size=[1, 1],
                bias_initializer=tf.constant_initializer([4, 4, 8, 8]),
                padding='SAME', name='regx')

            w_a = float(48)
            t_x = finite_or_zero((regx[:, :, :, 0] - x_st) / w_a)
            t_y = finite_or_zero((regx[:, :, :, 1] - y_st) / w_a)
            t_w = finite_or_zero(
                tf.log((regx[:, :, :, 2] / (w_st + 1e-5)) + 1e-5))
            t_h = finite_or_zero(
                tf.log((regx[:, :, :, 3] / (h_st + 1e-5)) + 1e-5))

            loss_reg = (smooth_l1(t_x) + smooth_l1(t_y)
                        + smooth_l1(t_w) + smooth_l1(t_h)) / 4.0
            # Fix: mask with the function's own parameter.  The original read
            # an undefined/global ``valid_m`` here.
            loss_reg = tf.boolean_mask(loss_reg, valid_mask[:, :, :, 0])
            loss_reg = tf.reduce_mean(loss_reg)

    return clsx, tf_loss, accuracy_tr, sig_clsx, loss_reg, regx
def loss_per_scale(name, yolo_layer_outputs, conv_layer_outputs, y_true,
                   y_true_boxes, ignore_thresh, anchors, num_classes=3,
                   h=100, w=100, batch_size=32):
    """
    Calculates and returns the loss associated with a particular layer scale.

    The loss is the sum of a coordinate loss (sigmoid cross-entropy on the
    x/y offsets, squared error on the log-space width/height), an object
    confidence loss and a class loss.

    Parameters
    ----------
    name : string
        The name to be used to group the operations of the loss_per_scale
        function in Tensorboard.
    yolo_layer_outputs : tensor
        The outputs of a yolo layer, which are the fully scaled predicted
        boxes in the form of 'center_x, center_y, width, height'. If an input
        image is of the shape (416, 416), then a sample predicted box may
        have coordinates of (100,80,40,53).
        A sample yolo layer output will be a tensor of the shape:
        [batch_size, yolo_layer_grid_h, yolo_layer_grid_w,
         num_anchors_per_layer, 5 + num_classes]
        The '5' represents the x_coord, y_coord, width_value, height_value,
        and obj_confidence_score.
        The yolo layer output is needed to calculate the IOU between the
        predicted boxes and the true boxes. Its static shape (all dimensions
        except the batch) must be known, as it drives the reshapes below.
    conv_layer_outputs : tensor
        The outputs of a convolutional layer, right before they are fed into
        a yolo layer. The convolutional layer will be a tensor of shape:
        [batch_size, yolo_layer_grid_h, yolo_layer_grid_w,
         num_anchors_per_layer * (5 + num_classes)]
        The convolutional layer outputs are raw predictions which have not
        been passed through the logarithmic nor exponential functions
        necessary to predict fully scaled bounding boxes.
        The outputs of the convolutional layer are needed to calculate the
        coordinate loss, the object confidence loss, and the class loss of
        each detector.
    y_true : tensor
        The ground truth tensor which contains the theoretical ideal output
        of a corresponding yolo layer.
        A sample y_true tensor will be of shape:
        [batch_size,
         yolo_layer_grid_h * yolo_layer_grid_w * num_anchors_per_layer,
         5 + num_classes]
        which will then be reshaped into the shape of:
        [batch_size, yolo_layer_grid_h, yolo_layer_grid_w,
         num_anchors_per_layer, 5 + num_classes]
        The '5' represents the x_coord, y_coord, width_value, height_value,
        and obj_confidence_score.
        The coordinates of the boxes in y_true are stored in terms of
        'center_x', 'center_y', 'width', and 'height', and their values are
        percentages of the original input image size.
        In the case of y_true, the value at the obj_confidence_score index
        will always be 1 at the location of objects.
        y_true is needed to calculate the coordinate loss, the object
        confidence loss, and the class loss.
    y_true_boxes : tensor
        The ground truth boxes per image. A sample y_true_boxes tensor would
        be of shape:
        [batch_size, max_num_true_boxes_per_image, 4]
        The y_true_boxes are needed to compute the IOU between the predicted
        boxes from the yolo output layer and the ground truth boxes.
        y_true_boxes is used instead of y_true because of the significantly
        smaller computational cost. For each box predicted by a detector in
        the yolo layer, the IOU only has to be calculated between the single
        predicted box and the y_true_boxes. This means that only
        max_num_true_boxes_per_image IOU calculations need to be made per
        predicted box. On the other hand if y_true would be used, it would
        mean that
        yolo_layer_grid_h * yolo_layer_grid_w * num_anchors_per_layer
        IOU calculations would need to be made per predicted box, which is
        extremely expensive.
    ignore_thresh : float
        The threshold which determines how high the IOU between a predicted
        box and a ground truth box needs to be in order for the predicted box
        to be ignored in the object confidence loss. If for example the
        threshold is set to 0.5, then only predicted boxes whose best IOU
        with the ground truth boxes is at least 0.5 will be ignored.
    anchors : list
        A sublist of the anchors list, of length num_anchors/num_layers.
        The formatting of the sublist is as follows:
        [[anchor1_width, anchor1_height],
         [anchor2_width, anchor2_height],
         [anchor3_width, anchor3_height]]
        The anchors work across all of the layer's detectors, acting as
        'guides' for the bounding box predictions.
        The anchors are needed to transform the y_true tensor in a way to
        make it comparable with the conv_layer_outputs.
    num_classes : int
        The number of classes found in the training dataset.
    h : int
        The height of the input image.
    w : int
        The width of the input image.
    batch_size : int
        The number of images per training batch. Used to help with reshaping
        tensors.

    Returns
    -------
    loss : tensor
        The loss associated with the particular scale of a layer.
    """

    def iou(name, prct_yolo_outputs, y_true_boxes, shape, batch_size):
        """
        Calculates the IOU (Intersection over Union) between the predicted
        boxes (prct_yolo_outputs) and the true boxes (y_true_boxes).

        Every predicted box at each detector location will have an IOU
        calculated with all of the true boxes per image. A detector that
        predicts a box with a high IOU is doing a good job with its
        prediction, so we don't want to penalize this detector. A mask is
        created for the detectors whose max IOU value is above a certain
        threshold, which is then applied onto the term in the loss function
        that penalizes detectors for wrongly detecting boxes. This mask
        prevents the loss from increasing from detectors which have high
        IOUs, regardless of whether or not they should be predicting an
        object.

        Parameters
        ----------
        name : string
            The name that will be used for the IOU function in the
            TensorBoard graph.
        prct_yolo_outputs : tensor
            The outputs of a yolo layer, which are the fully scaled predicted
            boxes in the form of 'center_x, center_y, width, height', divided
            by the original input width and height to turn them into
            percentages. A sample yolo layer output will be a tensor of the
            shape:
            [batch_size, yolo_layer_grid_h, yolo_layer_grid_w,
             num_anchors_per_layer, 4]
            The '4' represents the x_coord, y_coord, width_value, and
            height_value.
        y_true_boxes : tensor
            The true boxes coordinates for each image, stored in the form of
            'center_x, center_y, width, height', as percentages of the
            original input. y_true_boxes is a tensor of the shape:
            [batch_size, max_num_true_boxes_per_image, 4]
            The '4' represents the x_coord, y_coord, width_value, and
            height_value.
        shape : list
            The shape of the yolo layer outputs:
            [batch_size, yolo_layer_grid_h, yolo_layer_grid_w,
             num_anchors_per_layer, (5+num_classes)]
            The '5' represents the x_coord, y_coord, width_value,
            height_value, and obj_confidence_score. Used to help with
            reshaping tensors.
        batch_size : int
            The number of images per training batch. Used to help with
            reshaping tensors.

        Returns
        -------
        max_ious : tensor
            A tensor containing the max IOU for each detector. The tensor
            will have the shape:
            [batch_size, num_detectors_per_layer]
            num_detectors_per_layer =
                yolo_layer_grid_h x yolo_layer_grid_w x num_anchors_per_layer
            The values at each index are the highest IOU score between each
            detector's predicted box and the true boxes of a particular
            image.
        """
        with tf.variable_scope(name):
            # [batch, detectors, 1, 4]: the singleton axis broadcasts against
            # the true-box axis below.
            prct_yolo_outputs = tf.reshape(
                prct_yolo_outputs,
                shape=(-1, shape[1] * shape[2] * shape[3], 4))
            prct_yolo_outputs = tf.expand_dims(prct_yolo_outputs, -2)
            pred_half_wh = prct_yolo_outputs[:, :, :, 2:4] / 2.0
            yolo_outputs_xy_mins = prct_yolo_outputs[:, :, :, 0:2] - pred_half_wh
            yolo_outputs_xy_maxes = prct_yolo_outputs[:, :, :, 0:2] + pred_half_wh

            # [batch, 1, true_boxes, 4]
            y_true_boxes = y_true_boxes[:, :, 0:4]
            y_true_boxes = tf.expand_dims(
                tf.reshape(y_true_boxes, [batch_size, -1, 4]), 1)
            true_half_wh = y_true_boxes[:, :, :, 2:4] / 2.0
            y_true_xy_mins = y_true_boxes[..., 0:2] - true_half_wh
            y_true_xy_maxes = y_true_boxes[..., 0:2] + true_half_wh

            intersecting_mins = tf.maximum(yolo_outputs_xy_mins,
                                           y_true_xy_mins)
            intersecting_maxes = tf.minimum(yolo_outputs_xy_maxes,
                                            y_true_xy_maxes)
            # Non-overlapping boxes give a negative extent; clamp it to zero.
            intersect_hw = tf.maximum(
                intersecting_maxes - intersecting_mins, 0.0)
            intersect_area = intersect_hw[..., 0] * intersect_hw[..., 1]

            yolo_outputs_area = (prct_yolo_outputs[..., 2]
                                 * prct_yolo_outputs[..., 3])
            y_true_area = y_true_boxes[..., 2] * y_true_boxes[..., 3]

            iou_scores = intersect_area / (
                yolo_outputs_area + y_true_area - intersect_area)
            # Best match over the true boxes of each image.
            max_ious = tf.reduce_max(iou_scores, axis=-1)
        return max_ious

    with tf.variable_scope(name):
        num_anchors = len(anchors)
        shape = yolo_layer_outputs.get_shape().as_list()
        grid_h, grid_w = shape[1], shape[2]

        with tf.variable_scope('trnsfrm_yolo_layer'):
            # Turn absolute box coordinates into fractions of the image size.
            wh = tf.cast(tf.constant([w, h, w, h]), tf.float32)
            percentage_yolo_outputs = yolo_layer_outputs[..., 0:4] / wh

        with tf.variable_scope('trnsfrm_conv_layer'):
            conv_layer_outputs = tf.reshape(
                conv_layer_outputs,
                [-1, grid_h, grid_w, shape[3], shape[4]])

        with tf.variable_scope('trnsfrm_y_true'):
            y_true = tf.reshape(
                y_true, [-1, grid_h, grid_w, shape[3], shape[4]])
            (percent_x, percent_y, percent_w, percent_h,
             obj_mask, classes) = tf.split(
                y_true, [1, 1, 1, 1, 1, num_classes], axis=-1)

        with tf.variable_scope('pred_coords_loss'):
            with tf.variable_scope('cnvrt_y_true_coords'):
                with tf.variable_scope('cnvrt_xy'):
                    # Per-cell column / row indices, both of shape
                    # [grid_h, grid_w, 1, 1]. The column indices are repeated
                    # over the rows and vice versa, so the offsets also
                    # broadcast correctly for non-square grids.
                    clustroid_x = tf.tile(
                        tf.reshape(tf.range(grid_w, dtype=tf.float32),
                                   [1, -1, 1, 1]),
                        [grid_h, 1, 1, 1])
                    clustroid_y = tf.tile(
                        tf.reshape(tf.range(grid_h, dtype=tf.float32),
                                   [-1, 1, 1, 1]),
                        [1, grid_w, 1, 1])
                    # Offset of the box centre inside its grid cell.
                    converted_x_true = percent_x * grid_w - clustroid_x
                    converted_y_true = percent_y * grid_h - clustroid_y

                with tf.variable_scope('cnvrt_wh'):
                    anchors = tf.constant(anchors, dtype=tf.float32)
                    anchors_w = tf.reshape(anchors[:, 0],
                                           [1, 1, 1, num_anchors, 1])
                    anchors_h = tf.reshape(anchors[:, 1],
                                           [1, 1, 1, num_anchors, 1])
                    # Log-space size relative to the anchor; cells without an
                    # object give log(0) = -inf, which is zeroed out below.
                    converted_w_true = tf.log((percent_w / anchors_w) * w)
                    converted_h_true = tf.log((percent_h / anchors_h) * h)

                with tf.variable_scope('concat_cnvrtd_y_true'):
                    converted_y_true = tf.concat([
                        converted_x_true, converted_y_true,
                        converted_w_true, converted_h_true
                    ], axis=-1)

                with tf.variable_scope('replace_inf_with_zeros'):
                    converted_y_true = tf.where(
                        tf.is_inf(converted_y_true),
                        tf.zeros_like(converted_y_true),
                        converted_y_true)

            with tf.variable_scope('box_loss_scale'):
                # Weights small boxes more heavily (scale lies in [1, 2] for
                # fractional widths/heights).
                box_loss_scale = 2 - percent_w * percent_h

            with tf.variable_scope('xy_coord_loss'):
                xy_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=converted_y_true[..., 0:2],
                    logits=conv_layer_outputs[..., 0:2]
                ) * obj_mask * box_loss_scale
                xy_loss = tf.reduce_sum(xy_loss)

            with tf.variable_scope('wh_coord_loss'):
                wh_loss = tf.square(
                    converted_y_true[..., 2:4]
                    - conv_layer_outputs[..., 2:4]
                ) * 0.5 * obj_mask * box_loss_scale
                wh_loss = tf.reduce_sum(wh_loss)

            with tf.variable_scope('compile_coord_loss'):
                coord_loss = xy_loss + wh_loss

        with tf.variable_scope('pred_obj_loss'):
            with tf.variable_scope('create_ignore_mask'):
                box_iou = iou('iou_yolo_bxs_y_true_bxs',
                              percentage_yolo_outputs, y_true_boxes,
                              shape, batch_size)
                # 1 where the best IOU is below the threshold (detector is
                # penalised), 0 where it is high enough to be ignored.
                ignore_mask = tf.cast(
                    tf.less(box_iou,
                            ignore_thresh * tf.ones_like(box_iou)),
                    tf.float32)
                ignore_mask = tf.reshape(
                    ignore_mask, [-1, grid_h, grid_w, num_anchors])
                ignore_mask = tf.expand_dims(ignore_mask, -1)

            with tf.variable_scope('no_obj_loss'):
                no_obj_loss = (
                    (1 - obj_mask)
                    * tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=obj_mask,
                        logits=conv_layer_outputs[..., 4:5])
                    * ignore_mask)
                no_obj_loss = tf.reduce_sum(no_obj_loss)

            with tf.variable_scope('obj_loss'):
                obj_loss = (
                    obj_mask
                    * tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=obj_mask,
                        logits=conv_layer_outputs[..., 4:5]))
                obj_loss = tf.reduce_sum(obj_loss)

            with tf.variable_scope('compile_pred_obj_loss'):
                confidence_loss = obj_loss + no_obj_loss

        with tf.variable_scope('pred_class_loss'):
            # `classes` is the [..., 5:] slice of y_true produced by the
            # split above.
            class_loss = (
                obj_mask
                * tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=classes,
                    logits=conv_layer_outputs[..., 5:]))
            class_loss = tf.reduce_sum(class_loss)

        with tf.variable_scope('compile_losses'):
            loss = coord_loss + confidence_loss + class_loss

    return loss
def compute_disorientation_tf(eu1s, eu2s):
    """Return the per-sample disorientation angle (degrees) between two batches.

    Both inputs are converted to 3x3 orientation matrices with ``eu2om_tf``.
    For every pair (i, j) of the 24 symmetry operators from ``sym_values()``
    the rotation ``(S_i * om1) * (S_j * om2)^T`` is formed, and its rotation
    angle ``acos((trace - 1) / 2)`` is computed. The smallest angle found per
    sample is returned.

    Only one product order is evaluated per (i, j) pair: the reverse product
    ``(S_j * om2) * (S_i * om1)^T`` is the transpose of the forward one and
    therefore has the same trace and the same angle.

    :param eu1s: first batch of orientations, with a statically known leading
        (batch) dimension (presumably Euler angles in the convention expected
        by ``eu2om_tf`` -- TODO confirm)
    :param eu2s: second batch of orientations, same layout as ``eu1s``
    :return: float tensor of shape ``[batch]`` holding the minimum angle in
        degrees. The running minimum starts at 100.0, so 100.0 is returned
        for samples where no candidate angle is below that value.
    """
    n_items = eu1s.get_shape().as_list()[0]

    # sym_values() is indexed below as 24 operators of 3x3 after moving its
    # last axis to the front.
    sym = tf.constant(sym_values(), dtype=tf.float32)
    om1s = eu2om_tf(eu1s)
    om2s = eu2om_tf(eu2s)
    sym = tf.transpose(sym, perm=[2, 0, 1])

    ones = tf.ones(shape=[n_items])
    # Safety margin kept away from the +/-1 ends of the acos domain
    # (currently zero).
    delta = tf.constant(0.0, shape=[n_items])
    lower = -ones + delta
    upper = ones - delta

    def _tiled_operator(k):
        # k-th symmetry operator repeated for every sample: [batch, 3, 3].
        op = tf.reshape(sym[k, :, :], shape=[1, 3, 3])
        return tf.tile(op, multiples=[n_items, 1, 1])

    # The second operand does not depend on the outer index, so build the 24
    # transposed matrices once instead of once per (i, j) pair.
    g2_transposed = [
        tf.transpose(tf.matmul(_tiled_operator(j), om2s), perm=[0, 2, 1])
        for j in range(24)
    ]

    dis_array = tf.constant(100.0, shape=[n_items])
    for i in range(24):
        g1 = tf.matmul(_tiled_operator(i), om1s)
        for g2_t in g2_transposed:
            g = tf.matmul(g1, g2_t)
            cos_th = 0.5 * tf.subtract(tf.trace(g), ones)

            # Clamp round-off overshoot into the acos domain: values at or
            # below -1 go to the lower bound (180 degrees), values at or
            # above +1 go to the upper bound (0 degrees).
            cos_r = tf.where(tf.less_equal(cos_th, -ones), lower, cos_th)
            cos_r = tf.where(tf.greater_equal(cos_th, ones), upper, cos_r)
            # Non-finite values are sent to the lower bound (180 degrees) so
            # they can never win the minimum.
            non_finite = tf.logical_or(tf.is_nan(cos_th), tf.is_inf(cos_th))
            cos_r = tf.where(non_finite, lower, cos_r)

            th = tf.acos(cos_r) * 180.0 / math.pi
            dis_array = tf.where(tf.less(th, dis_array), th, dis_array)

    return dis_array