def call(self, inputs, source_mask=None, target_mask=None):
    source, target = inputs
    source_mask = rk.utils.convert_to_attention_mask(source, source_mask)
    target_mask = rk.utils.convert_to_attention_mask(target, target_mask)
    encoder_outputs = self.encoder(source, encoder_mask=source_mask)
    # Pad the time axis to an even length when it is odd.
    do_pad = tf.equal(tf.mod(tf.shape(target)[1], 2), 1)
    target = K.switch(
        do_pad, tf.pad(target, [[0, 0], [0, 1], [0, 0]]), target)
    encoder_outputs = K.switch(
        do_pad, tf.pad(encoder_outputs, [[0, 0], [0, 1], [0, 0]]),
        encoder_outputs)
    # TODO: also pad source_mask and target_mask.
    output = target
    log_det_W_list = []
    log_s_list = []
    for i in range(self.n_flows):
        output, log_det_W = self.invertible_dense[i](output)
        output, log_s = self.WN[i](
            output,
            encoder_outputs=encoder_outputs,
            encoder_mask=source_mask,
            decoder_mask=target_mask)
        log_s_list.append(log_s)
        log_det_W_list.append(log_det_W)
    return output, log_s_list, log_det_W_list
def BetheBlochGeant(lnbg, kp0, kp1, kp2, kp3, kp4):
    # Typical parameter values:
    # kp0=2.33, kp1=0.20, kp2=3.00, kp3=173.0e-9, kp4=0.49848
    bg = K.exp(lnbg)
    mK = 0.307075e-3
    me = 0.511e-3
    rho = kp0
    x0 = kp1 * 2.303
    x1 = kp2 * 2.303
    mI = kp3
    mZA = kp4
    bg2 = bg * bg
    maxT = 2 * me * bg2
    x = lnbg
    lhwI = K.log(28.816e-9 * K.sqrt(K.cast(rho * mZA, dtype='float32')) / mI)
    # Density-effect correction, piecewise in x = ln(beta * gamma).
    d2 = K.switch(
        K.greater(x, x1),
        lhwI + x - 0.5,
        K.switch(
            K.greater(x, x0),
            lhwI + x - 0.5 + (0.5 - lhwI - x0) * (((x1 - x) / (x1 - x0)) ** 3),
            0. * bg))
    return mK * mZA * (1 + bg2) / bg2 * (
        0.5 * K.log(2 * me * bg2 * maxT / (mI * mI)) - bg2 / (1 + bg2) - d2)
def get_updates(self, params, constraints, loss):
    # iRprop- style update: grow the step while the gradient keeps its sign,
    # shrink it (and suppress the update) when the sign flips.
    # Note: the old-Keras optimizer signature passes per-parameter
    # constraints; the flattened source had dropped this argument and
    # unpacked five names from a four-element zip.
    grads = self.get_gradients(loss, params)
    prev_grads = [shared_zeros(p.shape, name="prev_grad") for p in params]
    prev_steps = [sharedX(np.full(p.shape, self.step_init), name="prev_step")
                  for p in params]
    self.updates = []
    for p, grad, prev_grad, prev_step, c in zip(params, grads, prev_grads,
                                                prev_steps, constraints):
        grad_sgn = prev_grad * grad
        new_step = K.switch(
            K.ge(grad_sgn, 0.0),
            K.minimum(prev_step * self.step_inc, self.step_max),
            K.maximum(prev_step * self.step_dec, self.step_min))
        self.updates.append((prev_step, new_step))
        new_grad = K.switch(K.ge(grad_sgn, 0.0), grad, 0.0)
        self.updates.append((prev_grad, new_grad))
        new_p = p - K.sgn(new_grad) * new_step
        self.updates.append((p, c(new_p)))
    return self.updates
def get_updates(self, params, loss):
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    alphas = [
        K.variable(K.ones(shape) * self.init_alpha) for shape in shapes
    ]
    old_grads = [K.zeros(shape) for shape in shapes]
    self.weights = alphas + old_grads
    self.updates = []
    for p, grad, old_grad, alpha in zip(params, grads, old_grads, alphas):
        grad = K.sign(grad)
        new_alpha = K.switch(
            K.greater(grad * old_grad, 0),
            K.minimum(alpha * self.scale_up, self.max_alpha),
            K.switch(K.less(grad * old_grad, 0),
                     K.maximum(alpha * self.scale_down, self.min_alpha),
                     alpha))
        grad = K.switch(K.less(grad * old_grad, 0), K.zeros_like(grad), grad)
        new_p = p - grad * new_alpha
        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        self.updates.append(K.update(p, new_p))
        self.updates.append(K.update(alpha, new_alpha))
        self.updates.append(K.update(old_grad, grad))
    return self.updates
def get_updates(self, loss, params):
    sync_cond = K.equal((self.iterations + 1) // self.sync_period * self.sync_period,
                        (self.iterations + 1))
    if TF_KERAS:
        slow_params = [K.variable(K.get_value(p), name='sp_{}'.format(i))
                       for i, p in enumerate(params)]
        self.updates = self.optimizer.get_updates(loss, params)
        slow_updates = []
        for p, sp in zip(params, slow_params):
            sp_t = sp + self.slow_step * (p - sp)
            slow_updates.append(K.update(sp, K.switch(
                sync_cond,
                sp_t,
                sp,
            )))
            slow_updates.append(K.update_add(p, K.switch(
                sync_cond,
                sp_t - p,
                K.zeros_like(p),
            )))
    else:
        slow_params = {p.name: K.variable(K.get_value(p), name='sp_{}'.format(i))
                       for i, p in enumerate(params)}
        # Temporarily intercept K.update* so the wrapped optimizer's updates
        # can be captured as tuples and rewritten below.
        update_names = ['update', 'update_add', 'update_sub']
        original_updates = [getattr(K, name) for name in update_names]
        setattr(K, 'update', lambda x, new_x: ('update', x, new_x))
        setattr(K, 'update_add', lambda x, new_x: ('update_add', x, new_x))
        setattr(K, 'update_sub', lambda x, new_x: ('update_sub', x, new_x))
        self.updates = self.optimizer.get_updates(loss, params)
        for name, original_update in zip(update_names, original_updates):
            setattr(K, name, original_update)
        slow_updates = []
        for i, update in enumerate(self.updates):
            if isinstance(update, tuple):
                name, x, new_x, adjusted = update + (update[-1],)
                update_func = getattr(K, name)
                if name == 'update_add':
                    adjusted = x + new_x
                if name == 'update_sub':
                    adjusted = x - new_x
                if x.name not in slow_params:
                    self.updates[i] = update_func(x, new_x)
                else:
                    slow_param = slow_params[x.name]
                    slow_param_t = slow_param + \
                        self.slow_step * (adjusted - slow_param)
                    slow_updates.append(K.update(slow_param, K.switch(
                        sync_cond,
                        slow_param_t,
                        slow_param,
                    )))
                    self.updates[i] = K.update(x, K.switch(
                        sync_cond,
                        slow_param_t,
                        adjusted,
                    ))
        slow_params = list(slow_params.values())
    self.updates += slow_updates
    self.weights = self.optimizer.weights + slow_params
    return self.updates
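# Hedged usage sketch for the Lookahead wrapper above. The constructor
# signature Lookahead(optimizer, sync_period, slow_step) is assumed from the
# fields this method reads; adjust to the enclosing class if it differs:
#
#   from keras.optimizers import Adam
#   model.compile(optimizer=Lookahead(Adam(1e-3), sync_period=5, slow_step=0.5),
#                 loss='mse')
#
# Every sync_period iterations the slow weights move toward the fast weights
# by slow_step, and the fast weights are then reset to the slow weights.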
def merge_action_crossed_fn(x):
    # Note: 'carpisma' is Turkish for collision.
    input = x[0]
    is_crossed_orig = x[1]
    is_crossed_orig = K.switch(is_crossed_orig > 0.1,
                               K.maximum(is_crossed_orig, 1),
                               is_crossed_orig * 0)
    action_count = input.shape[1]
    is_crossed = K.expand_dims(is_crossed_orig, axis=1)
    is_crossed = K.expand_dims(is_crossed, axis=1)
    is_crossed = K.temporal_padding(is_crossed, (0, action_count - 1))
    is_crossed = K.squeeze(is_crossed, axis=2)
    is_crossed_mask = K.expand_dims(is_crossed_orig, axis=1)
    is_crossed_mask = K.repeat_elements(is_crossed_mask, action_count, axis=1)
    res_crossed = (1 - is_crossed_mask) * input + is_crossed
    carpisma_timer_orig = x[2]
    carpisma_timer_orig = K.squeeze(carpisma_timer_orig, axis=2)
    is_carpisma = K.sum(carpisma_timer_orig, axis=1)
    is_carpisma = K.switch(is_carpisma > 0.1,
                           K.maximum(is_carpisma, 1),
                           is_carpisma * 0)
    not_carpisma = 1 - is_carpisma
    print("carpisma timer", carpisma_timer_orig)
    print("is carpisma", is_carpisma.shape)
    print("not carpisma", not_carpisma.shape)
    not_carpisma = K.expand_dims(not_carpisma, axis=1)
    not_carpisma = K.repeat_elements(not_carpisma, action_count, axis=1)
    res_crossed = res_crossed * not_carpisma
    res = K.concatenate([res_crossed, carpisma_timer_orig], axis=1)
    return res
def _init_cel(self, A_graph, b_graph, c_graph, y):
    # Sanity checks
    y = tf.check_numerics(y, 'Problem with input y')
    # Find intersection points between Ax - b and the line joining c and y.
    Ac = tf.reduce_sum(A_graph * tf.expand_dims(c_graph, axis=-2), axis=-1)
    bMinusAc = b_graph - Ac
    yMinusc = y - c_graph
    ADotyMinusc = tf.reduce_sum((A_graph * tf.expand_dims(yMinusc, -2)), axis=-1)
    intersection_alphas = bMinusAc / (ADotyMinusc + K.epsilon())
    # Enforce intersection_alpha > 0: the point must lie on the ray from c to y.
    less_equal_0 = K.less_equal(intersection_alphas,
                                K.zeros_like(intersection_alphas))
    candidate_alpha = K.switch(
        less_equal_0,
        K.ones_like(intersection_alphas) * tf.constant(np.inf, dtype='float32'),
        intersection_alphas)
    # The intersection point closest to the interior point is the projection point.
    intersection_alpha = K.min(candidate_alpha, axis=-1, keepdims=True)
    # If y is already an interior point, y itself is the projection point.
    is_interior_point = K.greater_equal(intersection_alpha,
                                        K.ones_like(intersection_alpha))
    alpha = K.switch(is_interior_point,
                     K.ones_like(intersection_alpha),
                     intersection_alpha)
    # Return z = alpha * y + (1 - alpha) * c
    z = alpha * y + ((1 - alpha) * c_graph)
    return z
def get_model(self):
    input_shape = self.ops.INPUT_SIZE
    input = Input(shape=input_shape, name='observation')
    action_outputs = []
    done_outputs = []
    reward_outputs = []
    goal_position = 0.5
    for I in range(self.ops.ACTION_COUNT):
        action_output = Lambda(mountaincar_next_state_fn,
                               arguments={'action': I})(input)
        action_outputs.append(action_output)
        # print(action_output.shape)
        # Episode is done once the position exceeds the goal position.
        done_output = Lambda(lambda x: K.switch(
            x[:, 0] > goal_position, 1 + x[:, 0:1] * 0, x[:, 0:1] * 0))(
                action_output)
        done_outputs.append(done_output)
        # @ersin: variant with the -100 penalizer:
        # reward_output = Lambda(lambda x: x[:, 0:1] * 0 - 1)(done_output)
        reward_output = Lambda(lambda x: K.switch(
            x < 0.01, x[:, 0:1] * 0 - 0.01, x[:, 0:1] * 0 + 1))(
                done_output)
        reward_outputs.append(reward_output)
    reward_output_concat = Concatenate()(reward_outputs)
    model = Model(inputs=[input],
                  outputs=action_outputs + [reward_output_concat] + done_outputs)
    my_optimizer = Adam(lr=self.ops.LEARNING_RATE)
    model.compile(optimizer=my_optimizer, loss='mse')
    return model
def __init__(self, optimizer, steps_per_update=1, **kwargs):
    assert float(
        tf.__version__[:4]
    ) <= 1.13, "Please make sure that your tensorflow version is 1.13.x or lower."
    super(AccumOptimizer, self).__init__(**kwargs)
    self.optimizer = optimizer
    with K.name_scope(self.__class__.__name__):
        self.steps_per_update = steps_per_update
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.cond = K.equal(self.iterations % self.steps_per_update, 0)
        self.lr = self.optimizer.lr
        # Zero the learning rate on the steps that only accumulate gradients.
        self.optimizer.lr = K.switch(self.cond, self.optimizer.lr, 0.)
        for attr in ['momentum', 'rho', 'beta_1', 'beta_2']:
            if hasattr(self.optimizer, attr):
                value = getattr(self.optimizer, attr)
                setattr(self, attr, value)
                setattr(self.optimizer, attr,
                        K.switch(self.cond, value, 1 - 1e-7))
        for attr in self.optimizer.get_config():
            if not hasattr(self, attr):
                value = getattr(self.optimizer, attr)
                setattr(self, attr, value)

        # Cover the original get_gradients method with accumulative gradients.
        def get_gradients(loss, params):
            return [ag / self.steps_per_update for ag in self.accum_grads]

        self.optimizer.get_gradients = get_gradients
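# Hedged usage sketch for the accumulating wrapper above (class name
# AccumOptimizer taken from the super() call; TF <= 1.13 assumed, as the
# assert requires). Gradients are averaged over steps_per_update batches and
# the wrapped optimizer's learning rate is zeroed on the in-between steps:
#
#   from keras.optimizers import Adam
#   opt = AccumOptimizer(Adam(1e-3), steps_per_update=8)
#   model.compile(optimizer=opt, loss='mse')  # effective batch size: 8x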
def get_updates(self, loss, params):
    # Create accumulated gradients
    grads = self.get_gradients(loss, params)
    self.updates = []
    with tf.control_dependencies([self.iterations.assign_add(1)]):
        update_cond = K.equal(self.iterations % self.accumulation_steps, 0)
        sub_step = (self.iterations - 1) % self.accumulation_steps + 1
        fake_iterations = (self.iterations - 1) // self.accumulation_steps + 1
        acc_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        for grad, acc_grad in zip(grads, acc_grads):
            ave_grad = grad / K.cast(self.accumulation_steps, K.floatx())
            self.updates.append(K.update(
                acc_grad,
                K.switch(
                    K.equal(sub_step, 1),
                    ave_grad,
                    acc_grad + (ave_grad - acc_grad) / K.cast(sub_step, K.floatx())
                ),
            ))
        self.optimizer.get_gradients = lambda _loss, _params: \
            [K.switch(update_cond, grad, K.zeros_like(grad)) for grad in acc_grads]

        # Use fake iterations
        original_iterations = self.optimizer.iterations
        original_assign_add = getattr(state_ops, 'assign_add')
        setattr(
            state_ops, 'assign_add',
            lambda ref, val: original_assign_add(ref, val)
            if ref is not fake_iterations
            else original_assign_add(original_iterations, val)
        )
        self.optimizer.iterations = fake_iterations

        # Use fake learning rate
        self.optimizer.learning_rate = K.switch(update_cond, self.lr, 0.0)

        # Freeze momentum
        momentum = {}
        for name in self.momentum_names:
            if hasattr(self.optimizer, name):
                momentum[name] = getattr(self.optimizer, name)
                setattr(self.optimizer, name,
                        K.switch(update_cond, momentum[name], (1.0 - K.epsilon())))

        for update in self.optimizer.get_updates(loss, params):
            if update is not None:
                self.updates.append(update)

        # Restore variables
        for name, value in momentum.items():
            setattr(self.optimizer, name, value)
        self.optimizer.learning_rate = self._lr
        self.optimizer.iterations = original_iterations
        setattr(state_ops, 'assign_add', original_assign_add)
    return self.updates
def _get_update_list(self, kernel):
    update_list = super(E2EFSSoft, self)._get_update_list(kernel)
    update_list += [
        (self.moving_factor,
         K.switch(K.less(self.moving_T, self.warmup_T),
                  self.start_alpha,
                  K.minimum(self.alpha_M,
                            self.start_alpha + (1. - self.start_alpha) *
                            (self.moving_T - self.warmup_T) / self.T))),
        (self.moving_T, self.moving_T + 1),
        (self.moving_decay,
         K.switch(K.less(self.moving_factor, self.alpha_M),
                  self.moving_decay,
                  K.maximum(.75, self.moving_decay + self.epsilon))),
    ]
    return update_list
def loss_units(x):
    t = x / K.max(K.abs(x))
    x = K.switch(K.less(t, K.epsilon()), K.zeros_like(x), x)
    m = K.sum(K.cast(K.greater(x, 0.), K.floatx()))
    sum_x = K.sum(x)
    moving_units = K.switch(K.less_equal(m, self.units),
                            m,
                            (1. - self.moving_decay) * self.moving_units)
    epsilon_minus = 0.
    epsilon_plus = K.switch(K.less_equal(m, self.units),
                            self.moving_units,
                            0.)
    return K.relu(moving_units - sum_x - epsilon_minus) + \
        K.relu(sum_x - moving_units - epsilon_plus)
def call(self, inputs, **kwargs):
    input_shape = K.int_shape(inputs)
    sequence_length, d_model = input_shape[-2:]
    # output of the "sigmoid halting unit" (not the probability yet)
    halting = K.sigmoid(
        K.reshape(
            K.bias_add(K.dot(K.reshape(inputs, [-1, d_model]),
                             self.halting_kernel),
                       self.halting_biases,
                       data_format='channels_last'),
            [-1, sequence_length]))
    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)
    # useful flags
    step_is_active = K.greater(self.halt_budget, 0)
    no_further_steps = K.less_equal(self.halt_budget - halting, 0)
    # halting probability is equal to
    # a. the halting output if this isn't the last step (we have some budget)
    # b. the remainder if it is,
    # c. and zero for the steps that shouldn't be executed at all
    #    (out of budget for them)
    halting_prob = K.switch(
        step_is_active,
        K.switch(no_further_steps, self.remainder, halting),
        self.zeros_like_halting)
    self.active_steps += K.switch(step_is_active,
                                  self.ones_like_halting,
                                  self.zeros_like_halting)
    # We don't know which step is the last, so we keep updating
    # the expression for the loss with each call of the layer
    self.ponder_cost = (self.time_penalty_t *
                        K.mean(self.remainder + self.active_steps))
    # Updating "the remaining probability" and the halt budget
    self.remainder = K.switch(no_further_steps,
                              self.remainder,
                              self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative
    # If none of the inputs are active at this step, then instead
    # of zeroing them out by multiplying by an all-zeroes halting_prob,
    # we can simply use a constant tensor of zeroes, which means that
    # we won't even calculate the output of those steps, saving
    # some real computational time.
    if self.zeros_like_input is None:
        self.zeros_like_input = K.zeros_like(inputs, name='zeros_like_input')
    # just because K.any(step_is_active) doesn't work in PlaidML
    any_step_is_active = K.greater(K.sum(K.cast(step_is_active, 'int32')), 0)
    step_weighted_output = K.switch(
        any_step_is_active,
        K.expand_dims(halting_prob, -1) * inputs,
        self.zeros_like_input)
    if self.weighted_output is None:
        self.weighted_output = step_weighted_output
    else:
        self.weighted_output += step_weighted_output
    return [inputs, self.weighted_output]
def wrapper(y_true, y_pred):
    if usesoftmax:
        y_pred = tf.keras.activations.softmax(y_pred)
    y_pred = backend.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    # All calculations below are based on the classes greater than 0
    # (the background class is excluded), except accuracy.
    avgIOU = 0.0
    for i in range(batch_size):
        numUnion = 1.0
        recall = 0.0
        numClass = 0.0
        IOU = 0.0
        mask = backend.argmax(y_true[i], -1)
        pred = backend.argmax(y_pred[i], -1)
        for c in np.arange(1, num_classes, 1):
            msk_equal = backend.cast(backend.equal(mask, c), dtype='float32')
            masks_sum = backend.sum(msk_equal)
            predictions_sum = backend.sum(
                backend.cast(backend.equal(pred, c), 'float32'))
            numTrue = backend.sum(
                backend.cast(backend.equal(pred, c), 'float32') *
                backend.cast(backend.equal(mask, c), 'float32'))
            unionSize = masks_sum + predictions_sum - numTrue
            maskhaslabel = backend.greater(masks_sum, 0)
            predhaslabel = backend.greater(predictions_sum, 0)
            predormaskexistlabel = backend.any(
                backend.stack([maskhaslabel, predhaslabel], axis=0), axis=0)
            IOU = backend.switch(predormaskexistlabel,
                                 lambda: IOU + numTrue / unionSize,
                                 lambda: IOU)
            numUnion = backend.switch(predormaskexistlabel,
                                      lambda: numUnion + 1,
                                      lambda: numUnion)
            recall = backend.switch(maskhaslabel,
                                    lambda: recall + numTrue / masks_sum,
                                    lambda: recall)
            numClass = backend.switch(maskhaslabel,
                                      lambda: numClass + 1,
                                      lambda: numClass)
        IOU = IOU / numUnion
        avgIOU = avgIOU + IOU
    avgIOU = avgIOU / batch_size
    iou_loss = 1.0 - avgIOU
    # print(np.shape(y_true), np.shape(y_pred))
    main_loss = backend.mean(weighted_loss_fn(y_true, y_pred))
    # dice_loss = soft_dice_loss(y_true, y_pred)
    return main_loss + 0.1 * iou_loss
def weighted_sum(first, second, sigma,
                 first_threshold=-np.inf, second_threshold=np.inf):
    logit_probs = first * sigma + second * (1.0 - sigma)
    infty_tensor = K.ones_like(logit_probs) * INFTY
    logit_probs = K.switch(K.greater(first, first_threshold),
                           logit_probs, infty_tensor)
    logit_probs = K.switch(K.greater(second, second_threshold),
                           logit_probs, infty_tensor)
    return logit_probs
def _get_update_list(self, kernel):
    super(E2EFSSoft, self)._get_update_list(kernel)
    self.moving_factor.assign(
        K.switch(
            K.less(self.moving_T, self.warmup_T),
            self.start_alpha,
            K.minimum(
                self.alpha_M,
                self.start_alpha + (1. - self.start_alpha) *
                (self.moving_T - self.warmup_T) / self.T)))
    self.moving_T.assign_add(1.)
    self.moving_decay.assign(
        K.switch(K.less(self.moving_factor, self.alpha_M),
                 self.moving_decay,
                 K.maximum(.75, self.moving_decay + self.epsilon)))
def _get_update_list(self, kernel):
    update_list = super(E2EFSRanking, self)._get_update_list(kernel)
    update_list += [
        (self.moving_factor,
         K.switch(K.less_equal(self.moving_T, self.warmup_T),
                  self.start_alpha,
                  K.minimum(self.alpha_M,
                            self.start_alpha + (1. - self.start_alpha) *
                            (self.moving_T - self.warmup_T) / self.T))),
        (self.moving_T, self.moving_T + 1),
        (self.moving_units,
         K.switch(K.less_equal(self.moving_T, self.warmup_T),
                  K.cast_to_floatx((1. - self.start_alpha) *
                                   np.prod(K.int_shape(kernel))),
                  K.maximum(self.alpha_M,
                            np.prod(K.int_shape(kernel)) *
                            K.pow(K.cast_to_floatx(1. / np.prod(K.int_shape(kernel))),
                                  self.speedup * (self.moving_T - self.warmup_T) / self.T)))),
        # K.maximum(1., (self.T - self.start_alpha - self.speedup *
        #                (self.moving_T - self.warmup_T)) *
        #           np.prod(K.int_shape(kernel)) / self.T))),
    ]
    return update_list
def new_update(x, new_x):
    if x is var and self._do_layer_adaptation(x):
        dx = new_x - x
        lr_t = self._decayed_lr(x.dtype.base_dtype)
        lr_t = K.clip(lr_t, K.epsilon(), 1e10)
        x_norm = tf.norm(x)
        g_norm = tf.norm(dx / lr_t)
        # Layer-wise adaptation: rescale the step by ||x|| / ||g||
        # when both norms are positive; otherwise leave it unscaled.
        ratio = K.switch(
            x_norm > 0.,
            K.switch(g_norm > K.epsilon(), x_norm / g_norm, 1.),
            1.)
        new_x = x + dx * ratio
    return old_update(x, new_x)
def get_model(self):
    input_shape = self.ops.INPUT_SIZE
    input = Input(shape=input_shape, name='observation')
    x = input
    # x = Dense(24, activation="relu", kernel_initializer='he_uniform')(x)
    action_outputs = []
    done_outputs = []
    reward_outputs = []
    losses = []
    loss_weights = []
    for I in range(self.ops.ACTION_COUNT):
        action_output = Dense(
            input_shape[0],
            kernel_initializer=Constant(1),
            bias_initializer=Constant(0.05 if I == 0 else -0.05))(x)
        action_output = Lambda(lambda x: K.clip(x, -1, 1))(action_output)
        action_outputs.append(action_output)
        print(action_output.shape)
        done_output = Lambda(lambda x: K.switch(
            K.abs(x - 0.5) < 0.05, 1 + x * 0, x * 0))(action_output)
        # done_output = Dense(1, kernel_initializer='he_uniform',
        #                     activation='sigmoid')(x)
        done_outputs.append(done_output)
        losses.append('mse')
        loss_weights.append(1)
        # reward_output = Dense(20, activation='sigmoid')(input)
        # reward_output = RBFLayer(100, initializer=RandomUniform(-1.0, 1.0),
        #                          betas=0.02)(input)
        # reward_output = Dense(20, activation='relu')(input)
        # reward_output = Dense(20, activation='hard_sigmoid')(input)
        # reward_output = Dense(1)(reward_output)
        reward_output = Lambda(lambda x: K.switch(
            x < 0.01, x * 0 - 0.01, x * 0 + 1))(done_output)
        reward_outputs.append(reward_output)
        # reward_hidden = Dense(256, activation="relu",
        #                       kernel_initializer='he_uniform')(x)
        # reward_output = Dense(1, kernel_initializer=keras.initializers
        #                       .random_uniform(-0.02, 0.02))(reward_hidden)
        # reward_outputs.append(reward_output)
        losses.append('mse')
        loss_weights.append(5)
    reward_output_concat = Concatenate()(reward_outputs)
    for I in range(self.ops.ACTION_COUNT):
        losses.append('binary_crossentropy')
        loss_weights.append(0.3)
    model = Model(inputs=[input],
                  outputs=action_outputs + [reward_output_concat] + done_outputs)
    my_optimizer = Adam(lr=self.ops.LEARNING_RATE)
    # my_optimizer = SGD(lr=self.ops.LEARNING_RATE)
    # my_optimizer = RMSprop(lr=self.ops.LEARNING_RATE)
    # epsilon=None, rho=0.90, decay=0.0
    model.compile(optimizer=my_optimizer, loss=losses,
                  loss_weights=loss_weights)
    return model
def nmse(y_true, y_pred, a, c):
    # Piecewise relative squared error: the weight a[i] applies to targets
    # falling in the open bin (c[i], c[i + 1]).
    dim = len(c) - 1
    loss_classes = [[] for i in range(dim)]
    cond = [[] for i in range(dim)]
    loss = [[] for i in range(dim)]
    for i in range(0, dim):
        loss_classes[i] = a[i] * K.square((y_pred - y_true) / y_true)
        cond[i] = K.less(y_true, c[i + 1]) & K.greater(y_true, c[i])
    loss[0] = K.switch(cond[0], loss_classes[0], loss_classes[1])
    for i in range(1, dim):
        loss[i] = K.switch(cond[i], loss_classes[i], loss[i - 1])
    return K.mean(loss[dim - 1], axis=-1)
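# Hedged example of the chained K.switch above (assumes 1-D float tensors and
# the tf.keras backend; the numbers are illustrative, not from the source):
# each target picks up the weight of the bin it falls into.
import numpy as np
from tensorflow.keras import backend as K

y_true = K.constant([0.5, 5.0, 50.0])
y_pred = K.constant([1.0, 4.0, 40.0])
# Open bins (0, 1), (1, 10), (10, 100) with weights 1, 2, 4.
loss = nmse(y_true, y_pred, a=[1.0, 2.0, 4.0], c=[0.0, 1.0, 10.0, 100.0])
# Per element: 1*(0.5/0.5)^2 = 1.0, 2*(1/5)^2 = 0.08, 4*(10/50)^2 = 0.16
np.testing.assert_allclose(K.eval(loss), (1.0 + 0.08 + 0.16) / 3, rtol=1e-5)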
def check_carpisma(x):
    # 'carpisma' is Turkish for collision, 'tavuk' for chicken: detect
    # car/chicken overlap across the stacked object planes of the frame.
    cars = tf.strided_slice(x, [0, 0, 0, 3], [1, 210, 160, 40], [1, 1, 1, 4])
    cars = K.sum(cars, axis=3, keepdims=True)
    tavuks = tf.strided_slice(x, [0, 0, 0, 43], [1, 210, 160, 52], [1, 1, 1, 4])
    tavuks = K.sum(tavuks, axis=3, keepdims=True)
    # cars = K.sum(x[:, :, :, 3:4:40], axis=3, keepdims=True)
    # tavuks = K.sum(x[:, :, :, 43:4:52], axis=3, keepdims=True)
    # cars_capped = K.switch(K.greater(cars, 0.1), K.ones_like(cars),
    #                        K.zeros_like(cars))
    # tavuks_capped = K.switch(K.greater(tavuks, 0.1), K.ones_like(tavuks),
    #                          K.zeros_like(tavuks))
    cars_capped = K.switch(cars > 0.1, K.maximum(cars, 1), cars * 0)
    tavuks_capped = K.switch(tavuks > 0.1, K.maximum(tavuks, 1), tavuks * 0)
    carpisma = K.sum(cars_capped * tavuks_capped, axis=[1, 2, 3])
    carpisma = K.minimum(carpisma, 1)
    return carpisma
def get_updates(self, params, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    lr = self.lr
    if self.inital_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))
    t = self.iterations + 1
    lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (
        1. - K.pow(self.beta_1, t))
    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    f = K.variable(0)
    d = K.variable(1)
    self.weights = [self.iterations] + ms + vs + [f, d]
    cond = K.greater(t, K.variable(1))
    small_delta_t = K.switch(K.greater(loss, f), self.small_k + 1,
                             1. / (self.big_K + 1))
    big_delta_t = K.switch(K.greater(loss, f), self.big_K + 1,
                           1. / (self.small_k + 1))
    c_t = K.minimum(K.maximum(small_delta_t, loss / (f + self.epsilon)),
                    big_delta_t)
    f_t = c_t * f
    r_t = K.abs(f_t - f) / (K.minimum(f_t, f))
    d_t = self.beta_3 * d + (1 - self.beta_3) * r_t
    f_t = K.switch(cond, f_t, loss)
    d_t = K.switch(cond, d_t, K.variable(1.))
    self.updates.append(K.update(f, f_t))
    self.updates.append(K.update(d, d_t))
    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        p_t = p - lr_t * m_t / (d_t * K.sqrt(v_t) + self.epsilon)
        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t
        self.updates.append(K.update(p, new_p))
    return self.updates
def piecewise_linear(t, schedule, from_zero=True):
    """Piecewise linear function.

    `schedule` is a dict such as {1000: 1, 2000: 0.1}, meaning that
    for t in [0, 1000] the output increases uniformly from 0 to 1;
    for t in [1000, 2000] it decreases uniformly from 1 to 0.1;
    and for t > 2000 it stays constant at 0.1.
    """
    schedule = sorted(schedule.items())
    if from_zero and schedule[0][0] != 0:
        schedule = [(0, 0.0)] + schedule

    t = K.cast(t, K.floatx())
    x = (t * 0 + 1) * schedule[0][1]
    for i in range(len(schedule)):
        t_begin = schedule[i][0]
        x_begin = x
        if i != len(schedule) - 1:
            dx = schedule[i + 1][1] - schedule[i][1]
            dt = schedule[i + 1][0] - schedule[i][0]
            slope = 1.0 * dx / dt
            x = schedule[i][1] + slope * (t - t_begin)
        else:
            x = (t * 0 + 1) * schedule[i][1]
        x = K.switch(t >= t_begin, x, x_begin)

    return x
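# Quick numeric check of piecewise_linear (a sketch assuming the tf.keras
# backend; the expected values follow directly from the docstring's schedule):
import numpy as np
from tensorflow.keras import backend as K

schedule = {1000: 1, 2000: 0.1}
for t, expected in [(0, 0.0), (500, 0.5), (1000, 1.0), (1500, 0.55), (3000, 0.1)]:
    value = K.eval(piecewise_linear(K.constant(t), schedule))
    np.testing.assert_allclose(value, expected, atol=1e-6)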
def new_get_updates(self, loss, params):
    self.accumulated_grads = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
    ]
    iters = K.update_add(self.accumulated_iters, 1)
    new_grads = orig_get_gradients(loss, params)
    if do_mean:
        new_grads = [g / K.cast(iter_size, K.dtype(g)) for g in new_grads]
    self.updated_grads = [
        K.update_add(p, g) for p, g in zip(self.accumulated_grads, new_grads)
    ]

    def update_function():
        with tf.control_dependencies(orig_get_updates(loss, params)):
            reset_grads = [
                K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p)))
                for p in self.accumulated_grads
            ]
        return tf.group(*(reset_grads + [iters]))

    def just_store_function():
        return tf.group(*[iters])

    update_switch = K.equal(iters % iter_size, 0)
    with tf.control_dependencies(self.updated_grads):
        self.updates = [
            K.switch(update_switch, update_function, just_store_function)
        ]
    return self.updates
def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox):
    """Return the RPN bounding box loss graph.

    config: the model config object.
    target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))].
        Uses 0 padding to fill in unused bbox deltas.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
        -1=negative, 0=neutral anchor.
    rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
    """
    # Positive anchors contribute to the loss, but negative and
    # neutral anchors (match value of 0 or -1) don't.
    rpn_match = K.squeeze(rpn_match, -1)
    indices = tf.where(K.equal(rpn_match, 1))
    # Pick bbox deltas that contribute to the loss
    rpn_bbox = tf.gather_nd(rpn_bbox, indices)
    # Trim target bounding box deltas to the same length as rpn_bbox.
    batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1)
    target_bbox = batch_pack_graph(target_bbox, batch_counts,
                                   config.IMAGES_PER_GPU)
    loss = smooth_l1_loss(target_bbox, rpn_bbox)
    loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
    return loss
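# Standalone illustration of the empty-positives guard above (a sketch, not
# part of the Mask R-CNN pipeline): K.switch falls back to 0.0 when no
# positive anchors survive, instead of taking the mean of an empty tensor.
import tensorflow as tf
from tensorflow.keras import backend as K

empty_loss = tf.zeros([0])  # no positive anchors contributed this batch
safe_loss = K.switch(tf.size(empty_loss) > 0, K.mean(empty_loss),
                     tf.constant(0.0))
print(K.eval(safe_loss))  # 0.0, not NaN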
def frcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
    """Loss for Faster R-CNN bounding box refinement.

    target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
    target_class_ids: [batch, num_rois]. Integer class IDs.
    pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]
    """
    # Reshape to merge batch and roi dimensions for simplicity.
    target_class_ids = K.reshape(target_class_ids, (-1,))
    target_bbox = K.reshape(target_bbox, (-1, 4))
    pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4))
    # Only positive ROIs contribute to the loss. And only
    # the right class_id of each ROI. Get their indices.
    positive_roi_ix = tf.where(target_class_ids > 0)[:, 0]
    positive_roi_class_ids = tf.cast(
        tf.gather(target_class_ids, positive_roi_ix), tf.int64)
    indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1)
    # Gather the deltas (predicted and true) that contribute to the loss
    target_bbox = tf.gather(target_bbox, positive_roi_ix)
    pred_bbox = tf.gather_nd(pred_bbox, indices)
    # Smooth-L1 loss; fall back to 0.0 when there are no positive ROIs.
    loss = K.switch(tf.size(target_bbox) > 0,
                    smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox),
                    tf.constant(0.0))
    loss = K.mean(loss)
    return loss
def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
    """Mask binary cross-entropy loss for the masks head.

    target_masks: [batch, num_rois, height, width].
        A float32 tensor of values 0 or 1. Uses zero padding to fill array.
    target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded.
    pred_masks: [batch, proposals, height, width, num_classes] float32 tensor
        with values from 0 to 1.
    """
    # Reshape for simplicity. Merge first two dimensions into one.
    target_class_ids = K.reshape(target_class_ids, (-1,))
    mask_shape = tf.shape(target_masks)
    target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3]))
    pred_shape = tf.shape(pred_masks)
    pred_masks = K.reshape(pred_masks,
                           (-1, pred_shape[2], pred_shape[3], pred_shape[4]))
    # Permute predicted masks to [N, num_classes, height, width]
    pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2])
    # Only positive ROIs contribute to the loss, and only
    # the class-specific mask of each ROI.
    positive_ix = tf.where(target_class_ids > 0)[:, 0]
    positive_class_ids = tf.cast(tf.gather(target_class_ids, positive_ix),
                                 tf.int64)
    indices = tf.stack([positive_ix, positive_class_ids], axis=1)
    # Gather the masks (predicted and true) that contribute to the loss
    y_true = tf.gather(target_masks, positive_ix)
    y_pred = tf.gather_nd(pred_masks, indices)
    # Compute binary cross-entropy; if there are no positive ROIs, return 0.
    loss = K.switch(
        tf.size(y_true) > 0,
        K.binary_crossentropy(target=y_true, output=y_pred),
        tf.constant(0.0))
    loss = K.mean(loss)
    return loss
def huber_loss(y, y_pred, delta: float = 1.0):
    """Return the Huber loss between tensors.

    Reference:
        https://en.wikipedia.org/wiki/Huber_loss
        https://web.stanford.edu/class/cs20si/2017/lectures/slides_03.pdf
        https://keras.io/backend/

    Args:
        y: ground truth y labels
        y_pred: predicted y labels
        delta: the separating constant between MSE and MAE

    Returns:
        an element-wise loss tensor between the ground truth and predicted labels
    """
    # calculate the residuals
    residual = K.abs(y_pred - y)
    # determine the result of the logical comparison to delta
    condition = K.less_equal(residual, delta)
    # calculate the two possible returns (MSE and MAE)
    then_this = 0.5 * K.square(residual)
    else_this = delta * residual - 0.5 * K.square(delta)
    # use the condition to determine the resulting tensor
    return K.switch(condition, then_this, else_this)
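# Worked numeric example for huber_loss (a sketch assuming the tf.keras
# backend): with delta=1, a residual of 0.5 is in the quadratic region,
# 0.5 * 0.5**2 = 0.125, while a residual of 3.0 is in the linear region,
# 1 * 3 - 0.5 * 1**2 = 2.5.
import numpy as np
from tensorflow.keras import backend as K

y = K.constant([0.0, 0.0])
y_pred = K.constant([0.5, 3.0])
np.testing.assert_allclose(K.eval(huber_loss(y, y_pred)), [0.125, 2.5])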
def dropped_mask():
    drop_mask = K.switch(
        K.random_uniform(K.shape(inputs)) < self.drop_rate,
        K.ones_like(inputs, K.floatx()),
        K.zeros_like(inputs, K.floatx()),
    )
    return target_mask * drop_mask
def new_get_updates(self, loss, params):
    self.accumulated_grads = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
    ]
    update_iter = [K.update_add(self.accumulated_iterations, 1)]
    new_grads = self.orig_get_gradients(loss, params)
    if self.do_mean:
        new_grads = [
            g / K.cast(self.iter_size, K.dtype(g)) for g in new_grads
        ]
    update_grads = [
        K.update_add(p, g) for p, g in zip(self.accumulated_grads, new_grads)
    ]

    def update_func():
        with tf.control_dependencies(self.orig_get_updates(loss, params)):
            reset_grads = [
                K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p)))
                for p in self.accumulated_grads
            ]
        return tf.group(*(reset_grads + update_iter))

    def just_iter_func():
        return tf.group(*update_iter)

    # do the original get_updates() computations only once every
    # 'iter_size' iterations
    update_switch = K.equal(self.accumulated_iterations % self.iter_size, 0)
    with tf.control_dependencies(update_grads):
        self.updates = [
            K.switch(update_switch, update_func, just_iter_func)
        ]
    return self.updates