def call(self, x, mask=None):
    """Apply ``self.pool_image_rois`` to every element of ``x``.

    ``K.map_fn`` iterates over ``x``; each element is indexed as
    ``[0]`` (images) and ``[1]`` (ROI boxes) and handed to
    ``self.pool_image_rois``. The mapped result is declared as float32.
    ``mask`` is accepted but not used.
    """
    def pool_single_sample(sample):
        # sample[0] -> images, sample[1] -> ROI boxes for that sample.
        return self.pool_image_rois(sample[0], sample[1])

    return K.map_fn(pool_single_sample, x, dtype=tf.float32)
def call(self, x, mask=None):
    """Label-wise attention over document representations.

    ``x`` unpacks into (document representations, label representations).
    Documents are projected with ``self.W_d`` / ``self.b_d`` and ``tanh``,
    scored against the labels, and pooled per label inside ``K.map_fn``.
    The pooled vectors are multiplied with the label representations,
    summed over the last axis and passed through a sigmoid.

    Returns the label scores, or ``[label scores, attention]`` when
    ``self.return_attention`` is truthy. ``mask`` is not used.
    """
    documents, labels = x
    projected = K.tanh(dot_product(documents, self.W_d) + self.b_d)

    # Attention scores between projected documents and labels.
    scores = dot_product(projected, labels)

    def attend(pair):
        tokens, token_scores = pair
        weights = K.softmax(K.transpose(token_scores))
        pooled = K.dot(weights, tokens)
        # Two values are always returned because the map_fn result is
        # unpacked into two names below; the pooled vector is duplicated
        # when attention weights are not requested.
        if self.return_attention:
            return [pooled, weights]
        return [pooled, pooled]

    pooled_docs, attention = K.map_fn(attend, [documents, scores])

    label_scores = K.sigmoid(K.sum(pooled_docs * labels, axis=-1))
    if self.return_attention:
        return [label_scores, attention]
    return label_scores
def call(self, inputs):
    """Adaptively shift, scale and gate ``inputs``.

    The tensor is passed through ``self.transpose`` first (the original
    comments describe this as (batch, rows, n_features) ->
    (batch, n_features, rows)), normalised along axis 2 in three steps,
    and transposed back before being returned.
    """
    def as_column(stat):
        # Reshape a per-feature statistic so it broadcasts along axis 2.
        return K.reshape(stat, (-1, self.n_features, 1))

    def floor_at_one(elem):
        # Entries <= 1.0 are replaced by 1; larger entries are kept.
        return K.switch(K.less_equal(elem, 1.0), K.ones_like(elem), elem)

    x = self.transpose(inputs)

    # Step 1: adaptive average.
    shift = as_column(self.mean_layer(K.mean(x, axis=2)))
    x = x - shift

    # Step 2: adaptive scaling.
    rms = K.sqrt(K.mean(x ** 2, axis=2) + self.eps)
    scale = K.map_fn(floor_at_one, self.scaling_layer(rms))
    x = x / as_column(scale)

    # Step 3: gating.
    gate = as_column(self.gating_layer(K.mean(x, axis=2)))
    x = x * gate

    return self.transpose(x)
def call(self, tensors, mask=None):
    """Warp one volume and both masks, then combine in the Fourier domain.

    ``tensors`` unpacks into ``(X, Y, m1, m2, theta)``. Both masks are
    warped with ``self._mask_batch_affine_warp3d``; ``X`` is warped with
    ``self._batch_affine_warp3d`` according to ``self.padding_method``:

    * ``"fill"``: ``theta`` is rescaled per sample via
      ``self._rescale_theta``, ``X`` is padded by one element on the three
      middle axes, warped, and the padding is cropped off again.
    * ``"replicate"``: ``X`` is warped directly with ``theta``.

    Returns ``[output, warped m1, warped m2]``.

    Raises:
        NotImplementedError: for any other ``self.padding_method``.
    """
    x_vol, y_vol, mask_a, mask_b, theta = tensors

    warped_mask_a = self._mask_batch_affine_warp3d(masks=mask_a, theta=theta)
    warped_mask_b = self._mask_batch_affine_warp3d(masks=mask_b, theta=theta)

    method = self.padding_method
    if method == "replicate":
        warped_x = self._batch_affine_warp3d(imgs=x_vol, theta=theta)
    elif method == "fill":
        one_voxel_border = [[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]]
        # theta is rescaled against the un-padded volume.
        scaled_theta = K.map_fn(self._rescale_theta, (theta, x_vol),
                                dtype=tf.float32)
        padded = tf.pad(x_vol, one_voxel_border, "CONSTANT")
        warped_x = self._batch_affine_warp3d(imgs=padded, theta=scaled_theta)
        warped_x = warped_x[:, 1:-1, 1:-1, 1:-1, :]
    else:
        raise NotImplementedError

    # Mask each spectrum, add them, and transform back.
    masked_x = tf.math.multiply(self._ft3d(warped_x),
                                tf.cast(warped_mask_a, tf.complex64))
    masked_y = tf.math.multiply(self._ft3d(y_vol),
                                tf.cast(warped_mask_b, tf.complex64))
    output = tf.cast(self._ift3d(masked_x + masked_y), tf.float32)
    return [output, warped_mask_a, warped_mask_b]
def decode_ddd(hm, k, output_stride):
    """Decode the ``k`` best heat-map peaks into detections.

    ``hm`` is passed through ``_nms`` and flattened per batch entry. For
    each ``K.map_fn`` element the top ``k`` scores are taken; their flat
    indices are split into a class id (index modulo ``hm`` dimension 3)
    and x / y cell coordinates (using ``hm`` dimension 2 as the width),
    and the coordinates are multiplied by ``output_stride``.

    Returns a tensor whose last axis is ``[x, y, score, class]``.
    No offset refinement is applied.
    """
    hm = _nms(hm)
    dims = K.shape(hm)
    batch, width, cat = dims[0], dims[2], dims[3]
    flat_scores = K.reshape(hm, (batch, -1))

    def decode_one(sample):
        top_scores, flat_idx = tf.math.top_k(sample, k=k, sorted=True)
        class_ids = K.cast(flat_idx % cat, 'float32')
        cell_idx = K.cast(flat_idx / cat, 'int32')
        xs = K.cast(cell_idx % width, 'float32') * output_stride
        ys = K.cast(K.cast(cell_idx / width, 'int32'), 'float32') * output_stride
        return K.stack([xs, ys, top_scores, class_ids], -1)

    # elems is deliberately a one-element list, as in the original call.
    return K.map_fn(decode_one, [flat_scores], dtype=K.floatx())
def call(self, inputs, training=None):
    """Route input capsules to ``self.num_capsule`` output capsules.

    Shapes below follow the original inline comments:
    ``inputs`` is [None, input_num_capsule, input_dim_capsule] and
    ``self.W`` is [num_capsule, input_num_capsule, dim_capsule,
    input_dim_capsule]. The result is [None, num_capsule, dim_capsule].
    ``training`` is not used.
    """
    # [None, num_capsule, input_num_capsule, input_dim_capsule]
    tiled = K.tile(K.expand_dims(inputs, 1), [1, self.num_capsule, 1, 1])

    # Swap the last two axes of W so each input vector can be
    # right-multiplied by it.
    w_t = tf.transpose(self.W, perm=[0, 1, 3, 2])

    def project(sample):
        return tf.matmul(K.expand_dims(sample, 2), w_t)

    # votes: [None, num_capsule, input_num_capsule, dim_capsule]
    votes = K.squeeze(K.map_fn(project, elems=tiled), axis=3)

    # Routing logits start at zero:
    # [None, num_capsule, input_num_capsule].
    logits = tf.zeros(shape=[K.shape(votes)[0], self.num_capsule,
                             self.input_num_capsule])
    print(logits.shape)

    assert self.routings > 0, 'The routings should be > 0.'

    def softmax_per_sample(sample_logits):
        return tf.keras.activations.softmax(sample_logits, axis=0)

    for round_idx in range(self.routings):
        coupling = K.map_fn(softmax_per_sample, elems=logits)
        # Weighted sum of votes: [None, num_capsule, dim_capsule].
        outputs = K.squeeze(
            tf.matmul(K.expand_dims(coupling, 2), votes), axis=2)

        if round_idx < self.routings - 1:
            # Agreement between outputs and votes updates the logits.
            agreement = K.squeeze(
                tf.matmul(K.expand_dims(outputs, 2),
                          tf.transpose(votes, perm=[0, 1, 3, 2])),
                axis=2)
            logits = logits + agreement

    return outputs
def model_discriminator(global_shape=(256, 256, 3), local_shape=(128, 128, 3)):
    """Build a discriminator with a local and a global branch.

    The model takes a global image and four int32 crop points per sample.
    The crop is cut out of the global image and fed to the local branch
    (five stride-2 convolutions); the global image goes through six.
    Both branches end in a 1024-unit dense layer, are concatenated, and
    feed a single sigmoid unit.

    Returns:
        A ``Model`` with inputs ``[global image, crop points]``.
    """
    def crop_image(img, crop):
        # crop[1] / crop[0] are the top / left offsets; the target size is
        # the difference to crop[3] / crop[2].
        return tf.image.crop_to_bounding_box(
            img,
            offset_height=crop[1],
            offset_width=crop[0],
            target_height=crop[3] - crop[1],
            target_width=crop[2] - crop[0])

    def conv_tower(tensor, filter_counts):
        # Conv -> BatchNorm -> ReLU per stage, then Flatten -> Dense(1024).
        for n_filters in filter_counts:
            tensor = Conv2D(n_filters, kernel_size=5, strides=2,
                            padding='same')(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = Activation('relu')(tensor)
        tensor = Flatten()(tensor)
        return Dense(1024, activation='relu')(tensor)

    in_pts = Input(shape=(4,), dtype='int32')
    cropping = Lambda(
        lambda x: K.map_fn(lambda y: crop_image(y[0], y[1]),
                           elems=x, dtype=tf.float32),
        output_shape=local_shape)

    g_img = Input(shape=global_shape)
    l_img = cropping([g_img, in_pts])
    # map_fn loses the static shape; restore it for the conv layers.
    l_img.set_shape((None,) + local_shape)

    # Local branch first, then global, matching the original layer order.
    x_l = conv_tower(l_img, (64, 128, 256, 512, 512))
    x_g = conv_tower(g_img, (64, 128, 256, 512, 512, 512))

    merged = concatenate([x_l, x_g])
    verdict = Dense(1, activation='sigmoid')(merged)
    return Model(inputs=[g_img, in_pts], outputs=verdict)
def apply_scatter_nd_add(tensor, updates, indices, tf_int, tf_float):
    """Apply ``tf.tensor_scatter_nd_add`` over the batch dimension.

    Wrapped in a ``Lambda`` layer; ``K.map_fn`` walks the leading axis of
    ``tensor``, ``indices`` and ``updates`` together and adds each
    sample's ``updates`` into ``tensor`` at ``indices``. The mapped result
    is declared as ``tf_float``. ``tf_int`` is accepted but not used.
    """
    def scatter_one(sample):
        base, idx, upd = sample[0], sample[1], sample[2]
        return tf.tensor_scatter_nd_add(base, idx, upd)

    def scatter_batch(batch):
        return K.map_fn(scatter_one, batch, dtype=tf_float)

    # Note the element order: (tensor, indices, updates).
    return Lambda(scatter_batch)([tensor, indices, updates])
def apply_scatter_nd(updates, indices, tf_int, tf_float, output_size=30100):
    """Apply ``tf.scatter_nd`` over the batch dimension.

    Wrapped in a ``Lambda`` layer; ``K.map_fn`` walks the leading axis of
    ``indices`` and ``updates`` together and scatters each sample's
    ``updates`` into a fresh 1-D tensor of length ``output_size``.

    Args:
        updates: values to scatter, one row per sample.
        indices: target positions for ``updates``, one row per sample.
        tf_int: dtype used for the output-shape constant.
        tf_float: dtype declared for the mapped result.
        output_size: length of each scattered vector. Defaults to 30100,
            the value previously hard-coded here (the original comment
            assumes a max vocab_size + unique_words_in_input of
            30000 + 100).

    Returns:
        The output of the ``Lambda`` layer.
    """
    def scatter_one(sample):
        # The shape constant is built inside the mapped function, as
        # before, so it lives in the same graph scope as the scatter op.
        out_shape = tf.constant([output_size], dtype=tf_int)
        return tf.scatter_nd(sample[0], sample[1], out_shape)

    def scatter_batch(batch):
        return K.map_fn(scatter_one, batch, dtype=tf_float)

    # Note the element order: (indices, updates).
    return Lambda(scatter_batch)([indices, updates])
def call(self, inputs, training=None):
    """Dynamic routing between capsules, printing intermediate shapes.

    Shapes below follow the original inline comments:
    ``inputs`` is [None, input_num_capsule, input_dim_capsule] and
    ``self.W`` is [num_capsule, input_num_capsule, dim_capsule,
    input_dim_capsule]. The result is [None, num_capsule, dim_capsule].
    ``training`` is not used.
    """
    def report(label, tensor):
        # Debug output; label strings are kept exactly as they were.
        print([label, tensor.shape])

    expanded = K.expand_dims(inputs, 1)
    report('the shape of inputs_expand', expanded)

    # Replicate along a new num_capsule axis so W can be applied.
    tiled = K.tile(expanded, [1, self.num_capsule, 1, 1])
    report('the shape of iput_tiled', tiled)
    report('the shape of W', self.W)

    def project(sample):
        return K.batch_dot(sample, self.W, [2, 3])

    # votes: [None, num_capsule, input_num_capsule, dim_capsule]
    votes = K.map_fn(project, elems=tiled)
    report('the shape of input_hat', votes)

    # Routing logits start at zero:
    # [None, num_capsule, input_num_capsule].
    logits = tf.zeros(shape=[
        K.shape(votes)[0], self.num_capsule, self.input_num_capsule
    ])
    report('the shape of b', logits)

    assert self.routings > 0, 'the routings should be > 0'
    for round_idx in range(self.routings):
        coupling = tf.nn.softmax(logits, axis=1)
        report('the shape of c', coupling)

        # Weighted sum of votes, squashed:
        # [None, num_capsule, dim_capsule].
        outputs = squash(K.batch_dot(coupling, votes, [2, 2]))
        report('the shape of outputs', outputs)

        if round_idx < self.routings - 1:
            # Agreement between outputs and votes updates the logits.
            logits = logits + K.batch_dot(outputs, votes, [2, 3])

    return outputs
def mc_dropout_preds(model, x: tf.Tensor, n_mc: int) -> tf.Tensor:
    """Run ``n_mc`` stochastic forward passes of ``model`` on ``x``.

    ``x`` is stacked ``n_mc`` times along a new leading axis and ``model``
    is mapped over that axis, so the result has one leading entry per
    pass (described in the original as n_mc x batch_size x n_classes).

    For the passes to differ, the model must contain stochastic layers
    such as dropout and must have been loaded with
    ``keras.backend.set_learning_phase(True)``. Takes and returns Keras
    tensors, not arrays.
    """
    # The same tensor object repeated n_mc times.
    replicated = K.stack([x] * n_mc)
    return K.map_fn(model, replicated)
def word_accuracy(y_true, y_pred):
    """Custom metric for comparing the baseline with our model.

    A word counts as correct only if every typed key is correct, so the
    per-word result is all-or-nothing.

    Note: this differs from Keras's accuracy metric. For a word of
    length 3 with one wrong key, Keras's accuracy would return 0.6667,
    whereas this metric returns 0 (False) for that word.
    """
    # If a word matches exactly, its summed absolute difference is 0.
    per_key_error = K.abs(y_true - y_pred)
    per_word_error = K.map_fn(K.sum, per_key_error)
    return K.equal(per_word_error, 0)
def accuracy(y_true, y_pred):
    """Mean predicted probability at the true start and end positions.

    ``y_true`` is squeezed on axis 1; its entries ``[0]`` and ``[1]`` are
    cast to int32 and used to index ``y_pred[:, 0, :]`` (start) and
    ``y_pred[:, 1, :]`` (end). The two looked-up values are averaged per
    sample, then averaged over axis 0.
    """
    def score_sample(sample):
        span, start_probs, end_probs = sample
        start_hit = start_probs[K.cast(span[0], dtype='int32')]
        end_hit = end_probs[K.cast(span[1], dtype='int32')]
        return (start_hit + end_hit) / 2.0

    spans = K.squeeze(y_true, axis=1)
    start_dist = y_pred[:, 0, :]
    end_dist = y_pred[:, 1, :]

    per_sample = K.map_fn(score_sample, (spans, start_dist, end_dist),
                          dtype='float32')
    return K.mean(per_sample, axis=0)
def call(self, inputs, training=None):
    """Dynamic routing between capsules.

    ``inputs`` gains an axis at position 1 and is tiled
    ``self.num_capsule`` times along it, each sample is combined with
    ``self.W`` via ``K.batch_dot``, and ``self.routings`` routing rounds
    produce the squashed output capsules. ``training`` is not used.
    """
    tiled = K.tile(K.expand_dims(inputs, 1), [1, self.num_capsule, 1, 1])

    def project(sample):
        return K.batch_dot(sample, self.W, [2, 3])

    votes = K.map_fn(project, elems=tiled)

    # Routing logits start at zero, one per (sample, output, input).
    logits = tf.zeros(shape=[K.shape(votes)[0],
                             self.num_capsule,
                             self.input_num_capsule])

    assert self.routings > 0
    for round_idx in range(self.routings):
        coupling = tf.nn.softmax(logits, axis=1)
        outputs = squash(K.batch_dot(coupling, votes, [2, 2]))
        if round_idx < self.routings - 1:
            # Agreement between outputs and votes updates the logits.
            logits = logits + K.batch_dot(outputs, votes, [2, 3])

    return outputs
def negative_avg_log_error(y_true, y_pred):
    """Negative mean log-probability of the true start and end positions.

    ``y_true`` is squeezed on axis 1; its entries ``[0]`` and ``[1]`` are
    cast to int32 and used to index ``y_pred[:, 0, :]`` (start) and
    ``y_pred[:, 1, :]`` (end). The logs of the two looked-up values are
    summed per sample, averaged over axis 0, and negated.
    """
    def log_likelihood(sample):
        span, start_probs, end_probs = sample
        start_hit = start_probs[K.cast(span[0], dtype='int32')]
        end_hit = end_probs[K.cast(span[1], dtype='int32')]
        return K.log(start_hit) + K.log(end_hit)

    spans = K.squeeze(y_true, axis=1)
    start_dist = y_pred[:, 0, :]
    end_dist = y_pred[:, 1, :]

    per_sample = K.map_fn(log_likelihood, (spans, start_dist, end_dist),
                          dtype='float32')
    return -K.mean(per_sample, axis=0)
def call(self, inputs, **kwargs):
    """Run ``filter_detections`` on every batch entry.

    ``inputs[0]`` holds the boxes, ``inputs[1]`` the classification
    scores, and ``inputs[2:]`` any further tensors that are filtered
    alongside them. The layer's ``nms``, ``class_specific_filter``,
    ``score_threshold``, ``max_detections`` and ``nms_threshold``
    attributes are forwarded unchanged. ``kwargs`` is not used.
    """
    boxes, classification = inputs[0], inputs[1]
    other = inputs[2:]

    settings = dict(
        nms=self.nms,
        class_specific_filter=self.class_specific_filter,
        score_threshold=self.score_threshold,
        max_detections=self.max_detections,
        nms_threshold=self.nms_threshold,
    )

    def filter_sample(sample):
        sample_boxes, sample_scores, sample_other = sample
        return filter_detections(sample_boxes, sample_scores, sample_other,
                                 **settings)

    # Three fixed output dtypes, followed by one per extra input.
    result_dtypes = [K.floatx(), K.floatx(), 'int64']
    result_dtypes = result_dtypes + [o.dtype for o in other]

    return K.map_fn(filter_sample,
                    elems=[boxes, classification, other],
                    dtype=result_dtypes)
def bayesian_categorical_crossentropy_internal(true, pred_var):
    """Loss combining cross-entropy with a Monte-Carlo variance term.

    ``pred_var`` holds the logits in its first ``num_classes`` columns and
    the predicted variance from column ``num_classes`` onwards.
    ``self``, ``num_classes`` and ``T`` come from the enclosing scope.

    Returns ``variance_loss + 0.5 * variance_depressor + undistorted_loss``.
    """
    logits = pred_var[:, 0:num_classes]
    variance = pred_var[:, num_classes]
    std = K.sqrt(pred_var[:, num_classes:])

    variance_depressor = -K.exp(-variance)

    # NOTE(review): current Keras documents this backend function as
    # categorical_crossentropy(target, output, ...); here the logits are
    # passed first. Confirm against the Keras version in use.
    undistorted_loss = K.categorical_crossentropy(logits, true,
                                                  from_logits=True)

    # One entry per Monte-Carlo draw; only the length T matters to map_fn.
    draws = K.variable(np.ones(T))
    noise = tfd.Normal(loc=K.zeros_like(std), scale=std)
    sampler = self.gaussian_categorical_crossentropy(
        true, logits, noise, undistorted_loss, num_classes)
    monte_carlo_results = K.map_fn(sampler, draws,
                                   name='monte_carlo_results')

    variance_loss = K.mean(monte_carlo_results, axis=0) * undistorted_loss
    return variance_loss + 0.5 * variance_depressor + undistorted_loss
def pool_image_rois(self, x, roi_boxes):
    """Crop every ROI out of ``x`` and pool it with ``self.pool_roi``.

    Each entry of ``roi_boxes`` is cast to int32 and read as
    ``[offset_width, offset_height, target_width, target_height]``.
    Returns the pooled ROIs stacked by ``K.map_fn`` (declared float32).
    """
    def crop_and_pool(box):
        box = K.cast(box, "int32")
        left, top, box_w, box_h = box[0], box[1], box[2], box[3]
        patch = tf.image.crop_to_bounding_box(
            x,
            offset_height=top,
            offset_width=left,
            target_height=box_h,
            target_width=box_w)
        return self.pool_roi(patch)

    return K.map_fn(crop_and_pool, roi_boxes, dtype=tf.float32)
def call(self, x, mask=None):
    """Compute a per-sample gradient norm of the loss.

    ``x`` must hold exactly two items (an output and a target). The
    per-sample losses come from ``_per_sample_loss``. With ``self.fast``
    the norm is built from the squared gradients of all losses at once;
    otherwise ``self._grad_norm`` is evaluated per sample via
    ``K.map_fn``. The result is reshaped to a single column.
    """
    # x should be an output and a target.
    assert len(x) == 2

    losses = _per_sample_loss(self.loss, mask, x)
    # Debug output: the gradient tensors of the losses.
    print(list(K.gradients(losses, self.parameter_list)))

    if not self.fast:
        nb_samples = K.shape(losses)[0]

        def norm_of_sample(i):
            return self._grad_norm(losses[i])

        grads = K.map_fn(norm_of_sample, K.arange(0, nb_samples),
                         dtype=K.floatx())
    else:
        squared_sums = sum(
            self._sum_per_sample(K.square(g))
            for g in K.gradients(losses, self.parameter_list)
        )
        grads = K.sqrt(squared_sums)

    return K.reshape(grads, (-1, 1))
def _ctdet_decode(hm, reg, wh, k=100, output_stride=4):
    """Decode heat-map, offset and size maps into the ``k`` best boxes.

    ``hm`` goes through a sigmoid and ``_nms`` and is flattened per batch
    entry; ``reg`` and ``wh`` are flattened to (batch, cells, channels).
    For each sample the top ``k`` scores are taken, their flat indices
    are split into class id and x / y cell, the gathered offset is added,
    and half the gathered size is subtracted / added to form the corners.
    Corners are multiplied by ``output_stride``.

    Returns a tensor whose last axis is ``[x1, y1, x2, y2, score, class]``.
    """
    hm = _nms(K.sigmoid(hm))
    hm_dims = K.shape(hm)
    reg_dims = K.shape(reg)
    wh_dims = K.shape(wh)
    batch, width, cat = hm_dims[0], hm_dims[2], hm_dims[3]

    flat_scores = K.reshape(hm, (batch, -1))
    flat_reg = K.reshape(reg, (reg_dims[0], -1, reg_dims[-1]))
    flat_wh = K.reshape(wh, (wh_dims[0], -1, wh_dims[-1]))

    def decode_one(sample):
        scores_map, offsets, sizes = sample
        top_scores, flat_idx = tf.math.top_k(scores_map, k=k, sorted=True)

        class_ids = K.cast(flat_idx % cat, 'float32')
        cell_idx = K.cast(flat_idx / cat, 'int32')
        xs = K.cast(cell_idx % width, 'float32')
        ys = K.cast(K.cast(cell_idx / width, 'int32'), 'float32')

        sizes = K.gather(sizes, cell_idx)
        offsets = K.gather(offsets, cell_idx)
        xs = xs + offsets[..., 0]
        ys = ys + offsets[..., 1]

        half_w = sizes[..., 0] / 2
        half_h = sizes[..., 1] / 2

        # Rescale to image coordinates.
        x1 = output_stride * (xs - half_w)
        y1 = output_stride * (ys - half_h)
        x2 = output_stride * (xs + half_w)
        y2 = output_stride * (ys + half_h)

        return K.stack([x1, y1, x2, y2, top_scores, class_ids], -1)

    return K.map_fn(decode_one, [flat_scores, flat_reg, flat_wh],
                    dtype=K.floatx())
def call(self, x, mask=None):
    """Pool every box in ``self.roi_boxes`` out of the batch ``x``.

    Each box is read as ``[offset_width, offset_height, target_width,
    target_height]``, cropped from ``x`` and pooled by ``self.pool_roi``.
    Per the original comments the mapped result is
    [n_rois, batch_size, n_channels] and is transposed to
    [batch_size, n_rois, n_channels]. ``mask`` is not used.
    """
    def crop_and_pool(box):
        left, top, box_w, box_h = box[0], box[1], box[2], box[3]
        patches = tf.image.crop_to_bounding_box(
            x,
            offset_height=top,
            offset_width=left,
            target_height=box_h,
            target_width=box_w)
        return self.pool_roi(patches)

    roi_major = K.map_fn(crop_and_pool, self.roi_boxes, dtype=tf.float32)
    # Swap the first two axes so the batch axis leads.
    return tf.transpose(roi_major, [1, 0, 2])
def heteroscedastic_categorical_crossentropy(true, pred):
    """Cross-entropy with a Monte-Carlo variance term and a depressor.

    The first ``D`` columns of ``pred`` are the logits; the remaining
    columns are named ``log_var`` in the original. ``D``, ``iterable``,
    ``distributions`` and ``gaussian_categorical_crossentropy`` come from
    the enclosing scope.

    Returns ``var_loss + undistorted_loss + sum(logvar_dep, -1)``.
    """
    logits = pred[:, :D]
    log_var = pred[:, D:]

    # NOTE(review): the square root is taken of log_var directly, i.e.
    # the value is used as a variance — confirm the intended scale.
    scale = K.sqrt(log_var)

    # Variance depressor.
    logvar_dep = K.exp(log_var) - K.ones_like(log_var)

    # Undistorted loss.
    undistorted_loss = K.categorical_crossentropy(logits, true,
                                                  from_logits=True)

    # Monte-Carlo simulation: one evaluation per entry of `iterable`.
    noise = distributions.Normal(loc=K.zeros_like(scale), scale=scale)
    sampler = gaussian_categorical_crossentropy(
        true, logits, noise, undistorted_loss, D)
    monte_carlo_results = K.map_fn(sampler, iterable,
                                   name='monte_carlo_results')

    var_loss = K.mean(monte_carlo_results, axis=0) * undistorted_loss
    return var_loss + undistorted_loss + K.sum(logvar_dep, -1)
def bayesian_categorical_crossentropy_internal(true, pred_var):
    """Loss combining cross-entropy with a Monte-Carlo variance term.

    ``pred_var`` holds the logits in its first ``D`` columns and the
    predicted variance from column ``D`` onwards. ``D``, ``iterable``,
    ``distributions`` and ``gaussian_categorical_crossentropy`` come from
    the enclosing scope.

    Returns ``variance_loss + undistorted_loss + variance_depressor``.
    """
    logits = pred_var[:, 0:D]
    variance = pred_var[:, D]
    std = K.sqrt(pred_var[:, D:])

    variance_depressor = K.exp(variance) - K.ones_like(variance)

    undistorted_loss = K.categorical_crossentropy(logits, true,
                                                  from_logits=True)

    # One Monte-Carlo evaluation per entry of `iterable`.
    noise = distributions.Normal(loc=K.zeros_like(std), scale=std)
    sampler = gaussian_categorical_crossentropy(
        true, logits, noise, undistorted_loss, D)
    monte_carlo_results = K.map_fn(sampler, iterable,
                                   name='monte_carlo_results')

    variance_loss = K.mean(monte_carlo_results, axis=0) * undistorted_loss
    return variance_loss + undistorted_loss + variance_depressor
def _mask_batch_affine_warp3d(self, masks, theta):
    """
    Warp a batch of 3d masks.

    Parameters
    ----------
    masks : tf.Tensor
        masks to be warped; the first four dimensions are read as
        [n_batch, xlen, ylen, zlen]
    theta : tf.Tensor
        per-sample transformation parameters; each entry is mapped through
        ``self._mask_rotation_matrix_zyz`` and the result is reshaped to
        [-1, 3, 4] (a 3x3 matrix plus a translation column)

    Returns
    -------
    output : tf.Tensor
        result of ``self._batch_warp3d`` applied to ``masks`` and the
        transformed sampling grid
    """
    dims = tf.shape(masks)
    n_batch, xlen, ylen, zlen = dims[0], dims[1], dims[2], dims[3]

    per_sample = K.map_fn(self._mask_rotation_matrix_zyz, theta)
    affine = tf.reshape(per_sample, [-1, 3, 4])
    # First three columns: linear part; last column: translation.
    linear = tf.slice(affine, [0, 0, 0], [-1, -1, 3])
    shift = tf.slice(affine, [0, 0, 3], [-1, -1, -1])

    grid = self._batch_mgrid(n_batch, xlen, ylen, zlen)
    grid = tf.reshape(grid, [n_batch, 3, -1])

    moved = tf.matmul(linear, grid) + shift
    moved = tf.reshape(moved, [n_batch, 3, xlen, ylen, zlen])

    return self._batch_warp3d(masks, moved)
def call(self, x, mask=None):
    """Label-wise attention with learned label weights.

    Attention scores come from ``dot_product(x, self.Wa)``. Each sample is
    pooled per label inside ``K.map_fn``; the pooled vectors are
    multiplied with ``self.Wo``, summed over the last axis, shifted by
    ``self.bo`` and passed through a sigmoid.

    Returns the label scores, or ``[label scores, attention]`` when
    ``self.return_attention`` is truthy. ``mask`` is not used.
    """
    scores = dot_product(x, self.Wa)

    def attend(pair):
        tokens, token_scores = pair
        weights = K.softmax(K.transpose(token_scores))
        pooled = K.dot(weights, tokens)
        # Two values are always returned because the map_fn result is
        # unpacked into two names below; the pooled vector is duplicated
        # when attention weights are not requested.
        if self.return_attention:
            return [pooled, weights]
        return [pooled, pooled]

    pooled_docs, attention = K.map_fn(attend, [x, scores])

    # Compute label scores.
    label_scores = K.sigmoid(K.sum(pooled_docs * self.Wo, axis=-1) + self.bo)
    if self.return_attention:
        return [label_scores, attention]
    return label_scores
def call(self, x):
    """Build one output channel per activation and concatenate them.

    For each ``i`` in ``range(self.n_activations)`` the slice
    ``x[:, :, i:i + 1]`` is turned into the sum of

    * a polynomial part: ``self.v[i][j] * frame**j / j!`` for ``j`` in
      ``range(self.v_order)``, and
    * a basis part: ``self.basis2`` mapped over the slice, once for each
      ``k`` in ``1..self.w_order``.

    ``self.i`` and ``self.k`` are set before ``self.basis2`` runs
    (presumably it reads them — confirm in its definition). The
    per-activation results are concatenated on axis 2.
    """
    def zero_accumulator(frame):
        return K.variable(
            np.zeros((self.batch, frame.shape[1], frame.shape[2])))

    pieces = []
    for i in range(self.n_activations):
        self.i = i
        frame = x[:, :, i:i + 1]

        poly = zero_accumulator(frame)
        for order in range(self.v_order):
            term = tfm.divide(K.pow(frame, order), math.factorial(order))
            poly = tfm.add(poly, tfm.multiply(self.v[i][order], term))

        basis_sum = zero_accumulator(frame)
        for order in range(1, self.w_order + 1):
            self.k = order
            basis_sum = tfm.add(basis_sum, K.map_fn(self.basis2, frame))

        pieces.append(tfm.add(poly, basis_sum))

    return K.concatenate(pieces, axis=2)
def call(self, x, mask=None):
    """Masked convolution applied in a rotated frame.

    Depending on ``self.mask_orientation`` the input is rotated by a
    multiple of 90 degrees, convolved with ``self.kernel * self.mask``,
    and rotated back by the complementary amount:

    * ``'tb'`` (or any other value): no rotation
    * ``'bt'``: k=2 before, k=2 after
    * ``'lr'``: k=3 before, k=1 after
    * ``'rl'``: k=1 before, k=3 after

    Bias and activation are applied afterwards when configured.
    ``mask`` is not used.
    """
    def quarter_turns(images, turns):
        # tf.image.rot90 is applied per element via map_fn.
        return K.map_fn(lambda img: tf.image.rot90(img, k=turns), images)

    orientation = self.mask_orientation

    # Rotate image according to orientation.
    if orientation == 'bt':
        x = quarter_turns(x, 2)
    elif orientation == 'lr':
        x = quarter_turns(x, 3)
    elif orientation == 'rl':
        x = quarter_turns(x, 1)

    # Convolve with the masked kernel.
    masked_kernel = self.kernel * self.mask
    outputs = K.conv2d(
        x,
        masked_kernel,
        strides=self.strides,
        padding=self.padding,
        data_format=self.data_format,
        dilation_rate=self.dilation_rate)

    # Restore image rotation according to orientation.
    if orientation == 'bt':
        outputs = quarter_turns(outputs, 2)
    elif orientation == 'lr':
        outputs = quarter_turns(outputs, 1)
    elif orientation == 'rl':
        outputs = quarter_turns(outputs, 3)

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias,
                             data_format=self.data_format)

    if self.activation is None:
        return outputs
    return self.activation(outputs)
def pointer_gen_decoder(embedding_layer,
                        decoder_lstm,
                        att_w1,
                        att_w2,
                        att_w3,
                        att_v,
                        vocab_d,
                        vocab_d_pre,
                        pgen_w1,
                        pgen_w2,
                        pgen_w3,
                        utterance_att_w1,
                        utterance_att_w2,
                        utterance_att_v,
                        encoder_h=128,
                        input_len=500,
                        output_len=101,
                        max_utterances=50,
                        tf_float=tf.float32,
                        tf_int=tf.int32):
    """
    Returns the decoder portion of the pointer-gen network.

    The decoder is unrolled for ``output_len`` steps with teacher forcing.
    At every step it runs ``decoder_lstm`` for one timestep, computes
    utterance-level and word-level attention, combines them, updates the
    coverage vector, builds the joint generate/copy probability vector and
    gathers the probability assigned to the ground-truth word.

    args:
        embedding_layer, decoder_lstm: layers applied to the delayed
            decoder input and to the decoder state
        att_w1, att_w2, att_w3, att_v: layers of the word-level attention
        vocab_d_pre, vocab_d: layers producing the fixed-vocabulary logits
        pgen_w1, pgen_w2, pgen_w3: layers producing the generation
            probability
        utterance_att_w1, utterance_att_w2, utterance_att_v: layers of the
            utterance-level attention
        encoder_h: the encoder input has ``encoder_h * 2`` features
        input_len: the length of the input sequence (to the encoder)
        output_len: the length of the output sequence (from the decoder)
        max_utterances: number of utterance slots per input
        tf_float,tf_int: defining datatypes for use in this model

    returns:
        a Model with twelve inputs (see the ``Model(...)`` call at the end
        for their order) and two outputs: the per-step probability of the
        ground-truth word and the per-step coverage loss contribution,
        both permuted so the batch axis comes first.
    """
    # NOTE(review): several shapes below are written as `(n)`, which is a
    # plain int rather than a 1-tuple — confirm the Keras version in use
    # accepts that.
    h = Input(shape=(input_len, encoder_h * 2),
              dtype=tf_float)  # the input embedding from the encoder model
    x_indices_ = Input(
        shape=(input_len), dtype=tf_int
    )  # represents where each input word prob. should be added in joint prob. vector
    x_indices = tf.expand_dims(x_indices_, axis=-1)
    # NOTE(review): 30000 is hard-coded here while apply_scatter_nd is
    # commented as assuming 30000+100 output slots — keep the two in sync.
    fixed_vocab_indices_ = Input(
        shape=(30000), dtype=tf_int)  # the size of the input vocabulary
    fixed_vocab_indices = tf.expand_dims(fixed_vocab_indices_, axis=-1)
    att_mask = Input(
        shape=(input_len), dtype=tf_float
    )  # mask used with the attention distribution to mask out padding
    decoder_x = Input(
        shape=(output_len), dtype=tf_int
    )  # delayed y_data for input to the decoder (for teacher-forcing)
    y_indices = Input(
        shape=(output_len), dtype=tf_int
    )  # indices of the correct word in the joint_probabilities vector
    utterance_att_mask = Input(
        shape=(max_utterances), dtype=tf_float
    )  # used for canceling out attention values when num-utterances < max_utterances
    utterance_indices = Input(
        shape=(max_utterances),
        dtype=tf_int)  # indices of the end of each utterance in input
    utterance_lengths = Input(
        shape=(max_utterances),
        dtype=tf_int)  # length of each utterance in input
    # Initial decoder state and coverage vector are model inputs.
    # NOTE(review): the state size 256 is hard-coded — presumably it must
    # match the units of decoder_lstm; verify against the caller.
    s_ = Input(shape=(256), dtype=tf_float)  # decoder_h
    c_ = Input(shape=(256), dtype=tf_float)
    coverage_vector_ = Input(shape=(input_len), dtype=tf_float)
    # Working copies that are re-bound on every decoder step.
    s, c, coverage_vector = s_, c_, coverage_vector_
    # Encoder states picked out at the utterance end positions.
    utterance_h = tf.gather(h, utterance_indices, batch_dims=1)
    decoder_e = embedding_layer(
        decoder_x)  # embeddings for delayed input to the decoder
    outputs = [
    ]  # stores probability of correct ground-truth predictions at each decoder output step
    coverage_loss_contributions = [
    ]  # stores coverage loss contribution for each decoder output step
    for i in range(output_len):  # loop through each step of the decoder
        decoder_input = decoder_e[:,
                                  i, :]  # input to the decoder at this timestep
        s, _, c = decoder_lstm(tf.expand_dims(decoder_input, axis=1),
                               initial_state=[s, c])
        # calculating utterance-level attention:
        s_rep = RepeatVector(max_utterances)(
            s)  # copying the decoder hidden state
        utterance_e = utterance_att_v(
            Activation("tanh")(utterance_att_w1(utterance_h) +
                               utterance_att_w2(s_rep)))
        utterance_e = tf.squeeze(
            utterance_e,
            axis=-1) + utterance_att_mask  # including attention mask
        utterance_a = Activation("softmax")(
            utterance_e)  # scaled sentence-level attention
        # Repeat each utterance weight by that utterance's length so the
        # result lines up with the word positions.
        utterance_a = Lambda(lambda entry: K.map_fn(
            lambda entry: tf.repeat(entry[0], entry[1], axis=0),
            entry,
            dtype=tf_float))([utterance_a, utterance_lengths
                              ])  # same shape as word-level att.
        # calculating word-level attention (probabilities over input):
        s_rep = RepeatVector(input_len)(s)  # copying the decoder hidden state
        e = att_v(
            Activation("tanh")(att_w1(h) + att_w2(s_rep) +
                               att_w3(tf.expand_dims(coverage_vector, axis=-1))
                               ))  # unscaled attention
        e = tf.squeeze(
            e, axis=-1
        ) + att_mask  # using attention mask (masks out padding in the input sequence)
        a = Activation("softmax")(
            e)  # scaled attention (represents prob. over input)
        # calculating hierarchical attention (combination of utterance-level and word-level attention):
        a = a * utterance_a
        a = Activation("softmax")(a)
        # handling coverage vector computations:
        step_coverage_loss = tf.reduce_sum(
            tf.minimum(coverage_vector, a),
            axis=-1)  # cov loss at this decoder step
        coverage_loss_contributions.append(step_coverage_loss)
        coverage_vector += a
        # calculating probabilities over fixed vocabulary:
        context = Dot(axes=1)([a, h])  # calculating the context vector
        pre_vocab_prob = Concatenate()([s, context])
        pre_vocab_prob = vocab_d_pre(pre_vocab_prob)  # extra Dense layer
        pre_vocab_prob = vocab_d(pre_vocab_prob)
        vocab_prob = Activation("softmax")(pre_vocab_prob)
        # calculating probability for text generation:
        pre_gen_prob = pgen_w1(context) + pgen_w2(s) + pgen_w3(decoder_input)
        gen_prob = Activation("sigmoid")(pre_gen_prob)
        # calculating joint-probability for generation/copying:
        vocab_prob *= gen_prob  # probability of generating a word from the fixed vocabulary
        copy_prob = a * (1 - gen_prob
                         )  # probability of copying a word from the input
        # creating the joint-probability vector:
        vocab_prob_projected = apply_scatter_nd(vocab_prob,
                                                fixed_vocab_indices, tf_int,
                                                tf_float)
        joint_prob = apply_scatter_nd_add(vocab_prob_projected, copy_prob,
                                          x_indices, tf_int, tf_float)
        # gathering predictions from joint-probability vector - doing it here will reduce memory consumption
        y_indices_i = tf.expand_dims(
            y_indices[:, i],
            axis=-1)  # getting predictions at time i for whole batch
        predictions_i = tf.squeeze(tf.gather(joint_prob,
                                             y_indices_i,
                                             batch_dims=1,
                                             axis=-1),
                                   axis=-1)
        outputs.append(predictions_i)
    # The per-step lists are stacked step-major; permute so the batch
    # axis leads.
    prediction_probabilities = K.permute_dimensions(
        tf.convert_to_tensor(outputs), (1, 0))
    coverage_loss_contributions = K.permute_dimensions(
        tf.convert_to_tensor(coverage_loss_contributions), (1, 0))
    model = Model(
        inputs=[
            h, x_indices_, decoder_x, att_mask, y_indices, s_, c_,
            coverage_vector_, fixed_vocab_indices_, utterance_att_mask,
            utterance_indices, utterance_lengths
        ],
        outputs=[prediction_probabilities, coverage_loss_contributions])
    return model
def discriminator_network(global_shape=(256, 256, 3), local_shape=(128, 128, 3)):
    """Build a discriminator with a global and a local branch.

    The model takes a global image and four int32 crop points per sample.
    The global image goes through six stride-2 convolutions; the crop cut
    out of it goes through five. Both branches end in a 1024-unit dense
    layer, are concatenated, and feed a single sigmoid unit.

    Returns:
        A ``Model`` with inputs ``[global image, crop points]``.
    """
    def crop_image(img, crop):
        # crop[1] / crop[0] are passed as the height / width offsets, and
        # the target size is the difference to crop[3] / crop[2], so the
        # points are consumed as [x1, y1, x2, y2].
        return tf.image.crop_to_bounding_box(
            img,
            offset_height=crop[1],
            offset_width=crop[0],
            target_height=crop[3] - crop[1],
            target_width=crop[2] - crop[0])

    def conv_tower(tensor, filter_counts):
        # Conv -> BatchNorm -> ReLU per stage, then Flatten -> Dense(1024).
        for n_filters in filter_counts:
            tensor = layers.Conv2D(n_filters, kernel_size=5, strides=2,
                                   padding='same')(tensor)
            tensor = layers.BatchNormalization()(tensor)
            tensor = layers.Activation('relu')(tensor)
        tensor = layers.Flatten()(tensor)
        return layers.Dense(1024, activation='relu')(tensor)

    in_pts = layers.Input(shape=(4, ), dtype='int32')
    cropping = layers.Lambda(
        lambda x: K.map_fn(lambda y: crop_image(y[0], y[1]),
                           elems=x, dtype=tf.float32),
        output_shape=local_shape)

    global_img = layers.Input(shape=global_shape)
    local_img = cropping([global_img, in_pts])
    # map_fn loses the static shape; restore it for the conv layers.
    local_img.set_shape((None, ) + local_shape)

    # Global branch first, then local, matching the original layer order.
    out_global = conv_tower(global_img, (64, 128, 256, 512, 512, 512))
    out_local = conv_tower(local_img, (64, 128, 256, 512, 512))

    # Concatenate local and global discriminator.
    out = layers.concatenate([out_local, out_global])
    out = layers.Dense(1, activation='sigmoid')(out)
    return Model(inputs=[global_img, in_pts], outputs=out)
def gmean(y_true, y_pred):
    """Compute the geometric mean.

    The geometric mean (G-mean) is the root of the product of class-wise
    sensitivity. This measure tries to maximize the accuracy on each of
    the classes while keeping these accuracies balanced.

    The value is evaluated in a fresh ``tf.Session`` and the result of
    ``sess.run`` is returned, not a symbolic tensor.

    Papers
    .. [1] Kubat, M. and Matwin, S. "Addressing the curse of imbalanced
       training sets: one-sided selection" ICML (1997)
    .. [2] Barandela, R., Sánchez, J. S., García, V., & Rangel, E.
       "Strategies for learning in class imbalance problems",
       Pattern Recognition, 36(3), (2003), pp 849-851.
    """

    # NOTE(review): `recall` is defined but never called in this function.
    def recall(y_true, y_pred):
        y_pred = backend.round(y_pred)
        tp = backend.sum(backend.cast(y_true * y_pred, tf.float32), axis=0)
        # fp is computed but not used in the returned value.
        fp = backend.sum(backend.cast((1 - y_true) * y_pred, tf.float32),
                         axis=0)
        fn = backend.sum(backend.cast(y_true * (1 - y_pred), tf.float32),
                         axis=0)
        return tp / (tp + fn + backend.epsilon())

    # Same formula as `recall`, without the reduction over axis 0.
    def element_wise_recall(y_true, y_pred):
        y_pred = backend.round(y_pred)
        tp = backend.cast(y_true * y_pred, tf.float32)
        # fp is computed but not used in the returned value.
        fp = backend.cast((1 - y_true) * y_pred, tf.float32)
        fn = backend.cast(y_true * (1 - y_pred), tf.float32)
        return tp / (tp + fn + backend.epsilon())

    # Reads dimension 1 of y_pred; both branches of the cond yield that
    # value (0 when it is 0, the value itself otherwise).
    def number_of_classes(y_pred):
        value = backend.shape(y_pred)[1]
        return tf.cond(tf.equal(value, 0), lambda: tf.constant(0, tf.int32),
                       lambda: value)

    # Create empty recall list
    # NOTE(review): this is a constant of shape [0, 10], i.e. it holds no
    # elements — confirm this is the intended accumulator.
    recalls = tf.constant(1.0, shape=[0, 10])

    # NOTE(review): this function has no return statement and the result of
    # tf.scatter_add is discarded, so `recalls` is not rebound here —
    # confirm whether an in-place update of a variable was intended.
    def multiply_recalls(x):
        # 1 when the two entries of x match, 0 otherwise.
        X = tf.cond(tf.equal(x[0], x[1]), lambda: tf.constant(1, tf.int32),
                    lambda: tf.constant(0, tf.int32))
        y = 1
        r = element_wise_recall(y, X)
        indices = x[0]
        tf.scatter_add(recalls, indices, r)

    # flatten y_true
    y_true = tf.reshape(y_true, [-1])
    # get number of classes
    num_classes = number_of_classes(y_pred)
    # class predictions
    y_pred_classes = backend.map_fn(lambda x: backend.argmax(x), y_pred)
    # Concat
    # NOTE(review): tf.stack defaults to axis=0, so the map_fn below would
    # iterate over the two stacked rows rather than per-sample
    # (true, predicted) pairs — confirm intent.
    y_true_y_pred = tf.stack([y_true, y_pred_classes])
    # create recall value per class
    backend.map_fn(lambda x: multiply_recalls(x), y_true_y_pred)
    # Multiply recall values
    recall_value = backend.prod(recalls)
    # create exponent
    b = tf.constant(1, tf.float32) / num_classes
    result = tf.pow(recall_value, b)
    # Evaluate eagerly in a new session (TF1-style API).
    with tf.Session() as sess:
        return sess.run(result)