def call(self, inputs):
    """Apply the radial-basis activation to ``inputs``.

    The inputs are cast to float32, expanded on axis 1 and transposed so
    they broadcast against ``self.mu``.  The response is
    ``exp(-beta * sum((inputs - mu) ** 2, axis=0))``.  When
    ``self.same_smooth_fac`` is false ``abs(self.beta)`` is used instead of
    ``self.beta``.  When ``self.normalization`` is truthy the responses are
    divided by their sum along axis 0 (plus a small constant).

    NOTE: a Mahalanobis distance is not implemented yet.  The previous code
    branched on ``self.dist == "mahalanobis"`` but ran the exact same
    squared-Euclidean computation in both branches, so the duplicate branch
    has been collapsed and ``self.dist`` is no longer consulted here.

    Returns:
        The transposed (optionally normalised) activation tensor.
    """
    # TODO: implement mahalanobis distance to compare results
    inputs = tf.dtypes.cast(inputs, tf.float32)
    inputs = K.transpose(K.expand_dims(inputs, 1))

    # Squared Euclidean distance; same value as tf.norm(...) ** 2 but with
    # less computational cost.
    squared_dist = K.sum(K.pow(inputs - self.mu, 2), axis=0)

    # beta has to be positive, hence the abs() in the non-shared case.
    beta = self.beta if self.same_smooth_fac else tf.math.abs(self.beta)
    rho = K.exp(-beta * squared_dist)

    if self.normalization:
        # 10e-6 (i.e. 1e-5) keeps the division away from zero.
        rho = rho / (K.sum(rho, axis=0) + 10e-6)
    return K.transpose(rho)
def _partial_powers(one_hot_encoded_row, Aadj_T, num_powers):
    """Compute one row of the first ``num_powers`` adjacency-matrix powers.

    Args:
        one_hot_encoded_row: sparse one-hot tensor selecting the row; it is
            densified and reshaped to ``(1, Aadj_T.shape[1])``.
        Aadj_T: the transpose of the adjacency matrix.
        num_powers (int): how many successive powers to compute.

    Returns:
        The per-power row vectors stacked along axis 1 with axis 0 squeezed
        away, i.e. a matrix of shape ``(num_powers, Aadj_T.shape[1])``.
    """
    # The transposed adjacency is used because TensorFlow requires the sparse
    # matrix to be the first operand of the product.
    dense_row = tf.sparse.to_dense(one_hot_encoded_row)
    current = tf.reshape(dense_row, shape=(1, Aadj_T.shape[1]))

    rows_per_power = []
    for _ in range(num_powers):
        # Multiply by the adjacency once more, keeping the row-vector layout.
        current = K.transpose(K.dot(Aadj_T, K.transpose(current)))
        rows_per_power.append(current)

    stacked = tf.stack(rows_per_power, axis=1)
    return K.squeeze(stacked, axis=0)
def _zoh(self):
    """Return transformed (A, B) blocks from the exponential of an augmented matrix.

    Builds ``[[AT^T, B[0]^T], zero_padding]``, scales it by ``self.dt`` and
    takes the matrix exponential.  The leading ``order x order`` block is
    returned transposed; the remaining columns of the first ``order`` rows
    are reshaped to ``self.B.shape``.
    (Presumably a zero-order-hold discretisation, judging by the name.)
    """
    upper_rows = K.concatenate(
        [K.transpose(self.AT), K.transpose(self.B[0])], axis=1)
    augmented = K.concatenate([upper_rows, self.zero_padding], axis=0)
    exp_augmented = K.tf.linalg.expm(self.dt * augmented)

    state_block = exp_augmented[:self.order, :self.order]
    input_block = exp_augmented[:self.order, self.order:]
    return (K.transpose(state_block), K.reshape(input_block, self.B.shape))
def calculate_alphas(conv_layer, binary_weights, M, num_epoc=500, learning_rate=0.01):
    '''Fit the scaling factors ``alphas`` of a binary-weight approximation.

    The layer is flattened to a column vector, the binary weights to an
    ``(M, -1)`` matrix, and ``alphas`` (shape ``(M, 1)``, random-normal
    initialised) is refined with ``num_epoc`` plain gradient-descent steps on
    the squared error between ``binary_weights^T . alphas`` and the layer.

    Args:
        conv_layer: tensor to approximate.
        binary_weights: tensor reshapeable to ``(M, -1)``.
        M: number of binary bases.
        num_epoc: number of gradient-descent iterations.
        learning_rate: step size of each update.

    Returns:
        ``(alphas, loss)`` where ``loss`` is the list of per-iteration mean
        squared errors (one tensor per iteration).
    '''
    flat_conv_layer = BK.reshape(conv_layer, shape=(np.prod(conv_layer.get_shape()), 1))
    flat_binary_weights = BK.reshape(binary_weights, shape=(M, -1))
    # Loop-invariant, so transpose once instead of on every iteration.
    binary_weights_t = BK.transpose(flat_binary_weights)

    alphas = BK.random_normal(shape=(M, 1))
    loss = []
    for _ in range(num_epoc):
        # Residual of the current approximation, shared by loss and gradient.
        residual = BK.dot(binary_weights_t, alphas) - flat_conv_layer
        loss.append(BK.mean(BK.square(residual), axis=0))
        grad = BK.dot(flat_binary_weights, residual) / flat_conv_layer.shape[0]
        alphas = alphas - learning_rate * grad
    return alphas, loss
def cell_offset_table(scale_size, input_size=416):
    """Build the table of grid-cell offsets for a square detection grid.

    Args:
        scale_size: number of cells along each side of the grid.
        input_size: factor applied to the ``1 / scale_size`` cell fraction.
            Defaults to 416, the value that used to be hard-coded
            (presumably the network input resolution in pixels).

    Returns:
        A float32 tensor reshaped to ``[1, scale_size, scale_size, 1, 2]``
        holding the cell index pairs multiplied by
        ``1 / scale_size * input_size``.
    """
    # Dynamic implementation of conv dims for fully convolutional model.
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=scale_size)
    conv_width_index = K.arange(0, stop=scale_size)

    # tile repeats the same sequence N times.
    # Result (for scale_size == 13): 0..12, 0..12, ..., 0..12
    conv_height_index = K.tile(conv_height_index, [scale_size])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    # Calling tile with [n, m] produces a 2-D result.
    # Result: [0..12], [0..12], [0..12], ...
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [scale_size, 1])

    # Result: 0 repeated 13 times, then 1 repeated 13 times, ...
    conv_width_index = K.flatten(K.transpose(conv_width_index))

    # Result: [0, 0], [1, 0], [2, 0], ..., [11, 12], [12, 12]
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))

    # One [1 x 2] index item per cell of the 1 x 13 x 13 grid, holding the
    # pairs listed above; this is a step towards building the mask.
    # Resulting shape: 1, 13, 13, 1, 2
    conv_index = K.reshape(conv_index, [1, scale_size, scale_size, 1, 2])
    conv_index = K.cast(conv_index, tf.float32)

    # Same expression order as before so the default yields identical values.
    diff = (1 / scale_size * input_size)
    return conv_index * diff
def pairwise_distances(X, y=None):
    '''Squared distances between the first two slices of ``X``'s last axis.

    Input:
        X: tensor whose last axis has at least two entries.  ``x`` is taken
           as ``X[..., 0:1]`` and ``y`` as ``X[..., 1:2]``.
        y: ignored.  It is kept only for signature compatibility; the value
           passed in is always overwritten by the slice of ``X`` (callers
           always pass None).

    Output:
        dist, where dist[i, j] = ||x[i, :] - y[j, :]||^2, computed as
        ``|x|^2 + |y|^2 - 2 * x . y^T`` and clipped below at 0.0.

    NOTE: because ``y`` is always rebuilt from ``X``, the former
    ``y is None`` fallback (use ``y = x``) could never run and was removed.
    '''
    x = K.cast(X[..., 0:1], dtype=FLOAT_TYPE)
    y = K.cast(X[..., 1:2], dtype=FLOAT_TYPE)

    x_norm = K.reshape(K.sum((x**2), axis=1), shape=(-1, 1))
    y_norm = K.reshape(K.sum((y**2), axis=1), shape=(1, -1))

    dist = x_norm + y_norm - 2.0 * tf.matmul(x, K.transpose(y))
    # Rounding can push tiny distances below zero; clamp them.
    return K.clip(dist, 0.0, float('inf'))
def Kget_dists(X):
    """Keras code to compute the pairwise squared-distance matrix for the
    set of vectors specified by the rows of the matrix X.

    Uses the expansion ``|a|^2 + |b|^2 - 2 a.b``.
    """
    squared_norms = K.sum(K.square(X), axis=1)
    norms_column = K.expand_dims(squared_norms, 1)
    inner_products = K.dot(X, K.transpose(X))
    return norms_column + K.transpose(norms_column) - 2 * inner_products
def angular_loss_2(y_true, y_pred):
    """Angular-style triplet loss over consecutive (anchor, positive, negative) rows.

    ``y_pred`` is clipped to ``[_EPSILON, 1 - _EPSILON]`` and read in groups
    of three rows (``i``, ``i + 1``, ``i + 2``) for ``i`` stepping through
    the module-level ``batch_size``.  Two per-triplet terms are collected,
    each list is folded with ``log(1 + 2 * sum(exp(.)))``, and the result
    ``(term1 + 2 * term2) / (batch_size / 3)`` is floored at zero.

    Args:
        y_true: unused; present for the Keras loss signature.
        y_pred: predictions holding the triplets row by row.

    Returns:
        Tensor of shape ``[1]`` with the non-negative loss.

    NOTE(review): ``alpha`` is 45.0 and is fed to ``tf.tan``, which works in
    radians - confirm whether degrees were intended.
    """
    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    g = tf.constant(1.0, shape=[1], dtype=tf.float32)
    c = tf.constant(4.0, shape=[1], dtype=tf.float32)
    d = tf.constant(2.0, shape=[1], dtype=tf.float32)
    alpha = tf.constant(45.0, shape=[1], dtype=tf.float32)

    losses = []
    losses2 = []
    for i in range(0, batch_size, 3):
        try:
            xa = y_pred[i]
            xp = y_pred[i + 1]
            xn = y_pred[i + 2]
            fapn = c * (tf.tan(alpha * K.transpose(xa + xp) * xn)**
                        2) - d * (g + tf.tan(alpha)**2) * K.transpose(xa) * xp
            losses.append(fapn)
            losses2.append(K.transpose(xa) * xn - K.transpose(xa) * xp)
            # The old in-loop accumulator (`loss + g + _loss`) was dropped:
            # `_loss` is not defined in this function and the accumulated
            # value was overwritten right after the loop anyway.
        except Exception:
            # Best-effort, as before: a triplet that cannot be built is
            # skipped.  Narrowed from a bare `except:` so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            continue

    loss = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses])))
    loss2 = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses2])))
    loss = loss + 2 * loss2
    loss = loss / (batch_size / 3)
    zero = tf.constant(0.0, shape=[1], dtype=tf.float32)
    return tf.maximum(loss, zero)
def call(self, inputs, **kwargs):
    """Return soft cluster labels computed with a Student's t kernel.

    For each input and each entry of ``self.clusters`` the squared distance
    (summed over axis 2 after expanding ``inputs`` on axis 1) is mapped to
    ``(1 + d / alpha) ** -((alpha + 1) / 2)`` and then normalised so every
    row sums to one.
    """
    # Squared distance of every input to every cluster centre.
    offsets = K.expand_dims(inputs, axis=1) - self.clusters
    squared_dist = K.sum(K.square(offsets), axis=2)

    # t-distribution kernel with parameter alpha.
    kernel = 1.0 / (1.0 + (squared_dist / self.alpha))
    kernel = kernel ** ((self.alpha + 1.0) / 2.0)

    # Normalise the probabilities; the double transpose lets the per-row
    # sums broadcast along the cluster axis.
    row_sums = K.sum(kernel, axis=1)
    return K.transpose(K.transpose(kernel) / row_sums)
def call(self, inputs):
    """Compute ``inputs . inputs^T``, optionally routed through ``self.kernel``.

    With ``self.trainable_kernel`` set the product is
    ``(inputs . kernel) . inputs^T``; otherwise it is ``inputs . inputs^T``.
    ``self.activation`` is applied to the result when it is not None.
    """
    left_operand = K.dot(inputs, self.kernel) if self.trainable_kernel else inputs
    product = K.dot(left_operand, K.transpose(inputs))
    if self.activation is None:
        return product
    return self.activation(product)
def cosine_similarity(self, x, y):
    """Pairwise cosine similarity between the rows of ``x`` and ``y``.

    Both tensors are first flattened to 2-D, keeping axis 0.  The result is
    the matrix of dot products divided by the outer product of the row
    norms.  No epsilon is added, so an all-zero row yields a division by
    zero.
    """
    def _as_rows(tensor):
        # Keep the leading axis, collapse everything else.
        return K.reshape(tensor, (K.shape(tensor)[0], -1))

    def _row_norms(rows):
        return K.sqrt(K.sum(K.square(rows), axis=1, keepdims=True))

    x_rows = _as_rows(x)
    y_rows = _as_rows(y)
    numerator = K.dot(x_rows, K.transpose(y_rows))
    denominator = K.dot(_row_norms(x_rows), K.transpose(_row_norms(y_rows)))
    return numerator / denominator
def call(self, x_input):
    """Project the input with ``self.W`` and normalise it twice.

    Steps: ``W . x_input^T`` -> ``self.softabsolute`` -> ``self.l2row`` ->
    transpose -> ``self.l2row`` again.  The second normalisation therefore
    acts on the transposed matrix.
    """
    projected = kb.dot(self.W, kb.transpose(x_input))
    activated = self.softabsolute(projected)
    normalised_once = self.l2row(activated)
    return self.l2row(kb.transpose(normalised_once))
def call(self, inputs, **kwargs):
    '''
    Decide the nearest cluster by means of a t-distribution.

    Assumes that (vector - cluster centre) follows a t-distribution.  The
    kernel ``1 / (1 + squared distance)`` is computed for every input /
    cluster pair and normalised so each row sums to one.
    '''
    offsets = K.expand_dims(inputs, axis=1) - self.clusters
    squared_dist = K.sum(K.square(offsets), axis=2)
    kernel = 1.0 / (1.0 + squared_dist)

    # Divide every row by its sum; the transposes make the sums broadcast.
    row_sums = K.sum(kernel, axis=1)
    return K.transpose(K.transpose(kernel) / row_sums)
def tf_sample_gibbs(A, b, beta, N):
    """Naive sampling from p(x) = 1/Z*exp(-beta*(x^T*A*x + b*x)).

    Every one of the 2^N states in {-1, 1}^N is enumerated, its logit is
    evaluated, and one state is drawn per distribution with
    ``tf.random.categorical``.  The drawn states are reshaped to the shape
    of ``b``.

    NOTE(review): the indexing below implies ``A`` has rank 4 and ``b`` rank
    3 with a leading axis of size N - confirm against the callers.
    """
    num_states = 2**N
    states = K.constant(list(itertools.product([-1, 1], repeat=N)))  # 2^N x N tensor

    # Quadratic term x^T A x for every enumerated state.
    states_times_a = tf.tensordot(states, A, axes=[[1], [0]])
    quad = -beta * K.sum(states_times_a * states[:, :, None, None], axis=1)
    # Put the highest quad logits around 0 to ensure precision when we add
    # biases.
    quad = quad - K.max(quad, axis=[0])[None, :, :]

    logits = quad - beta * tf.tensordot(states, b, axes=[[1], [0]])
    # Same here: tensorflow doesn't seem to work well with high logits.
    logits = logits - K.max(logits, axis=[0])

    # One categorical distribution per column once flattened and transposed.
    per_distribution = K.transpose(K.reshape(logits, (num_states, -1)))
    chosen_rows = tf.random.categorical(per_distribution, 1)[:, 0]

    chosen_states = tf.gather(states, chosen_rows, axis=0)
    return K.reshape(K.transpose(chosen_states), K.shape(b))
def spectral_norm(self, w, r=5):
    """Estimate the spectral norm of ``w`` with ``r`` power iterations.

    ``w`` is reshaped to ``(prod(shape[:-1]), shape[-1])``.  Starting from an
    all-ones row vector, the left and right vectors are alternately updated
    and l2-normalised; the estimate returned is ``u . w . v^T``.
    """
    shape = K.int_shape(w)
    rows = np.prod(shape[:-1]).astype(int)
    cols = shape[-1]
    matrix = K.reshape(w, (rows, cols))

    left = K.ones((1, rows))
    for _ in range(r):
        right = K.l2_normalize(K.dot(left, matrix))
        left = K.l2_normalize(K.dot(right, K.transpose(matrix)))

    return K.sum(K.dot(K.dot(left, matrix), K.transpose(right)))
def call(self, x):
    """Apply the PCA projection and quantise the result to uint8.

    The input is transposed, shifted by ``self.pca_means``, multiplied by
    ``self.pca_matrix`` and transposed back.  Values are clipped to
    ``[params.QUANTIZE_MIN_VAL, params.QUANTIZE_MAX_VAL]``, rescaled to the
    0..255 range and cast to ``'uint8'``.
    """
    centered = K.transpose(x) - self.pca_means
    projected = K.transpose(K.dot(self.pca_matrix, centered))

    clipped = tf.clip_by_value(
        projected, params.QUANTIZE_MIN_VAL, params.QUANTIZE_MAX_VAL)
    scale = 255.0 / (params.QUANTIZE_MAX_VAL - params.QUANTIZE_MIN_VAL)
    quantised = (clipped - params.QUANTIZE_MIN_VAL) * scale
    return K.cast(quantised, 'uint8')
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last is assumed).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Probability estimate for whether each box contains any object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Probability distribution estimate for each box over class labels.

    Note
    ----
    The values are returned in exactly the order listed above
    (confidence first); the previous docstring listed a different order
    than the code returned.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last

    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    # Split the channel axis into (anchor, box parameters + class scores).
    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def call(self, inputs, **kwargs):
    """Compute the (adaptive) softmax output distribution.

    ``inputs`` is a list: element 0 is the data tensor, the following
    ``cluster_num + 1`` elements are embedding matrices and the remainder
    are projection matrices.  The list entries are only used where the
    layer's own ``self.embeddings`` / ``self.projections`` entry is None.

    With ``div_val == 1`` a single softmax over one embedding is returned.
    Otherwise one softmax is computed per cutoff range: the first one is
    taken jointly with the cluster logits, and each later one is scaled by
    the probability of its cluster.  The per-range outputs are concatenated
    on the last axis.
    """
    num_shared = self.cluster_num + 1
    shared_embeddings = inputs[1:1 + num_shared]
    shared_projections = inputs[1 + num_shared:]
    inputs = inputs[0]

    if self.div_val == 1:
        if self.embed_dim != self.input_dim or self.force_projection:
            projection = self.projections
            if projection is None:
                projection = shared_projections[0]
            inputs = K.dot(inputs, K.transpose(projection))
        embedding = self.embeddings
        if embedding is None:
            embedding = shared_embeddings[0]
        logits = K.dot(inputs, K.transpose(embedding))
        if self.use_bias:
            logits = K.bias_add(logits, self.biases)
        return keras.activations.softmax(logits, axis=-1)

    cluster_probs = None
    pieces = []
    for i in range(len(self.cutoffs) - 1):
        embed_dim = self.embed_dim // (self.div_val**i)
        if embed_dim != self.input_dim or self.force_projection:
            projection = self.projections[i]
            if projection is None:
                projection = shared_projections[i]
            cluster_input = K.dot(inputs, K.transpose(projection))
        else:
            cluster_input = inputs

        embedding = self.embeddings[i]
        if embedding is None:
            embedding = shared_embeddings[i]
        piece = K.dot(cluster_input, K.transpose(embedding))
        if self.use_bias:
            piece = K.bias_add(piece, self.biases[i])

        if cluster_probs is None:
            # Head: softmax jointly over the head logits and the cluster
            # logits, then split the two parts again.
            cluster_logits = K.dot(cluster_input, self.kernel_cluster)
            if self.use_bias:
                cluster_logits = K.bias_add(cluster_logits, self.bias_cluster)
            joint = K.concatenate([piece, cluster_logits], axis=-1)
            joint = keras.activations.softmax(joint, axis=-1)
            cluster_probs = joint[..., -self.cluster_num:]
            piece = joint[..., :-self.cluster_num]
        else:
            # Tail: softmax within the cluster, weighted by its probability.
            piece = keras.activations.softmax(piece, axis=-1)
            piece = piece * K.expand_dims(cluster_probs[..., i - 1])
        pieces.append(piece)

    return K.concatenate(pieces, axis=-1)
def call(self, inputs, mask=None):
    """Soft cluster assignment using a Student t-distribution (as in t-SNE).

    Args:
        inputs: the variable containing the data,
            shape=(number_of_samples, number_of_features).
        mask: accepted for the layer signature; not used.

    Returns:
        The kernel values, normalised so that each row sums to one.
    """
    alpha = self.alpha
    diff = K.expand_dims(inputs, axis=1) - self.clusters
    dist_sq = K.sum(K.square(diff), axis=2)

    q = 1.0 / (1.0 + (dist_sq / alpha))
    q = q ** ((alpha + 1.0) / 2.0)

    # Row-wise normalisation via a double transpose.
    normalised_t = K.transpose(q) / K.sum(q, axis=1)
    return K.transpose(normalised_t)
def call(self, inputs, **kwargs):
    """
    RBF activation function: phi = exp[-beta * ||x - mu||^2].

    :param inputs: tensor compared against ``self.centers``
    :param kwargs: accepted for the layer signature; not used
    :return: ``exp(-self.betas * squared distance)`` where the squared
        distance is summed over axis 1 of the transposed difference
    """
    expanded_centers = K.expand_dims(self.centers)
    difference = K.transpose(expanded_centers - K.transpose(inputs))
    squared_dist = K.sum(difference**2, axis=1)
    return K.exp(-self.betas * squared_dist)
def sWasserstein(P, Q, theta, nclass, Cp=None, Cq=None):
    """Sliced-Wasserstein style discrepancy between ``P`` and ``Q``.

    Both inputs are projected with ``theta^T`` and compared with
    ``oneDWassersteinV3``; the mean is weighted by 10.  When both ``Cp`` and
    ``Cq`` are given, a per-class term (weight 100) is added for each of the
    ``nclass`` columns, using only the rows whose entry in that column is
    non-zero.
    """
    lambda_ = 10.0
    class_weight = 100.

    def _rows_in_class(projected, membership, class_idx):
        # Rows whose membership column for this class is non-zero.
        selected = tf.where(tf.not_equal(membership[:, class_idx], 0))
        return tf.gather(projected, tf.squeeze(selected))

    p = K.dot(P, K.transpose(theta))
    q = K.dot(Q, K.transpose(theta))
    sw = lambda_ * K.mean(oneDWassersteinV3(p, q))

    if Cp is None or Cq is None:
        return sw

    for i in range(nclass):
        pi = _rows_in_class(p, Cp, i)
        qi = _rows_in_class(q, Cq, i)
        sw = sw + class_weight * K.mean(oneDWassersteinV3(pi, qi))
    return sw
def call(self, input):
    """Apply ``self.num_layer`` cross layers and flatten the result.

    Every layer computes
    ``batch_dot(dot(x, W[i]^T), input) + bias[i] + x`` where ``x`` is the
    previous layer's output (the raw ``input`` for the first layer).

    Changes from the previous version:
      * the special case for ``i == 0`` was identical to the general case
        with ``x = input``, so both share one code path now;
      * with ``num_layer == 0`` the input is simply flattened instead of
        failing on an unbound ``cross`` variable;
      * the loop index is bound as a lambda default, so each ``Lambda`` keeps
        its own layer index even if it is evaluated later.

    Args:
        input: the layer input (name kept for interface compatibility even
            though it shadows the builtin).

    Returns:
        The flattened output of the last cross layer.
    """
    cross = input
    for i in range(self.num_layer):
        cross = L.Lambda(
            lambda x, i=i: K.batch_dot(K.dot(x, K.transpose(self.W[i])), input)
            + self.bias[i] + x)(cross)
    return L.Flatten()(cross)
def call(self, inputs):
    """Reorder ``inputs`` according to ``self.idxs``.

    On the first call (``self.idxs`` is None) the index list is created as
    ``0 .. v_dim - 1`` where ``v_dim`` is the static size of the last axis;
    it is reversed for ``mode == 'reverse'`` and shuffled in place with
    NumPy for ``mode == 'random'``.  The list is stored on the layer, so
    later calls reuse the same ordering.

    The reordering is done by transposing, gathering along axis 0 and
    transposing back (for 2-D inputs this permutes the last axis).
    """
    v_dim = K.int_shape(inputs)[-1]
    # Identity test: `== None` would be evaluated element-wise (and fail) if
    # idxs were ever set to a NumPy array.
    if self.idxs is None:
        self.idxs = list(range(v_dim))
        if self.mode == 'reverse':
            self.idxs = self.idxs[::-1]
        elif self.mode == 'random':
            np.random.shuffle(self.idxs)
    inputs = K.transpose(inputs)
    outputs = K.gather(inputs, self.idxs)
    outputs = K.transpose(outputs)
    return outputs
def call(self, inputs, **kwargs):
    """Student's t soft assignment, the same kernel as in the t-SNE algorithm.

    q_ij = (1 + dist(x_i, u_j)^2 / alpha) ^ -((alpha + 1) / 2), normalised
    over the clusters.

    Arguments:
        inputs: the variable containing data, shape=(n_samples, n_features)
    Return:
        q: student's t-distribution with degree alpha, i.e. soft labels for
           each sample. shape=(n_samples, n_clusters)
    """
    exponent = (self.alpha + 1.0) / 2.0
    sample_minus_center = K.expand_dims(inputs, axis=1) - self.clusters
    scaled_dist = K.sum(K.square(sample_minus_center), axis=2) / self.alpha

    unnormalised = (1.0 / (1.0 + scaled_dist)) ** exponent
    per_sample_total = K.sum(unnormalised, axis=1)
    # Transpose so the per-sample totals broadcast, then transpose back.
    return K.transpose(K.transpose(unnormalised) / per_sample_total)
def call(self, inputs, **kwargs):
    """Student t-distribution kernel: probability of assigning encoded
    sequence i to cluster k.

    q_{ik} = (1 + dist(z_i, m_k)^2 / alpha)^{-(alpha + 1) / 2} / normalization.

    The distance depends on ``self.dist_metric``:
      * 'eucl': Euclidean distance over time, summed over features;
      * 'cid':  Euclidean distance weighted by a complexity ratio;
      * 'cor':  sqrt(2 * (1 - Pearson correlation)), summed over features;
      * 'acf':  not implemented.

    Arguments:
        inputs: encoded input sequences,
            shape=(n_samples, timesteps, n_features)
    Return:
        q: soft labels for each sample. shape=(n_samples, n_clusters)
    Raises:
        NotImplementedError: for the 'acf' metric.
        ValueError: for any other unknown metric.
    """
    metric = self.dist_metric

    def _euclidean_over_time():
        # shape (n_samples, n_clusters, n_features)
        gap = K.expand_dims(inputs, axis=1) - self.clusters
        return K.sqrt(K.sum(K.square(gap), axis=2))

    def _complexity(series):
        # Root of the summed squared first differences along the time axis.
        steps = series[:, 1:, :] - series[:, :-1, :]
        return K.sqrt(K.sum(K.square(steps), axis=1))

    def _standardise(series):
        # Zero mean / unit standard deviation along the time axis.
        mean = K.expand_dims(K.mean(series, axis=1), axis=1)
        std = K.expand_dims(K.std(series, axis=1), axis=1)
        return (series - mean) / std

    if metric == 'eucl':
        distance = K.sum(_euclidean_over_time(), axis=-1)
    elif metric == 'cid':
        ce_x = _complexity(inputs)  # shape (n_samples, n_features)
        ce_w = _complexity(self.clusters)  # shape (n_clusters, n_features)
        # Complexity correction factor,
        # shape (n_samples, n_clusters, n_features)
        ce = (K.maximum(K.expand_dims(ce_x, axis=1), ce_w)
              / K.minimum(K.expand_dims(ce_x, axis=1), ce_w))
        distance = K.sum(_euclidean_over_time() * ce, axis=-1)
    elif metric == 'cor':
        inputs_norm = _standardise(inputs)
        clusters_norm = _standardise(self.clusters)
        # Pearson correlation coefficients
        pcc = K.mean(K.expand_dims(inputs_norm, axis=1) * clusters_norm,
                     axis=2)
        # correlation-based similarities, shape (n_samples, n_clusters)
        distance = K.sum(K.sqrt(2.0 * (1.0 - pcc)), axis=-1)
    elif metric == 'acf':
        raise NotImplementedError
    else:
        raise ValueError('Available distances are eucl, cid, cor and acf!')

    kernel = 1.0 / (1.0 + K.square(distance) / self.alpha)
    kernel = kernel ** ((self.alpha + 1.0) / 2.0)
    return K.transpose(K.transpose(kernel) / K.sum(kernel, axis=1))
def _pairwise_distances(self, inputs: List[Tensor]) -> Tensor:
    """Euclidean distances between the rows of two embedding batches.

    ``inputs`` is the pair ``(emb_c, emb_r)``.  Both are concatenated, the
    full squared-distance matrix is formed from norms and dot products, and
    the block pairing ``emb_c`` rows with ``emb_r`` rows is kept.  The
    slicing uses the size of ``emb_c`` for both batches.

    Exact zeros are nudged by 1e-16 before the square root and reset to
    zero afterwards (presumably to keep the sqrt gradient finite).
    """
    emb_c, emb_r = inputs
    batch = K.shape(emb_c)[0]

    stacked = K.concatenate([emb_c, emb_r], 0)
    gram = K.dot(stacked, K.transpose(stacked))
    sq_norm = K.batch_dot(stacked, stacked, axes=1)

    # |a|^2 - 2 a.b + |b|^2, then keep only the (emb_c, emb_r) block.
    sq_dist = K.transpose(sq_norm) - 2.0 * gram + sq_norm
    sq_dist = sq_dist[0:batch, batch:batch + batch]
    sq_dist = K.clip(sq_dist, 0.0, None)

    is_zero = K.cast(K.equal(sq_dist, 0.0), K.dtype(sq_dist))
    dist = K.sqrt(sq_dist + is_zero * 1e-16)
    return dist * (1.0 - is_zero)
def mutual_information(tensor_a, tensor_b, bins=256):
    """Compute a mutual-information style score from a joint histogram.

    Both tensors are transposed, one slice of each is discretised with
    ``discretize_with_histogram`` and the joint histogram of the two
    discretised slices is turned into a joint probability table.

    NOTE(review): ``i`` is never assigned inside this function, so it must
    come from module scope or the call raises NameError.  Presumably a
    per-channel loop was removed - confirm and restore or drop the index.

    Args:
        tensor_a: first tensor; its last static dimension is read via
            ``.shape[-1].value``.
        tensor_b: second tensor.
        bins: number of histogram bins passed to the helper functions.

    Returns:
        The scalar sum described in the final step below.
    """
    # Size of the last axis (TF1-style ``Dimension.value``); not used below.
    channel = tensor_a.shape[-1].value
    # Discretise slice ``i`` of each transposed tensor (see NOTE above).
    _a, _ = discretize_with_histogram(K.transpose(tensor_a)[i], bins=bins)
    _b, _ = discretize_with_histogram(K.transpose(tensor_b)[i], bins=bins)
    # Pair up the flattened values so a joint histogram can be built.
    ab = K.stack([K.flatten(_a), K.flatten(_b)])
    joint_hist = K.cast(joint_histogram(ab, bins=bins), dtype=K.floatx())
    # Normalise counts to probabilities; the clip keeps the log finite.
    joint_proba = joint_hist / K.sum(joint_hist)
    joint_proba = K.clip(joint_proba, 1e-7, 1)
    # Marginals over each axis of the joint table.
    a_proba = K.sum(joint_proba, axis=1)
    b_proba = K.sum(joint_proba, axis=0)
    # Make the first marginal a column so the product broadcasts to a table.
    a_proba = K.expand_dims(a_proba, axis=-1)
    # NOTE(review): the sum is additionally weighted by the raw counts in
    # joint_hist; textbook mutual information uses only joint_proba - confirm.
    mui = K.sum(joint_hist * joint_proba * K.log(joint_proba / (a_proba * b_proba)))
    return mui
def call(self, inputs):
    """Produce the backcast and forecast of the trend block.

    The parent ``call`` supplies the two coefficient tensors.  A polynomial
    basis ``t ** i`` (``i < self.theta_units``) is evaluated on
    ``arange(-fdw, fw) / fdw``; the first ``fdw`` time steps form the
    backcast basis and the remaining ones the forecast basis.

    Returns:
        ``(backcast, forecast)``.
    """
    theta_b_output, theta_f_output = super(TrendBlock, self).call(inputs)

    time_axis = K.cast(K.arange(-self.fdw, self.fw, 1) / self.fdw, tf.float32)
    # One row per polynomial degree, one column per time step.
    basis = K.stack(
        [time_axis**degree for degree in range(self.theta_units)], axis=0)

    backcast_basis = basis[:, :self.fdw]
    forecast_basis = basis[:, self.fdw:]
    backcast = K.dot(theta_b_output, backcast_basis)
    forecast = K.dot(theta_f_output, forecast_basis)
    return backcast, forecast
def shift(shape, stride, anchors):
    """Produce shifted anchors based on shape of the map and stride size.

    Args:
        shape (tuple): Shape to shift the anchors over; ``shape[0]`` drives
            the y positions and ``shape[1]`` the x positions.
        stride (int): Stride to shift the anchors with over the shape.
        anchors: The anchors to apply at each location (4 values each).

    Returns:
        Backend tensor of shape ``[k * number_of_anchors, 4]`` with every
        anchor translated to every cell centre, where ``k`` is the number
        of grid positions.
    """
    dtype = K.floatx()
    half_cell = K.constant(0.5, dtype=dtype)

    # Cell-centre coordinates along each axis.
    centers_x = (K.arange(0, shape[1], dtype=dtype) + half_cell) * stride
    centers_y = (K.arange(0, shape[0], dtype=dtype) + half_cell) * stride

    grid_x, grid_y = tf.meshgrid(centers_x, centers_y)
    flat_x = K.reshape(grid_x, [-1])
    flat_y = K.reshape(grid_y, [-1])

    # One (x, y, x, y) offset per grid position.
    offsets = K.transpose(K.stack([flat_x, flat_y, flat_x, flat_y], axis=0))

    num_anchors = K.shape(anchors)[0]
    num_points = K.shape(offsets)[0]  # number of base points = feat_h * feat_w

    offsets = K.cast(K.reshape(offsets, [num_points, 1, 4]), dtype)
    moved = K.reshape(anchors, [1, num_anchors, 4]) + offsets
    return K.reshape(moved, [num_points * num_anchors, 4])
def gramMatrix(x):
    """Return the Gram matrix (feature-map inner products) of ``x``.

    ``x`` is expected to be a single 3-D feature tensor.  It is laid out as
    a 2-D matrix with one row per channel before the product
    ``features . features^T`` is taken.

    Fix: the ``channels_first`` branch used ``K.flatten``, which collapses
    the tensor to 1-D, so the product could not yield a channel-by-channel
    Gram matrix.  It now uses ``K.batch_flatten`` (keeps axis 0, flattens
    the rest), mirroring what the ``channels_last`` branch does after
    moving the channel axis to the front.
    """
    if K.image_data_format() == "channels_first":
        features = K.batch_flatten(x)
    else:
        # Bring channels to the front, then flatten the spatial axes.
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram