def build_train_graph(self):
    # Build ELMo embedding ops for the context, the candidate utterances,
    # and the per-sentence context inputs.
    (elmo_context_input, elmo_utterances_input, elmo_context_sentence_input) = \
        self.elmo.build_embeddings_op(self.context_ids_ph,
                                      self.utterances_ids_ph,
                                      self.context_sentence_ids_ph)

    # logits
    with tf.variable_scope("inference", reuse=False):
        self.logits = self._inference(
            elmo_context_input['weighted_op'], self.context_len_ph,
            elmo_utterances_input['weighted_op'], self.utterances_len_ph,
            elmo_context_sentence_input['weighted_op'], self.context_sentence_len_ph,
            self.tot_context_len_ph, self.speaker_ph)

    self.loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.logits, labels=self.target_ph, name="cross_entropy")
    self.loss_op = tf.reduce_mean(self.loss_op, name="cross_entropy_mean")
    self.train_op = tf.train.AdamOptimizer().minimize(
        self.loss_op, global_step=self.global_step)

    # Accuracy: fraction of examples whose target is the top-1 prediction.
    eval_op = tf.nn.in_top_k(self.logits, self.target_ph, 1)
    correct_count = tf.reduce_sum(tf.cast(eval_op, tf.int32))
    self.accuracy = tf.divide(correct_count, tf.shape(self.target_ph)[0])

    # Candidate indices ranked best-first, plus softmax confidences.
    self.predictions = argsort(self.logits, axis=1, direction='DESCENDING')
    self.confidence = tf.nn.softmax(self.logits, axis=-1)
def build_train_graph_multi_gpu(self):
    gpu_num = len(self.hparams.gpu_num)

    # Split every placeholder along the batch dimension, one shard per GPU.
    context_ph = tf.split(self.context_ph, gpu_num, 0)
    context_len_ph = tf.split(self.context_len_ph, gpu_num, 0)
    utterances_ph = tf.split(self.utterances_ph, gpu_num, 0)
    utterances_len_ph = tf.split(self.utterances_len_ph, gpu_num, 0)
    target_ph = tf.split(self.target_ph, gpu_num, 0)
    context_sentence_ph = tf.split(self.context_sentence_ph, gpu_num, 0)
    context_sentence_len_ph = tf.split(self.context_sentence_len_ph, gpu_num, 0)
    tot_context_len_ph = tf.split(self.tot_context_len_ph, gpu_num, 0)
    speaker_ph = tf.split(self.speaker_ph, gpu_num, 0)

    optimizer = tf.train.AdamOptimizer(self.hparams.learning_rate)
    tower_grads = []
    tot_losses = []
    tot_logits = []
    tot_labels = []
    for i, gpu_id in enumerate(self.hparams.gpu_num):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.variable_scope("inference", reuse=tf.AUTO_REUSE):
                logits = self._inference(
                    context_ph[i], context_len_ph[i],
                    utterances_ph[i], utterances_len_ph[i],
                    context_sentence_ph[i], context_sentence_len_ph[i],
                    tot_context_len_ph[i], speaker_ph[i])
                loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=target_ph[i], name="cross_entropy")
                loss_op = tf.reduce_mean(loss_op, name="cross_entropy_mean")
                tot_losses.append(loss_op)
                tot_logits.append(logits)
                tot_labels.append(target_ph[i])

                # Per-tower gradients; averaged across towers below.
                grads = optimizer.compute_gradients(loss_op)
                tower_grads.append(grads)
                tf.get_variable_scope().reuse_variables()

    grads = average_gradients(tower_grads)
    self.loss_op = tf.divide(tf.add_n(tot_losses), gpu_num)
    self.logits = tf.concat(tot_logits, axis=0)
    tot_labels = tf.concat(tot_labels, axis=0)
    self.train_op = optimizer.apply_gradients(grads, self.global_step)

    eval_op = tf.nn.in_top_k(self.logits, tot_labels, 1)
    correct_count = tf.reduce_sum(tf.cast(eval_op, tf.int32))
    self.accuracy = tf.divide(correct_count, tf.shape(self.target_ph)[0])
    self.predictions = argsort(self.logits, axis=1, direction='DESCENDING')
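# `average_gradients` is not defined in this snippet; below is a minimal
# sketch of the standard TensorFlow multi-tower helper it appears to follow
# (structure assumed, not taken from this codebase).
def average_gradients(tower_grads):
    """Averages gradients over towers.

    Args:
      tower_grads: list over towers of lists of (gradient, variable) tuples,
        as returned by optimizer.compute_gradients() on each GPU.

    Returns:
      One list of (gradient, variable) tuples with tower-averaged gradients.
    """
    average_grads = []
    # zip(*tower_grads) regroups the per-tower pairs by shared variable.
    for grad_and_vars in zip(*tower_grads):
        # Stack this variable's gradients and average over the tower axis.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so the first handle suffices.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads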
def _classification_loss(self, pred_label, gt_label, num_matched_boxes):
    """Computes the classification loss.

    Computes the classification loss with hard negative mining.

    Args:
      pred_label: a flattened tensor of predicted class scores. The shape is
        [batch_size, num_anchors, num_classes].
      gt_label: a tensor of classification groundtruth targets. The shape is
        [batch_size, num_anchors, 1].
      num_matched_boxes: the number of anchors matched to a groundtruth
        target, used as the loss normalizer.

    Returns:
      A float32 scalar representing the mean classification loss.
    """
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        gt_label, pred_label, reduction=tf.losses.Reduction.NONE)

    mask = tf.greater(tf.squeeze(gt_label), 0)
    float_mask = tf.cast(mask, tf.float32)

    # Hard example mining: rank the negative anchors by loss and keep only
    # the hardest NEGS_PER_POSITIVE negatives per matched positive.
    neg_masked_cross_entropy = cross_entropy * (1 - float_mask)
    relative_position = contrib_framework.argsort(
        contrib_framework.argsort(
            neg_masked_cross_entropy, direction='DESCENDING'))
    num_neg_boxes = tf.minimum(
        tf.to_int32(num_matched_boxes) * ssd_constants.NEGS_PER_POSITIVE,
        ssd_constants.NUM_SSD_BOXES)
    top_k_neg_mask = tf.cast(
        tf.less(
            relative_position,
            tf.tile(num_neg_boxes[:, tf.newaxis],
                    (1, ssd_constants.NUM_SSD_BOXES))), tf.float32)

    class_loss = tf.reduce_sum(
        tf.multiply(cross_entropy, float_mask + top_k_neg_mask), axis=1)
    return tf.reduce_mean(class_loss / num_matched_boxes)
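# The nested argsort above is a rank trick: argsort(argsort(x, DESCENDING))
# yields each element's rank in descending order. A toy illustration with
# assumed values (not taken from the SSD pipeline):
losses = tf.constant([0.1, 0.9, 0.4, 0.7])
order = contrib_framework.argsort(losses, direction='DESCENDING')  # [1, 3, 2, 0]
ranks = contrib_framework.argsort(order)                           # [3, 0, 2, 1]
# tf.less(ranks, 2) keeps exactly the two highest-loss (hardest) anchors.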
def non_max_suppression(detection_boxes, detection_scores,
                        max_output_size=70, iou_threshold=0.05, scope=None):
    """Non max suppression and abnormal filtering."""
    with tf.name_scope(scope, 'Non_max_suppression',
                       [max_output_size, iou_threshold]):
        selected_indices = tf.image.non_max_suppression(
            detection_boxes, detection_scores, max_output_size, iou_threshold)
        result_boxes = tf.gather(detection_boxes, selected_indices,
                                 name='result_boxes')
        result_scores = tf.gather(detection_scores, selected_indices,
                                  name='result_scores')

        # Pairwise IoU between the kept boxes; mark partial overlaps
        # (0 < IoU < 1), i.e. distinct boxes that still intersect.
        abnormal_inter = target_assigner.iou(result_boxes, result_boxes)
        abnormal_inter = tf.where(
            (abnormal_inter > 0) & (abnormal_inter < 1),
            tf.ones_like(abnormal_inter), tf.zeros_like(abnormal_inter),
            name='abnormal_inter')

        # A box overlapping two or more other boxes is flagged as abnormal.
        num_inter = tf.reduce_sum(abnormal_inter, 0)
        abnormal_inter_idx = tf.where(num_inter >= 2)
        abnormal_inter_idx = tf.reshape(abnormal_inter_idx, [-1])
        abnormal_inter_idx = tf.cast(abnormal_inter_idx, tf.int32,
                                     name='abnormal_inter_idx')

        # Indices of the two outermost boxes along each of the first two
        # box coordinates.
        abnormal_indices = argsort(result_boxes[:, :2], axis=0)
        abnormal_indices = tf.concat(
            [abnormal_indices[:2], abnormal_indices[-2:]], 0,
            name='abnormal_indices')

        result_dict = {
            'result_boxes': result_boxes,
            'result_scores': result_scores,
            'abnormal_indices': abnormal_indices,
            'abnormal_inter_idx': abnormal_inter_idx,
            'abnormal_inter': abnormal_inter
        }
        return result_dict
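# A minimal usage sketch for the filter above (placeholder shapes assumed;
# tf.image.non_max_suppression expects boxes as [N, 4] in
# [ymin, xmin, ymax, xmax] order and scores as [N]):
boxes_ph = tf.placeholder(tf.float32, shape=[None, 4], name='boxes')
scores_ph = tf.placeholder(tf.float32, shape=[None], name='scores')
nms_dict = non_max_suppression(boxes_ph, scores_ph,
                               max_output_size=70, iou_threshold=0.05)
# nms_dict['result_boxes'] / ['result_scores'] hold the kept detections;
# the 'abnormal_*' entries flag kept boxes that still partially overlap.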
def build_train_graph(self):
    # logits
    with tf.variable_scope("inference", reuse=False):
        self.logits = self._inference(self.context_ph, self.context_len_ph,
                                      self.utterances_ph, self.utterances_len_ph)

    self.loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.logits, labels=self.target_ph, name="cross_entropy")
    self.loss_op = tf.reduce_mean(self.loss_op, name="cross_entropy_mean")
    self.train_op = tf.train.AdamOptimizer().minimize(
        self.loss_op, global_step=self.global_step)

    eval_op = tf.nn.in_top_k(self.logits, self.target_ph, 1)
    correct_count = tf.reduce_sum(tf.cast(eval_op, tf.int32))
    self.accuracy = tf.divide(correct_count, tf.shape(self.target_ph)[0])
    self.predictions = argsort(self.logits, axis=1, direction='DESCENDING')
def map_charades(y_true, y_pred):
    """Returns the mean average precision (mAP) over classes."""
    m_aps = []
    tf_one = tf.constant(1, dtype=tf.float32)
    n_classes = int(y_pred.shape[1])
    for oc_i in range(n_classes):
        # Rank the samples of this class by descending prediction score.
        pred_row = y_pred[:, oc_i]
        sorted_idxs = tf_framework.argsort(-pred_row)
        true_row = tf.cast(tf.gather(y_true[:, oc_i], sorted_idxs), tf.float32)

        # True positives are the ground-truth hits in ranked order; false
        # positives are the non-hits at each rank.
        tp = tf.cast(tf.equal(true_row, tf_one), tf.float32)
        fp = tf_one - tp
        n_pos = tf.reduce_sum(tp)

        # Precision at each rank, accumulated only at true-positive ranks,
        # then normalized by the number of positives: this class's AP.
        t_pcs = tf.cumsum(tp)
        f_pcs = tf.cumsum(fp)
        prec = t_pcs / (t_pcs + f_pcs)
        avg_prec = tf.reduce_sum(prec * tp) / n_pos
        m_aps.append(tf.expand_dims(avg_prec, axis=0))
    m_aps = K.concatenate(m_aps, axis=0)
    mAP = K.mean(m_aps)
    return mAP
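# A quick deterministic check of map_charades (toy tensors, not part of the
# original project). Every positive outranks every negative in both classes,
# so the expected mAP is 1.0.
y_true = tf.constant([[1., 0.], [0., 1.], [1., 1.], [0., 0.]])
y_pred = tf.constant([[0.9, 0.2], [0.3, 0.8], [0.7, 0.6], [0.1, 0.4]])
map_op = map_charades(y_true, y_pred)
with tf.Session() as sess:
    print(sess.run(map_op))  # 1.0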
def evaluate(self, saved_file: str):
    context = tf.placeholder(tf.int32, shape=[None, None], name="context")
    context_len = tf.placeholder(tf.int32, shape=[None], name="context_len")
    utterances = tf.placeholder(tf.int32, shape=[None, None, None], name="utterances")
    utterances_len = tf.placeholder(tf.int32, shape=[None, None], name="utterances_len")
    target = tf.placeholder(tf.int32, shape=[None], name="target")

    # logits
    with tf.variable_scope("inference", reuse=False):
        logits = self._inference(context, context_len, utterances, utterances_len)
    predictions = argsort(logits, axis=1, direction='DESCENDING')

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, saved_file)

    data = DataProcess(self.hparams.valid_path, "test", self.word2id)
    k_list = [1, 2, 5, 10, 50, 100]
    total_examples = 0
    total_correct = np.zeros([len(k_list)], dtype=np.int32)  # one counter per k
    while True:
        pad_batch_data = data.get_batch_data(self.hparams.batch_size)
        if pad_batch_data is None:
            break
        (pad_context, context_len_batch), \
            (pad_utterances, utterances_len_batch), target_batch = pad_batch_data
        feed_dict = {
            context: pad_context,
            context_len: context_len_batch,
            utterances: pad_utterances,
            utterances_len: utterances_len_batch,
            target: target_batch
        }
        pred_val = sess.run(predictions, feed_dict=feed_dict)
        num_correct, num_examples = evaluate_recall(pred_val, target_batch, k_list)
        total_examples += num_examples
        total_correct = np.add(total_correct, num_correct)

    recall_result = ""
    for i in range(len(k_list)):
        recall_result += "Recall@%s : " % k_list[i] + "%.2f%% | " % (
            (total_correct[i] / total_examples) * 100)
    self._logger.info(recall_result)
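# `evaluate_recall` is not shown here; a sketch of the contract the loop
# above assumes (per-k hit counts plus the number of examples scored):
def evaluate_recall(predictions, targets, k_list):
    """predictions: [batch, num_candidates] candidate ids ranked best-first.
    targets: [batch] id of the true candidate for each example."""
    num_correct = np.zeros([len(k_list)], dtype=np.int32)
    for ranked, target in zip(predictions, targets):
        for i, k in enumerate(k_list):
            # Recall@k scores a hit if the true candidate is in the top k.
            if target in ranked[:k]:
                num_correct[i] += 1
    return num_correct, len(targets)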