def IoU(bbox, gt):
    """Compute IoU between one box and a batch of ground-truth boxes.

    Args:
        bbox: single box [x, y, w, h] with (x, y) the top-left corner.
        gt: ground-truth boxes, shape [N, 4], same [x, y, w, h] format.

    Returns:
        [N, 1] float32 tensor of IoU values in [0, 1].
    """
    shape = [-1, 1]
    # Corners of the candidate intersection rectangle (bbox broadcast
    # against every row of gt).
    x1 = tf.maximum(tf.cast(bbox[0], tf.float32), tf.reshape(tf.cast(gt[:, 0], tf.float32), shape))
    y1 = tf.maximum(tf.cast(bbox[1], tf.float32), tf.reshape(tf.cast(gt[:, 1], tf.float32), shape))
    x2 = tf.minimum(tf.cast(bbox[2] + bbox[0], tf.float32), tf.reshape(tf.cast(gt[:, 2] + gt[:, 0], tf.float32), shape))
    y2 = tf.minimum(tf.cast(bbox[3] + bbox[1], tf.float32), tf.reshape(tf.cast(gt[:, 3] + gt[:, 1], tf.float32), shape))
    # BUGFIX: clamp each side at zero. Previously, two disjoint boxes could
    # make BOTH deltas negative, so their product was a spurious positive
    # intersection area that the later negative-IoU mask never caught.
    inter_w = tf.maximum(tf.sub(x2, x1), 0.0)
    inter_h = tf.maximum(tf.sub(y2, y1), 0.0)
    inter = tf.cast(inter_w * inter_h, tf.float32)
    bounding_box = tf.cast(tf.mul(bbox[2], bbox[3]), tf.float32)
    ground_truth = tf.reshape(tf.cast(tf.mul(gt[:, 2], gt[:, 3]), tf.float32), shape)
    # With inter >= 0 the ratio already lies in [0, 1]; the old
    # mask_less post-processing is no longer needed.
    iou = inter / (bounding_box + ground_truth - inter)
    return iou
def compute_IOU(bboxA, bboxB):
    """Compute the Intersection Over Union of two box batches.

    Args:
        bboxA: [N x 4] tensor, format = [left, top, right, bottom].
        bboxB: [N x 4] tensor, same format.

    Return:
        IOU: [N x 1] tensor.
    """
    leftA, topA, rightA, bottomA = tf.split(1, 4, bboxA)
    leftB, topB, rightB, bottomB = tf.split(1, 4, bboxB)

    # Corners of the candidate intersection rectangle.
    inter_left = tf.maximum(leftA, leftB)
    inter_top = tf.maximum(topA, topB)
    inter_right = tf.minimum(rightA, rightB)
    inter_bottom = tf.minimum(bottomA, bottomB)

    # 1.0 where the rectangles genuinely overlap, 0.0 otherwise.
    # overlap_flag = tf.logical_and( tf.less(x1_max, x2_min), tf.less(y1_max, y2_min))
    has_overlap = tf.to_float(tf.less(inter_left, inter_right)) * \
        tf.to_float(tf.less(inter_top, inter_bottom))
    inter_area = tf.mul(has_overlap, tf.mul(
        inter_right - inter_left, inter_bottom - inter_top))

    # |A U B| = |A| + |B| - |A n B|
    areaA = tf.mul(rightA - leftA, bottomA - topA)
    areaB = tf.mul(rightB - leftB, bottomB - topB)
    return tf.div(inter_area, areaA + areaB - inter_area)
def IoULoss(self, pd, gt):
    """Pixel-wise IoU loss between predicted and ground-truth geometry maps.

    Args:
        pd: predicted tensor whose last axis holds 4 per-pixel box offsets
            (presumably [left, top, right, bottom] distances -- TODO confirm).
        gt: ground-truth tensor with the same layout as `pd`.

    Returns:
        Scalar cost: sum over valid pixels of -log(IoU).
    """
    # Valid-pixel mask: 1.0 where more than three of the last-axis
    # channels of gt are > 0, i.e. all four offsets are positive.
    mask = tf.cast(
        tf.greater(tf.reduce_sum(
            tf.cast(tf.greater(gt, 0), tf.int8), 3), 3),
        tf.float32
    )
    # Move the channel axis first so tf.gather can select single channels.
    npd = tf.transpose(pd, [3, 0, 1, 2])
    ngt = tf.transpose(gt, [3, 0, 1, 2])
    # Predicted box area: (ch0 + ch2) * (ch1 + ch3).
    area_x = tf.mul(
        tf.add(tf.gather(npd, 0), tf.gather(npd, 2)),
        tf.add(tf.gather(npd, 1), tf.gather(npd, 3)),
    )
    # Ground-truth box area, same formula.
    area_g = tf.mul(
        tf.add(tf.gather(ngt, 0), tf.gather(ngt, 2)),
        tf.add(tf.gather(ngt, 1), tf.gather(ngt, 3)),
    )
    # Overlap width: sum of the smaller left/right offsets, clamped at 0
    # for the no-overlap case.
    w_overlap = tf.maximum(tf.constant(0, tf.float32), tf.add(
        tf.minimum(tf.gather(npd, 0), tf.gather(ngt, 0)),
        tf.minimum(tf.gather(npd, 2), tf.gather(ngt, 2)),
    ))
    # Overlap height, symmetric to the width.
    h_overlap = tf.maximum(tf.constant(0, tf.float32), tf.add(
        tf.minimum(tf.gather(npd, 1), tf.gather(ngt, 1)),
        tf.minimum(tf.gather(npd, 3), tf.gather(ngt, 3)),
    ))
    area_overlap = tf.mul(w_overlap, h_overlap)
    area_u = tf.sub(tf.add(area_x, area_g), area_overlap)
    # +1 in the denominator guards against division by zero on empty unions.
    iou = tf.div(area_overlap, tf.add(area_u, tf.constant(1, tf.float32)))
    # Floor the IoU so the log below stays finite.
    iou = tf.maximum(iou, tf.constant(1e-4, tf.float32))
    cost = -tf.log(iou)
    cost = tf.mul(cost, mask)
    cost = tf.reduce_sum(cost)
    return cost
def _update_lipschitz(self, v, i):
    """Build an op that rescales weight tensor `v` to (softly) enforce a
    norm constraint of k = config.weight_constraint_k.

    Args:
        v: weight variable. 1-D tensors (biases) are skipped.
        i: layer index (only used by the commented-out metric logging).

    Returns:
        A tf.assign op writing the rescaled weights, or None for 1-D `v`.
    """
    config = self.config
    if len(v.shape) > 1:
        k = self.config.weight_constraint_k or 100.0000
        wi_hat = v
        if len(v.shape) == 4:
            # 4-D conv kernel: collapse to a magnitude summary before the
            # p-norm reduction below.
            #fij = tf.reduce_sum(tf.abs(wi_hat), axis=[0,1])
            fij = wi_hat
            fij = tf.reduce_sum(tf.abs(fij), axis=[1])
            fij = tf.reduce_max(fij, axis=[0])
        else:
            fij = wi_hat

        if self.config.ortho_pnorm == "inf":
            wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=0), axis=0)
        else:
            # conv
            wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=1), axis=0)
        # Shrink factor: 1 while the norm is within k, otherwise k/wp.
        ratio = (1.0/tf.maximum(1.0, wp/k))

        if self.config.weight_bounce:
            # Extra shrink when the norm exceeds k ("bounce" off the bound).
            bounce = tf.minimum(1.0, tf.ceil(wp/k-0.999))
            ratio -= tf.maximum(0.0, bounce) * 0.2

        if self.config.weight_scaleup:
            # Grow tiny weights (wp/k < 0.02) back up towards the bound.
            # NOTE(review): k/wp divides by the raw norm -- presumably
            # wp > 0 whenever this branch triggers; verify against callers.
            up = tf.minimum(1.0, tf.ceil(0.02-wp/k))
            ratio += tf.maximum(0.0, up) * k/wp * 0.2

        wi = ratio*(wi_hat)
        #self.gan.metrics['wi'+str(i)]=wp
        #self.gan.metrics['wk'+str(i)]=ratio
        #self.gan.metrics['bouce'+str(i)]=bounce
        return tf.assign(v, wi)
    return None
def transformer_policy(global_step, learning_rate, d_model, warmup_steps,
                       max_lr=None, coefficient=1.0, dtype=tf.float32):
    """Transformer ("noam") learning rate policy with an optional cap.

    Linear warm-up for `warmup_steps` steps followed by inverse-square-root
    decay, scaled by ``coefficient * d_model ** -0.5 * learning_rate``, as
    in https://arxiv.org/pdf/1706.03762.pdf.

    Args:
        global_step: global step TensorFlow tensor.
        learning_rate (float): initial learning rate to use.
        d_model (int): model dimensionality.
        warmup_steps (int): number of warm-up steps.
        max_lr (float): maximal learning rate, i.e. hat.
        coefficient (float): optimizer adjustment. Recommended 0.002 if
            using "Adam" else 1.0.
        dtype: dtype for this policy.

    Returns:
        learning rate at step ``global_step``.
    """
    step = tf.cast(global_step, dtype=dtype) + 1
    warmup = tf.cast(warmup_steps, dtype=dtype)
    # min(linear warm-up, inverse-sqrt decay); the two meet at warmup_steps.
    schedule = tf.minimum(step * warmup ** -1.5, step ** -0.5)
    new_lr = coefficient * d_model ** -0.5 * schedule * learning_rate
    if max_lr is None:
        return new_lr
    return tf.minimum(max_lr, new_lr)
def sample_from_discretized_mix_logistic(l, nr_mix):
    """Draw a 3-channel pixel sample from a discretized mix-of-logistics output.

    Args:
        l: network output; the last axis holds nr_mix mixture logits
           followed by nr_mix * 3 * 3 component parameters
           (means / log-scales / coefficients -- layout inferred from the
           slicing below, TODO confirm).
        nr_mix: number of mixture components.

    Returns:
        Tensor of shape xs[:-1] + [3] with sampled values clipped to [-1, 1].
    """
    ls = int_shape(l)
    xs = ls[:-1] + [3]
    # unpack parameters
    logit_probs = l[:, :, :, :nr_mix]
    l = tf.reshape(l[:, :, :, nr_mix:], xs + [nr_mix * 3])
    # sample mixture indicator from softmax (Gumbel-max trick: argmax of
    # logits plus Gumbel noise) and one-hot encode the winner.
    sel = tf.one_hot(tf.argmax(logit_probs - tf.log(-tf.log(tf.random_uniform(
        logit_probs.get_shape(), minval=1e-5, maxval=1. - 1e-5))), 3), depth=nr_mix, dtype=tf.float32)
    sel = tf.reshape(sel, xs[:-1] + [1, nr_mix])
    # select logistic parameters of the chosen component
    means = tf.reduce_sum(l[:, :, :, :, :nr_mix] * sel, 4)
    # log-scale is clamped from below for numerical stability
    log_scales = tf.maximum(tf.reduce_sum(
        l[:, :, :, :, nr_mix:2 * nr_mix] * sel, 4), -7.)
    coeffs = tf.reduce_sum(tf.nn.tanh(
        l[:, :, :, :, 2 * nr_mix:3 * nr_mix]) * sel, 4)
    # sample from logistic & clip to interval
    # we don't actually round to the nearest 8bit value when sampling
    # (inverse-CDF sampling: mean + scale * logit(u))
    u = tf.random_uniform(means.get_shape(), minval=1e-5, maxval=1. - 1e-5)
    x = means + tf.exp(log_scales) * (tf.log(u) - tf.log(1. - u))
    # Autoregressive coupling between the three channels: channel 1 is
    # shifted by a linear function of channel 0, channel 2 by both.
    x0 = tf.minimum(tf.maximum(x[:, :, :, 0], -1.), 1.)
    x1 = tf.minimum(tf.maximum(
        x[:, :, :, 1] + coeffs[:, :, :, 0] * x0, -1.), 1.)
    x2 = tf.minimum(tf.maximum(
        x[:, :, :, 2] + coeffs[:, :, :, 1] * x0 + coeffs[:, :, :, 2] * x1, -1.), 1.)
    return tf.concat([tf.reshape(x0, xs[:-1] + [1]),
                      tf.reshape(x1, xs[:-1] + [1]),
                      tf.reshape(x2, xs[:-1] + [1])], 3)
def disjunction_of_literals(literals, label="no_label"):
    """Fuzzy-logic disjunction of `literals`, then aggregation over the batch.

    The t-conorm is selected by the module-level `default_tnorm` and the
    batch aggregation by `default_aggregator` (both read from globals).

    NOTE(review): if `default_tnorm` matches none of the handled names,
    `result` is unbound (NameError below); if `default_aggregator` matches
    none, the function falls through and returns None. Confirm that the
    globals are always one of the handled values.
    """
    list_of_literal_tensors = [lit.tensor for lit in literals]
    literals_tensor = tf.concat(1,list_of_literal_tensors)
    # --- disjunction (t-conorm) across literals, one value per example ---
    if default_tnorm == "product":
        result = 1.0-tf.reduce_prod(1.0-literals_tensor, 1, keep_dims=True)
    if default_tnorm == "yager2":
        result = tf.minimum(1.0, tf.sqrt(tf.reduce_sum(tf.square(literals_tensor), 1, keep_dims=True)))
    if default_tnorm == "luk":
        # Lukasiewicz t-conorm: min(1, sum).
        print "data aggregator is lukas"
        result = tf.minimum(1.0, tf.reduce_sum(literals_tensor, 1, keep_dims=True))
        PR(result)
    if default_tnorm == "goedel":
        result = tf.reduce_max(literals_tensor, 1, keep_dims=True, name=label)
    # --- aggregation of the per-example results into a single scalar ---
    if default_aggregator == "product":
        return tf.reduce_prod(result, keep_dims=True)
    if default_aggregator == "mean":
        print "data aggregator is mean"
        return tf.reduce_mean(result, keep_dims=True, name=label)
    if default_aggregator == "gmean":
        # Geometric mean computed in log space.
        return tf.exp(tf.mul(tf.reduce_sum(tf.log(result), keep_dims=True),
                             tf.inv(tf.to_float(tf.size(result)))), name=label)
    if default_aggregator == "hmean":
        print "data aggregator is hmean"
        # Harmonic mean: n / sum(1/x).
        return tf.div(tf.to_float(tf.size(result)), tf.reduce_sum(tf.inv(result), keep_dims=True))
    if default_aggregator == "min":
        print "data aggregator is min"
        return tf.reduce_min(result, keep_dims=True, name=label)
    if default_aggregator == "qmean":
        print "data aggregator is qmean"
        # Quadratic mean (RMS).
        return tf.sqrt(tf.reduce_mean(tf.square(result), keep_dims=True), name=label)
    if default_aggregator == "cmean":
        print "data aggregator is cmean"
        # Cubic mean.
        return tf.pow(tf.reduce_mean(tf.pow(result, 3), keep_dims=True),
                      tf.inv(tf.to_float(3)), name=label)
def clip_boxes(bboxes, imshape):
    """Clip bounding boxes to image boundaries based on image shape.

    Args:
        bboxes: Tensor with shape (num_bboxes, 4) where point order is
            x1, y1, x2, y2.
        imshape: Tensor with shape (2, ) where the first value is height
            and the next is width.

    Returns:
        Tensor with same shape as bboxes but making sure that none of the
        bboxes are outside the image.
    """
    with tf.name_scope('BoundingBoxTransform/clip_bboxes'):
        bboxes = tf.cast(bboxes, dtype=tf.float32)
        imshape = tf.cast(imshape, dtype=tf.float32)

        x1, y1, x2, y2 = tf.split(bboxes, 4, axis=1)

        # Largest valid pixel coordinate along each axis.
        max_x = imshape[1] - 1.0
        max_y = imshape[0] - 1.0

        def _clamp(coord, upper):
            # Keep the coordinate inside [0, upper].
            return tf.maximum(tf.minimum(coord, upper), 0.0)

        clipped = [_clamp(x1, max_x), _clamp(y1, max_y),
                   _clamp(x2, max_x), _clamp(y2, max_y)]
        return tf.concat(clipped, axis=1)
def copy_net_logit_function(state): state = tf.nn.dropout(state, self.dropout_placeholder) # the logits for generating the next word are computed in # the standard way generate_logits = tf.matmul(state, decoding_w) + decoding_b # Equation 8 in the paper ... in shape of source sentence # (batch x time) copy_logits_in_time = tf.reduce_sum( projected_inputs * tf.expand_dims(state, 1), [2]) # mask out the padding in exponential domain copy_logits_in_time_exp_masked = tf.exp( tf.minimum([[80.0]], copy_logits_in_time)) * copy_mask # ... in shape of vocabulary (batch x time x vocabulary) copy_logits_in_vocabulary = tf.expand_dims( copy_logits_in_time_exp_masked, 2) * vocabulary_shaped_indices # Equation 6 without normalization copy_logits_exp = tf.reduce_sum(copy_logits_in_vocabulary, [1]) logits_exp = copy_logits_exp \ + tf.exp(tf.minimum([[80.0]], generate_logits)) return (tf.log(tf.maximum([[1e-40]], logits_exp)), copy_logits_in_time)
def sum_ohem_loss(cls_score, label, bbox_pred, bbox_targets,
                  bbox_inside_weights, bbox_outside_weights,
                  nr_ohem_sampling, sigma=1.0, dim=[1]):
    """Online Hard Example Mining loss: keep only the `nr_ohem_sampling`
    highest combined-loss ROIs for both classification and box losses.

    NOTE(review): `dim=[1]` is a mutable default argument (harmless here
    since it is never mutated, but a tuple would be safer). Also the call
    to `_smooth_l1_loss_base` hard-codes dim=[1] instead of forwarding the
    parameter -- confirm this is intended.

    Returns:
        Tuple (mean classification loss, mean box loss) over the kept ROIs.
    """
    cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=cls_score, labels=label)
    box_loss_base = _smooth_l1_loss_base(bbox_pred, bbox_targets,
                                         bbox_inside_weights,
                                         bbox_outside_weights,
                                         sigma=sigma, dim=[1])
    box_loss = tf.reduce_sum(box_loss_base, axis=dim)
    # Per-ROI combined loss used to rank hard examples.
    cls_box_loss = cls_loss + box_loss
    # Never ask top_k for more elements than exist.
    nr_ohem_sampling = tf.minimum(nr_ohem_sampling,
                                  tf.shape(cls_box_loss)[0])
    topk_val, topk_idx = tf.nn.top_k(cls_box_loss, k=nr_ohem_sampling,
                                     sorted=True, name='ohem_loss_index')
    cls_loss_ohem = tf.gather(cls_loss, topk_idx, name='ohem_cls_loss')
    box_loss_ohem = tf.gather(box_loss, topk_idx, name='ohem_box_loss')
    box_loss_ohem = tf.reduce_sum(box_loss_ohem) / \
        tf.to_float(nr_ohem_sampling)
    # Normalizer for the classification loss; stop_gradient keeps it
    # out of backprop.
    cls_norm = tf.stop_gradient(tf.minimum(nr_ohem_sampling,
                                           tf.shape(topk_val)[0]))
    # db_cls_norm = tf.py_func(debug_single, [cls_loss, box_loss, topk_idx,
    #                          cls_loss_ohem, box_loss_ohem, cls_norm], [tf.bool])
    # with tf.control_dependencies(db_cls_norm):
    cls_loss_ohem = tf.reduce_sum(cls_loss_ohem) / tf.to_float(cls_norm)
    return cls_loss_ohem, box_loss_ohem
def bboxes_clip(bbox_ref, bboxes, scope=None):
    """Clip bounding boxes to a reference box.

    Batch-compatible if the first dimension of `bbox_ref` and `bboxes`
    can be broadcasted.

    Args:
        bbox_ref: Reference bounding box. Nx4 or 4 shaped-Tensor;
        bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor or dictionary.
    Return:
        Clipped bboxes.
    """
    # Dictionary input: clip every entry recursively.
    if isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_clip_dict'):
            return {key: bboxes_clip(bbox_ref, boxes)
                    for key, boxes in bboxes.items()}

    # Tensor input.
    with tf.name_scope(scope, 'bboxes_clip'):
        # Transposed layout makes per-coordinate broadcasting easier.
        ref_t = tf.transpose(bbox_ref)
        boxes_t = tf.transpose(bboxes)
        # Intersect each box with the reference box.
        ymin = tf.maximum(boxes_t[0], ref_t[0])
        xmin = tf.maximum(boxes_t[1], ref_t[1])
        ymax = tf.minimum(boxes_t[2], ref_t[2])
        xmax = tf.minimum(boxes_t[3], ref_t[3])
        # Collapse degenerate (no-intersection) boxes into empty ones.
        ymin = tf.minimum(ymin, ymax)
        xmin = tf.minimum(xmin, xmax)
        return tf.transpose(tf.stack([ymin, xmin, ymax, xmax], axis=0))
def batch_iou(bboxes, bbox):
    """Compute iou of a batch of boxes with another box.

    Box format '[y_min, x_min, y_max, x_max]'.

    Args:
        bboxes: A batch of boxes. 2-D with shape `[B, 4]`.
        bbox: A single box. 1-D with shape `[4]`.

    Returns:
        Batch of IOUs
    """
    # Horizontal and vertical overlap extents, clamped at zero for the
    # no-overlap case.
    overlap_w = tf.maximum(
        tf.minimum(bboxes[:, 3], bbox[3]) - tf.maximum(bboxes[:, 1], bbox[1]),
        0
    )
    overlap_h = tf.maximum(
        tf.minimum(bboxes[:, 2], bbox[2]) - tf.maximum(bboxes[:, 0], bbox[0]),
        0
    )
    intersection = tf.multiply(overlap_h, overlap_w)
    # |A U B| = |A| + |B| - |A n B|
    batch_areas = tf.multiply(bboxes[:, 3] - bboxes[:, 1],
                              bboxes[:, 2] - bboxes[:, 0])
    box_area = tf.multiply(bbox[3] - bbox[1], bbox[2] - bbox[0])
    union = tf.subtract(batch_areas + box_area, intersection)
    return tf.div(intersection, union)
def bboxes_jaccard(bbox_ref, bboxes, name=None):
    """Compute jaccard score between a reference box and a collection
    of bounding boxes.

    Args:
        bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
        bboxes: (N, 4) Tensor, collection of bounding boxes.
    Return:
        (N,) Tensor with Jaccard scores.
    """
    with tf.name_scope(name, 'bboxes_jaccard'):
        # Transpose first so each coordinate becomes a leading row,
        # which keeps the per-coordinate broadcasting simple.
        boxes_t = tf.transpose(bboxes)
        ref_t = tf.transpose(bbox_ref)
        # Intersection rectangle.
        ymin = tf.maximum(boxes_t[0], ref_t[0])
        xmin = tf.maximum(boxes_t[1], ref_t[1])
        ymax = tf.minimum(boxes_t[2], ref_t[2])
        xmax = tf.minimum(boxes_t[3], ref_t[3])
        # Negative extents mean no overlap; clamp them to zero.
        inter_vol = tf.maximum(ymax - ymin, 0.) * tf.maximum(xmax - xmin, 0.)
        # |A U B| = |A| + |B| - |A n B|
        vol_boxes = (boxes_t[2] - boxes_t[0]) * (boxes_t[3] - boxes_t[1])
        vol_ref = (ref_t[2] - ref_t[0]) * (ref_t[3] - ref_t[1])
        union_vol = -inter_vol + vol_boxes + vol_ref
        return tfe_math.safe_divide(inter_vol, union_vol, 'jaccard')
def bboxes_intersection(bbox_ref, bboxes, name=None):
    """Compute relative intersection between a reference box and a
    collection of bounding boxes: intersection area / box area.

    Args:
        bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
        bboxes: (N, 4) Tensor, collection of bounding boxes.
    Return:
        (N,) Tensor with relative intersection.
    """
    with tf.name_scope(name, 'bboxes_intersection'):
        # Transposed layout: one coordinate per leading row.
        boxes_t = tf.transpose(bboxes)
        ref_t = tf.transpose(bbox_ref)
        # Intersection rectangle.
        ymin = tf.maximum(boxes_t[0], ref_t[0])
        xmin = tf.maximum(boxes_t[1], ref_t[1])
        ymax = tf.minimum(boxes_t[2], ref_t[2])
        xmax = tf.minimum(boxes_t[3], ref_t[3])
        # Clamp negative extents (no overlap) to zero before the product.
        inter_vol = tf.maximum(ymax - ymin, 0.) * tf.maximum(xmax - xmin, 0.)
        # Normalize by each candidate box's own area, not the union.
        boxes_vol = (boxes_t[2] - boxes_t[0]) * (boxes_t[3] - boxes_t[1])
        return tfe_math.safe_divide(inter_vol, boxes_vol, 'intersection')
def fast_rcnn_minibatch(self, reference_boxes):
    """Sample a Fast R-CNN training minibatch from `reference_boxes`.

    Positives are capped at minibatch_size * positives_ratio; the rest of
    the minibatch is filled with negatives. Indices are shuffled so batch
    order carries no signal.

    Returns:
        Tuple (minibatch indices, matched gt boxes for those indices,
        object mask, one-hot labels of depth num_classes + 1).
    """
    with tf.variable_scope('fast_rcnn_minibatch'):
        reference_boxes_mattached_gtboxes, object_mask, label = \
            self.fast_rcnn_find_positive_negative_samples(reference_boxes)

        # Positive samples: object_mask != 0.
        positive_indices = tf.reshape(tf.where(tf.not_equal(object_mask, 0.)), [-1])

        # Cap positives at minibatch_size * positives_ratio.
        num_of_positives = tf.minimum(tf.shape(positive_indices)[0],
                                      tf.cast(self.fast_rcnn_minibatch_size*self.fast_rcnn_positives_ratio, tf.int32))

        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices, begin=[0], size=[num_of_positives])

        # Fill the remainder of the minibatch with negatives.
        negative_indices = tf.reshape(tf.where(tf.equal(object_mask, 0.)), [-1])
        num_of_negatives = tf.minimum(tf.shape(negative_indices)[0],
                                      self.fast_rcnn_minibatch_size - num_of_positives)

        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(negative_indices, begin=[0], size=[num_of_negatives])

        minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
        minibatch_indices = tf.random_shuffle(minibatch_indices)

        minibatch_reference_boxes_mattached_gtboxes = tf.gather(reference_boxes_mattached_gtboxes,
                                                                minibatch_indices)
        object_mask = tf.gather(object_mask, minibatch_indices)
        label = tf.gather(label, minibatch_indices)
        # +1 for the background class.
        label_one_hot = tf.one_hot(label, self.num_classes + 1)

        return minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, object_mask, label_one_hot
def get_next_input(output):
    """RAM step: from the core network output, emit the baseline and the
    next (noisy, clipped) glimpse location, then extract that glimpse.

    Side effects: appends to the enclosing-scope lists `baselines`,
    `mean_locs`, `mean_locs_stopGrad`, `sampled_locs` and
    `sampled_locs_stopGrad`.
    """
    # the next location is computed by the location network
    baseline = tf.sigmoid(tf.matmul(output,Wb_h_b) + Bb_h_b)
    baselines.append(baseline)
    # compute the next location, then impose noise
    if eyeCentered:
        # add the last sampled glimpse location
        # TODO max(-1, min(1, u + N(output, sigma) + prevLoc))
        mean_loc = tf.maximum(-1.0, tf.minimum(1.0, tf.matmul(output, Wl_h_l) + sampled_locs[-1] ))
    else:
        mean_loc = tf.matmul(output, Wl_h_l)

    # mean_loc = tf.stop_gradient(mean_loc)
    mean_locs.append(mean_loc)
    mean_locs_stopGrad.append(tf.stop_gradient(mean_loc))

    # add noise
    # sample_loc = tf.tanh(mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd))
    # Gaussian exploration noise, clipped to the valid [-1, 1] range.
    sample_loc = tf.maximum(-1.0, tf.minimum(1.0, mean_loc + tf.random_normal(mean_loc.get_shape(), 0, loc_sd)))

    # don't propagate throught the locations
    # sample_loc = tf.stop_gradient(sample_loc)
    sampled_locs.append(sample_loc)
    sampled_locs_stopGrad.append(tf.stop_gradient(sample_loc))

    return get_glimpse(sample_loc)
def make_minibatch(self, valid_anchors):
    """Sample an RPN training minibatch from `valid_anchors`.

    Positives are capped at rpn_mini_batch_size * rpn_positives_ratio;
    the remainder is filled with negatives, and the combined index set is
    shuffled.

    Returns:
        Tuple (minibatch indices, matched gt boxes, object mask,
        one-hot labels of depth 2).
    """
    with tf.variable_scope('rpn_minibatch'):

        # in labels(shape is [N, ]): 1 is positive, 0 is negative, -1 is ignored
        labels, anchor_matched_gtboxes, object_mask = \
            self.rpn_find_positive_negative_samples(valid_anchors)  # [num_of_valid_anchors, ]

        positive_indices = tf.reshape(tf.where(tf.equal(labels, 1.0)), [-1])  # use labels is same as object_mask

        num_of_positives = tf.minimum(tf.shape(positive_indices)[0],
                                      tf.cast(self.rpn_mini_batch_size * self.rpn_positives_ratio, tf.int32))

        # num of positives <= minibatch_size * 0.5
        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(positive_indices, begin=[0], size=[num_of_positives])
        # positive_anchors = tf.gather(self.anchors, positive_indices)

        # Fill the rest of the minibatch with negatives.
        negative_indices = tf.reshape(tf.where(tf.equal(labels, 0.0)), [-1])
        num_of_negatives = tf.minimum(self.rpn_mini_batch_size - num_of_positives,
                                      tf.shape(negative_indices)[0])

        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(negative_indices, begin=[0], size=[num_of_negatives])
        # negative_anchors = tf.gather(self.anchors, negative_indices)

        minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
        minibatch_indices = tf.random_shuffle(minibatch_indices)

        minibatch_anchor_matched_gtboxes = tf.gather(anchor_matched_gtboxes, minibatch_indices)
        object_mask = tf.gather(object_mask, minibatch_indices)
        labels = tf.cast(tf.gather(labels, minibatch_indices), tf.int32)
        labels_one_hot = tf.one_hot(labels, depth=2)
        return minibatch_indices, minibatch_anchor_matched_gtboxes, object_mask, labels_one_hot
def loss(y_true_cls, y_pred_cls,
         y_true_geo, y_pred_geo,
         training_mask):
    '''
    define the loss used for training, containing two parts:
    the first part we use dice loss instead of weighted logloss,
    the second part is the iou loss defined in the paper
    :param y_true_cls: ground truth of text
    :param y_pred_cls: prediction of text
    :param y_true_geo: ground truth of geometry
    :param y_pred_geo: prediction of geometry
    :param training_mask: mask used in training, to ignore some text annotated by ###
    :return: scalar loss = mean(AABB IoU loss + 20 * angle loss) + scaled dice loss
    '''
    classification_loss = dice_coefficient(y_true_cls, y_pred_cls, training_mask)
    # scale classification loss to match the iou loss part
    classification_loss *= 0.01

    # d1 -> top, d2->right, d3->bottom, d4->left
    d1_gt, d2_gt, d3_gt, d4_gt, theta_gt = tf.split(value=y_true_geo, num_or_size_splits=5, axis=3)
    d1_pred, d2_pred, d3_pred, d4_pred, theta_pred = tf.split(value=y_pred_geo, num_or_size_splits=5, axis=3)
    # Axis-aligned box IoU from the four per-pixel border distances.
    area_gt = (d1_gt + d3_gt) * (d2_gt + d4_gt)
    area_pred = (d1_pred + d3_pred) * (d2_pred + d4_pred)
    w_union = tf.minimum(d2_gt, d2_pred) + tf.minimum(d4_gt, d4_pred)
    h_union = tf.minimum(d1_gt, d1_pred) + tf.minimum(d3_gt, d3_pred)
    area_intersect = w_union * h_union
    area_union = area_gt + area_pred - area_intersect
    # +1 in both numerator and denominator keeps the log finite at zero area.
    L_AABB = -tf.log((area_intersect + 1.0)/(area_union + 1.0))
    # Angle loss: 0 when predicted and true angles agree.
    L_theta = 1 - tf.cos(theta_pred - theta_gt)
    tf.summary.scalar('geometry_AABB', tf.reduce_mean(L_AABB * y_true_cls * training_mask))
    tf.summary.scalar('geometry_theta', tf.reduce_mean(L_theta * y_true_cls * training_mask))
    L_g = L_AABB + 20 * L_theta

    return tf.reduce_mean(L_g * y_true_cls * training_mask) + classification_loss
def _get_lr_from_schedule(self):
    """Return the learning-rate tensor for the configured schedule type.

    Supported `_lr_sched_params['type']` values: 'stable', 'poly',
    'expon', 'predef', 'auto'.

    NOTE(review): an unrecognized type leaves `curr_lr` unbound and this
    raises at graph-construction time -- confirm config validation upstream.
    """
    TINY = 1e-8
    if self._lr_sched_params['type'] == 'stable' :
        # Constant LR for the whole run.
        curr_lr = self._init_lr_tfv
    elif self._lr_sched_params['type'] == 'poly' :
        first_it_for_sch = self._lr_sched_params['poly']['epochs_wait_before_decr']
        final_it_for_sch = self._lr_sched_params['poly']['final_ep_for_sch'] # * subepochs_per_ep
        assert first_it_for_sch < final_it_for_sch
        curr_it = tf.cast(self._num_epochs_trained_tfv, dtype='float32') # * subepochs_per_ep + curr_subepoch
        #curr_lr = init_lr * ( 1 - x/x2) ^ power. Power = 0.9 in parsenet, which we validated to behave ok.
        x2 = final_it_for_sch - first_it_for_sch
        x = tf.maximum( tf.constant(0, dtype="float32"), curr_it - first_it_for_sch ) # to make schedule happen within the window (first, final) epoch, stable outside.
        x = tf.minimum( x, x2 ) # in case the current iteration is after max, so that I keep schedule stable afterwards.
        y1 = self._init_lr_tfv
        y2 = 0.9
        curr_lr = y1 * tf.pow( 1.0 - x/x2, y2 )
    elif self._lr_sched_params['type'] == 'expon' :
        first_it_for_sch = self._lr_sched_params['expon']['epochs_wait_before_decr']
        final_it_for_sch = self._lr_sched_params['expon']['final_ep_for_sch'] # * subepochs_per_ep
        assert first_it_for_sch < final_it_for_sch
        curr_it = tf.cast(self._num_epochs_trained_tfv, dtype='float32')
        # y = y1 * gamma^x. gamma = (y2 / y1)^(1/x2)
        x2 = final_it_for_sch - first_it_for_sch
        x = tf.maximum( tf.constant(0, dtype="float32"), curr_it-first_it_for_sch )
        x = tf.minimum( x, x2 )
        y1 = self._init_lr_tfv
        y2 = self._lr_sched_params['expon']['lr_to_reach_at_last_ep']
        # TINY guards the ratio against a zero target LR.
        gamma = tf.pow( (y2+TINY)/y1, 1.0/x2 )
        curr_lr = y1 * tf.pow( gamma, x )
    elif self._lr_sched_params['type'] == 'predef' : #Predefined Schedule.
        # LR divided by div_lr_by at each listed epoch boundary.
        div_lr_by = self._lr_sched_params['predef']['div_lr_by']
        epochs_boundaries = [ tf.cast(e, tf.int32) for e in self._lr_sched_params['predef']['epochs'] ]
        lr_values = [ ( self._init_lr_tfv / pow(div_lr_by, i) ) for i in range( 1+len(epochs_boundaries) ) ]
        curr_lr = tf.train.piecewise_constant(self._num_epochs_trained_tfv, boundaries = epochs_boundaries, values = lr_values)
    elif self._lr_sched_params['type'] == 'auto' :
        # Variables + assign ops so run_lr_sched_updates(sessionTf) can
        # lower the LR during training based on validation accuracy.
        self._learning_rate_tfv = tf.Variable( self._init_lr_tfv, dtype="float32", trainable=False, name="curr_lr_tfv")
        self._top_mean_val_acc_tfv = tf.Variable(0, dtype="float32", trainable=False, name="top_mean_val_acc")
        self._epoch_with_top_mean_val_acc_tvf = tf.Variable(0, dtype=self._num_epochs_trained_tfv.dtype.as_numpy_dtype, trainable=False, name="ep_top_mean_val_acc")
        self._last_epoch_lr_got_lowered_tvf = tf.Variable(0, dtype="float32", trainable=False, name="last_ep_lr_lowered")
        self._op_assign_new_lr = tf.assign(self._learning_rate_tfv, self._tf_plchld_float32)
        self._op_assign_top_mean_val_acc_tfv = tf.assign(self._top_mean_val_acc_tfv, self._tf_plchld_float32)
        self._op_assign_epoch_with_top_mean_val_acc_tvf = tf.assign(self._epoch_with_top_mean_val_acc_tvf, self._tf_plchld_int32)
        self._op_assign_last_epoch_lr_lowered = tf.assign(self._last_epoch_lr_got_lowered_tvf, self._tf_plchld_float32)
        # The LR will be changed during the routine.training, by a call to function self.run_lr_sched_updates( sessionTf )
        curr_lr = self._learning_rate_tfv

    return curr_lr
def _compute_model_loss(
        self, input_sequence, output_sequence, sequence_length):
    """Builds a model with loss for train/eval.

    Encodes `input_sequence` to a latent `z` (when hparams.conditional)
    and scores the decoder's reconstruction of `output_sequence`, adding
    a free-bits-thresholded, beta-annealed KL term. Sets `self.loss` and
    `self.global_step` as side effects.

    Returns:
        Tuple (metric_map from the decoder, dict of scalars to summarize).
    """
    hparams = self.hparams
    batch_size = hparams.batch_size

    self.global_step = tf.train.get_or_create_global_step()

    input_sequence = tf.to_float(input_sequence)
    output_sequence = tf.to_float(output_sequence)

    # Cap sequence length at the configured maximum.
    max_seq_len = tf.minimum(tf.shape(output_sequence)[1], hparams.max_seq_len)

    input_sequence = input_sequence[:, :max_seq_len]

    # The target/expected outputs.
    x_target = output_sequence[:, :max_seq_len]
    # Inputs to be fed to decoder, including zero padding for the initial
    # input.
    x_input = tf.pad(output_sequence[:, :max_seq_len - 1],
                     [(0, 0), (1, 0), (0, 0)])
    x_length = tf.minimum(sequence_length, max_seq_len)

    # Either encode to get `z`, or do unconditional, decoder-only.
    if hparams.conditional:  # vae mode:
        q_z = self.encode(input_sequence, x_length)
        z = q_z.sample()

        # Prior distribution.
        p_z = ds.MultivariateNormalDiag(
            loc=[0.] * hparams.z_size, scale_diag=[1.] * hparams.z_size)

        # KL Divergence (nats)
        kl_div = ds.kl_divergence(q_z, p_z)

        # Concatenate the Z vectors to the inputs at each time step.
    else:  # unconditional, decoder-only generation
        kl_div = tf.zeros([batch_size, 1], dtype=tf.float32)
        z = None

    r_loss, metric_map = self.decoder.reconstruction_loss(
        x_input, x_target, x_length, z)[0:2]

    # Free bits: KL below the threshold (converted bits -> nats) is free.
    free_nats = hparams.free_bits * tf.log(2.0)
    kl_cost = tf.maximum(kl_div - free_nats, 0)

    # Anneal beta from 0 towards max_beta over training steps.
    beta = ((1.0 - tf.pow(hparams.beta_rate, tf.to_float(self.global_step)))
            * hparams.max_beta)
    self.loss = tf.reduce_mean(r_loss) + beta * tf.reduce_mean(kl_cost)

    scalars_to_summarize = {
        'loss': self.loss,
        'losses/r_loss': r_loss,
        'losses/kl_loss': kl_cost,
        'losses/kl_bits': kl_div / tf.log(2.0),
        'losses/kl_beta': beta,
    }
    return metric_map, scalars_to_summarize
def leaky_twice_relu6(x, alpha_low=0.2, alpha_high=0.2, name="leaky_relu6"):
    """:func:`leaky_twice_relu6` can be used through its shortcut: :func:`tl.act.ltrelu6`.

    This activation function is a modified version :func:`leaky_relu` introduced by the following paper:
    `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__

    This activation function also follows the behaviour of the activation function :func:`tf.nn.relu6` introduced by the following paper:
    `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__

    This function push further the logic by adding `leaky` behaviour both below zero and above six.

    The function return the following results:
      - When x < 0: ``f(x) = alpha_low * x``.
      - When x in [0, 6]: ``f(x) = x``.
      - When x > 6: ``f(x) = 6 + (alpha_high * (x-6))``.

    Parameters
    ----------
    x : Tensor
        Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, ``int16``, or ``int8``.
    alpha_low : float
        Slope for x < 0: ``f(x) = alpha_low * x``.
    alpha_high : float
        Slope for x > 6: ``f(x) = 6 + (alpha_high * (x-6))``.
    name : str
        The function name (optional).

    Examples
    --------
    >>> import tensorlayer as tl
    >>> net = tl.layers.Input([10, 200])
    >>> net = tl.layers.Dense(n_units=100, act=lambda x : tl.act.leaky_twice_relu6(x, 0.2, 0.2), name='dense')(net)

    Returns
    -------
    Tensor
        A ``Tensor`` in the same type as ``x``.

    References
    ----------
    - `Rectifier Nonlinearities Improve Neural Network Acoustic Models [A. L. Maas et al., 2013] <https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`__
    - `Convolutional Deep Belief Networks on CIFAR-10 [A. Krizhevsky, 2010] <http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf>`__

    """
    # NOTE(review): the checks below reject 0 while the messages claim the
    # range is [0, 1] -- the effective accepted range is (0, 1].
    if not isinstance(alpha_high, tf.Tensor) and not (0 < alpha_high <= 1):
        raise ValueError("`alpha_high` value must be in [0, 1]`")

    if not isinstance(alpha_low, tf.Tensor) and not (0 < alpha_low <= 1):
        raise ValueError("`alpha_low` value must be in [0, 1]`")

    with tf.name_scope(name) as name_scope:
        x = tf.convert_to_tensor(x, name="features")
        # Upper branch: x for x <= 6, then slope alpha_high beyond 6.
        x_is_above_0 = tf.minimum(x, 6 * (1 - alpha_high) + alpha_high * x)
        # Lower branch: slope alpha_low for x < 0, capped at 0.
        x_is_below_0 = tf.minimum(alpha_low * x, 0)
        return tf.maximum(x_is_above_0, x_is_below_0, name=name_scope)
def crappy_plot(val, levels):
    """Rasterise integer series `val` into `levels` vertical bins
    (presumably for a quick-and-dirty plot image -- the name says it all).

    For every column, a vertical span from the min to just past the max of
    {value, mean-with-left-neighbour, mean-with-right-neighbour} is marked
    via a cumulative-sum difference trick.
    """
    width = val.get_shape().as_list()[1]
    # Left/right neighbours of each column; edge columns are duplicated.
    shifted_left = tf.concat(1, (val[:, 0:1], val[:, 0:width - 1]))
    shifted_right = tf.concat(1, (val[:, 1:], val[:, width - 1:]))
    mean_with_left = (val + shifted_left) // 2
    mean_with_right = (val + shifted_right) // 2
    # Span endpoints per column (high end is exclusive via val + 1).
    lo = tf.minimum(tf.minimum(mean_with_left, mean_with_right), val)
    hi = tf.maximum(tf.maximum(mean_with_left, mean_with_right), val + 1)
    # +1 at lo and -1 at hi; the cumulative sum paints the [lo, hi) span.
    span = tf.one_hot(lo, levels, axis=1) - tf.one_hot(hi, levels, axis=1)
    return tf.cumsum(span, axis=1)
def intersection(box1, box2):
    """Elementwise intersection area of two [N, 4] box batches
    ([x1, y1, x2, y2] per row)."""
    inter_x1 = tf.maximum(box1[:, 0], box2[:, 0])
    inter_y1 = tf.maximum(box1[:, 1], box2[:, 1])
    inter_x2 = tf.minimum(box1[:, 2], box2[:, 2])
    inter_y2 = tf.minimum(box1[:, 3], box2[:, 3])
    # Negative extents mean the boxes do not overlap; clamp to zero.
    width = tf.maximum(inter_x2 - inter_x1, 0)
    height = tf.maximum(inter_y2 - inter_y1, 0)
    return width * height
def rnn_decoder(self, true_utt, true_utt_len, initial_state, reuse=False):
    """Build the RNN utterance decoder: teacher-forced training logits
    plus greedy-prediction and sampling decoders sharing the same weights.

    Args:
        true_utt: [batch, time] int token ids of the target utterance.
        true_utt_len: [batch] utterance lengths.
        initial_state: initial LSTM state for the decoder.
        reuse: whether to reuse the 'decoder' variable scope.

    Returns:
        Tuple (next_word_logits, utt_next, true_utt_len_clipped,
        predictions, samples, varscope).
    """
    cell = tf.contrib.rnn.LSTMBlockCell(num_units=self.options.num_rnn_units, use_peephole=False)
    # Trim the time axis to the longest actual utterance in the batch.
    reduced_size = tf.minimum(tf.shape(true_utt)[1], tf.reduce_max(true_utt_len), name='reduced_size')
    true_utt_len_clipped = tf.minimum(reduced_size, true_utt_len, name='true_utt_len_clipped')
    true_utt_reduced = tf.slice(true_utt, [0, 0], [-1, reduced_size])
    # Teacher forcing: inputs are tokens [0, T-1], targets are tokens [1, T].
    utt_prev = tf.slice(true_utt_reduced, [0, 0], [-1, tf.shape(true_utt_reduced)[1] - 1])
    utt_next = tf.slice(true_utt_reduced, [0, 1], [-1, -1])

    # Decoder
    fc = tf.contrib.layers.fully_connected
    with tf.variable_scope('decoder', reuse=reuse) as varscope:
        embeddings = tf.Variable(tf.random_uniform([self.seq_vec.num_types,
                                                    self.options.embedding_size],
                                                   -1.0, 1.0),
                                 name='embedding')

        def output_fn(rep):
            # Project RNN outputs to vocabulary logits (with dropout).
            rep_drop = tf.nn.dropout(rep, keep_prob=self.dropout_keep_prob, name='rep_drop')
            return fc(rep_drop, trainable=True, activation_fn=tf.identity,
                      num_outputs=self.seq_vec.num_types, scope=varscope)

        utt_embed = tf.nn.embedding_lookup(embeddings, utt_prev, name='utt_embed')
        utt_embed_drop = tf.nn.dropout(utt_embed, keep_prob=self.dropout_keep_prob,
                                       name='utt_embed_drop')
        outputs, _ = tf.nn.dynamic_rnn(cell, utt_embed_drop,
                                       sequence_length=true_utt_len_clipped,
                                       initial_state=initial_state,
                                       dtype=tf.float32,
                                       scope=varscope)
        next_word_logits = output_fn(outputs)

        # Inference decoders below reuse the training weights.
        varscope.reuse_variables()

        decoder_args = [
            output_fn, initial_state, embeddings,
            self.seq_vec.token_indices['<s>'],
            self.seq_vec.token_indices['</s>'],
            self.seq_vec.max_len, self.options.num_rnn_units,
        ]
        decoder_predict = tfutils.simple_decoder_fn_inference(*decoder_args,
                                                              name='decoder_predict')
        decoder_sample = tfutils.simple_decoder_fn_inference(*decoder_args,
                                                             sample=True,
                                                             name='decoder_sample')
        predictions, _ = tfutils.dynamic_rnn_decoder(cell,
                                                     sequence_lengths=true_utt_len_clipped,
                                                     decoder_fn=decoder_predict,
                                                     scope=varscope)
        samples, _ = tfutils.dynamic_rnn_decoder(cell,
                                                 sequence_lengths=true_utt_len_clipped,
                                                 decoder_fn=decoder_sample,
                                                 scope=varscope)

    return (next_word_logits, utt_next, true_utt_len_clipped,
            predictions, samples, varscope)
def upper_bound(inputs, bound, gradient="identity_if_towards", name=None):
    """Same as `tf.minimum`, but with helpful gradient for `inputs > bound`.

    This function behaves just like `tf.minimum`, but the gradient with
    respect to `inputs` for values that hit the bound depends on `gradient`:

    - `'disconnected'`: zero gradient at the bound (identical to
      `tf.minimum`).
    - `'identity'`: gradient unconditionally replaced with the identity.
    - `'identity_if_towards'`: identity, but only for gradient values that
      would push `inputs` towards the bound; zero otherwise.

    Note: In the latter two cases, no gradient is returned for `bound`.
    Also, `'identity_if_towards'` currently assumes the shape of `inputs`
    matches the output shape; it won't work reliably for all broadcasting
    scenarios.

    Args:
        inputs: Input tensor.
        bound: Upper bound for the input tensor.
        gradient: 'disconnected', 'identity', or 'identity_if_towards'
            (default).
        name: Name for this op.

    Returns:
        `tf.minimum(inputs, bound)`

    Raises:
        ValueError: for invalid value of `gradient`.
    """
    # Map the user-facing mode name to the registered gradient override.
    overrides = {
        "identity_if_towards": "UpperBound",
        "identity": "IdentityFirstOfTwoInputs",
        "disconnected": None,
    }
    if gradient not in overrides:
        raise ValueError("Invalid value for `gradient`: '{}'.".format(gradient))
    gradient = overrides[gradient]

    with tf.name_scope(name, "UpperBound", [inputs, bound]) as scope:
        inputs = tf.convert_to_tensor(inputs, name="inputs")
        bound = tf.convert_to_tensor(bound, name="bound", dtype=inputs.dtype)
        if gradient is None:
            # Plain tf.minimum semantics, including its gradient.
            return tf.minimum(inputs, bound, name=scope)
        with tf.get_default_graph().gradient_override_map({"Minimum": gradient}):
            return tf.minimum(inputs, bound, name=scope)
def intersection_with_anchors(bbox):
    """Per-anchor overlap score of `bbox` against the enclosing scope's anchors.

    Args:
        bbox: 1-D tensor [ymin, xmin, ymax, xmax] — presumably normalized
            coordinates; TODO confirm against the caller.

    Returns:
        Tensor of scores, one per anchor: intersection volume divided by the
        anchor's own volume (NOT IoU — the denominator is the anchor area only).

    NOTE(review): relies on `ymin`, `xmin`, `ymax`, `xmax` and `vol_anchors`
    captured from the enclosing scope.
    """
    # Clip the box against each anchor to get the intersection rectangle.
    int_ymin = tf.maximum(ymin, bbox[0])
    int_xmin = tf.maximum(xmin, bbox[1])
    int_ymax = tf.minimum(ymax, bbox[2])
    int_xmax = tf.minimum(xmax, bbox[3])
    # Clamp at zero so disjoint boxes contribute no area.
    h = tf.maximum(int_ymax - int_ymin, 0.)
    w = tf.maximum(int_xmax - int_xmin, 0.)
    inter_vol = h * w
    scores = tf.div(inter_vol, vol_anchors)
    return scores
def preprocess_for_eval(image, labels, bboxes, out_shape, resize,
                        scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: Per-box labels; passed through unchanged.
        bboxes: [N, 4] boxes in normalized coords, or None.
        out_shape: Output shape after pre-processing (if resize != None)
        resize: Resize strategy.
        scope: Name scope for the ops.

    Returns:
        A tuple (image, labels, bboxes, bbox_img) where bbox_img is the
        whole-image rectangle transformed by the same crops/pads as bboxes.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        image = tf.to_float(image)
        # Subtract per-channel means (whitening).
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Add image rectangle to bboxes so it undergoes the same geometry ops.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat(0, [bbox_img, bboxes])
        # Resize strategy...
        if resize == Resize.NONE:
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            # NOTE(review): `out_shape[0] / shape[0]` divides a Python int by an
            # int tensor before the to_double cast — under integer semantics the
            # ratio truncates; confirm the intended (float) scaling behavior.
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)
            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        # Split back bounding boxes: row 0 is the tracked image rectangle.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        return image, labels, bboxes, bbox_img
def get_q(state_ph, var_dict):
    """Forward pass of a two-hidden-layer Q network with hard-tanh activations.

    Args:
        state_ph: State tensor, reshaped to rows of 9 features.
        var_dict: Dict holding weights 'w0', 'w2', 'w4' and biases
            'b1', 'b3', 'b5'.

    Returns:
        The Q-value output tensor.
    """
    # Flatten the state into rows of 9 features.
    h = tf.reshape(state_ph, [-1, 9])
    # Hidden layer 1: affine, then clamp to [-1, 1] (hard tanh).
    h = tf.matmul(h, var_dict['w0']) + var_dict['b1']
    h = tf.minimum(tf.maximum(h, -1.0), 1.0)
    # Hidden layer 2: affine, then clamp to [-1, 1].
    h = tf.matmul(h, var_dict['w2']) + var_dict['b3']
    h = tf.minimum(tf.maximum(h, -1.0), 1.0)
    # Output layer: affine with a down-scaled (0.05x) bias term.
    return tf.matmul(h, var_dict['w4']) + var_dict['b5'] * tf.constant(0.05)
def update_variable(self, var, grad_var):
    """Update the variable and its slots.

    Applies one Adam-style step with optional Noam learning-rate decay, an
    optionally factored (row/column) second-moment accumulator, optional
    momentum, and quantized variable storage via `_quantize`/`_dequantize`.

    Args:
        var: The variable to update.
        grad_var: Gradient tensor for `var`.

    Returns:
        The assign op that writes the new (quantized) value into `var`.
    """
    params = self.params
    # 1-based step count for the bias-correction / warmup formulas.
    global_step = tf.to_float(self.global_step) + 1
    # compute learning rate
    lrate = params.learning_rate
    if params.learning_rate_decay_scheme == "noam":
        # Noam schedule: linear warmup, then inverse-sqrt decay.
        lrate *= tf.minimum(global_step * params.learning_rate_warmup_steps**-1.5,
                            global_step**-0.5)
    else:
        assert params.learning_rate_decay_scheme == "none"
        # Plain warmup, constant afterwards.
        lrate *= tf.minimum(global_step / params.learning_rate_warmup_steps, 1.0)
    # compute adjustment due to second moment
    slots = params.slots[var.op.name]
    grad_squared = tf.square(grad_var)
    beta2_pow = tf.pow(params.beta2, global_step)
    if params.factored_second_moment_accumulator and len(var.shape) == 2:
        # Factored accumulator: keep one running mean per row and per column
        # instead of a full matrix (memory saving for 2-D variables).
        vr_update = tf.assign(slots["adam_vr"], slots["adam_vr"] * params.beta2 +
                              tf.reduce_mean(grad_squared, 1,
                                             keepdims=True) * (1.0 - params.beta2))
        vc_update = tf.assign(slots["adam_vc"], slots["adam_vc"] * params.beta2 +
                              tf.reduce_mean(grad_squared, 0,
                                             keepdims=True) * (1.0 - params.beta2))
        # Force the slot updates to run before the values are read back.
        with tf.control_dependencies([vr_update, vc_update]):
            vr = tf.sqrt(slots["adam_vr"] / (1.0 - beta2_pow)) + params.epsilon
            vc = tf.sqrt(slots["adam_vc"] / (1.0 - beta2_pow)) + params.epsilon
            # Normalize the column factor so the row factor carries the scale.
            vc /= tf.reduce_mean(vc)
            denom = vr * vc
    else:
        # Standard (unfactored) second-moment accumulator.
        v_update = tf.assign(slots["adam_v"],
                             slots["adam_v"] * params.beta2 + grad_squared *
                             (1.0 - params.beta2))
        with tf.control_dependencies([v_update]):
            denom = tf.sqrt(slots["adam_v"] / (1.0 - beta2_pow)) + params.epsilon
    # compute momentum if applicable
    if params.beta1 != 0.0:
        m_update = tf.assign(slots["adam_m"],
                             slots["adam_m"] * params.beta1 + grad_var *
                             (1.0 - params.beta1))
        with tf.control_dependencies([m_update]):
            grad_var = slots["adam_m"]
    # update var: dequantize, step, re-quantize.
    subtrahend = lrate * grad_var / denom
    new_val = _quantize(_dequantize(var, params) - subtrahend, params)
    return tf.assign(var, new_val)
def __init__(self, gan=None, config=None, trainer=None, name="WeightPenaltyTrainHook", memory_size=2, top_k=1):
    """Build weight-penalty losses (L2NN and/or orthogonality) over GAN weights.

    Depending on `config`, accumulates into `self.loss`:
      - l2nn_penalty: penalizes column sums of |W^T W| (and |W W^T|) exceeding 1.
      - ortho_penalty: penalizes distance of W from W W^T W (orthogonality).
    `memory_size` and `top_k` are accepted but not used in this constructor.
    """
    super().__init__(config=config, gan=gan, trainer=trainer, name=name)
    d_losses = []
    weights = self.gan.weights()
    if config.only_d:
        # Restrict the penalty to discriminator weights only.
        weights = self.discriminator.weights()
    if config.l2nn_penalty:
        l2nn_penalties = []
        if len(weights) > 0:
            for w in weights:
                # Flatten every weight to 2-D: [-1, out_channels].
                w = tf.reshape(w, [-1, self.ops.shape(w)[-1]])
                wt = tf.transpose(w)
                wtw = tf.matmul(wt, w)
                wwt = tf.matmul(w, wt)
                def _l(m):
                    # Largest amount by which any column's absolute sum exceeds 1.
                    m = tf.abs(m)
                    m = tf.reduce_sum(m, axis=0, keep_dims=True)
                    m = tf.maximum(m - 1, 0)
                    m = tf.reduce_max(m, axis=1, keep_dims=True)
                    return m
                # Take the cheaper of the two Gram-matrix penalties.
                l2nn_penalties.append(tf.minimum(_l(wtw), _l(wwt)))
            print('l2nn_penalty', self.config.l2nn_penalty, l2nn_penalties)
            l2nn_penalty = self.config.l2nn_penalty * tf.add_n(l2nn_penalties)
            self.add_metric('l2nn_penalty', self.gan.ops.squash(l2nn_penalty))
            d_losses.append(l2nn_penalty)
    if config.ortho_penalty:
        penalties = []
        # NOTE(review): uses self.gan.weights() here even when config.only_d is
        # set — confirm the asymmetry with the l2nn branch is intentional.
        for w in self.gan.weights():
            print("PENALTY", w)
            w = tf.reshape(w, [-1, self.ops.shape(w)[-1]])
            wt = tf.transpose(w)
            wtw = tf.matmul(wt, w)
            wwt = tf.matmul(w, wt)
            mwtw = tf.matmul(w, wtw)
            mwwt = tf.matmul(wt, wwt)
            def _l(w, m):
                # Mean absolute deviation of W from W·(WᵀW); squashed to scalar.
                l = tf.reduce_mean(tf.abs(w - m))
                l = self.ops.squash(l)
                return l
            penalties.append(tf.minimum(_l(w, mwtw), _l(wt, mwwt)))
        penalty = self.config.ortho_penalty * tf.add_n(penalties)
        self.add_metric('ortho_penalty', self.gan.ops.squash(penalty))
        print("PENALTY", penalty)
        # Broadcast the scalar penalty across the batch dimension.
        penalty = tf.reshape(penalty, [1, 1])
        penalty = tf.tile(penalty, [self.gan.batch_size(), 1])
        d_losses.append(penalty)
    self.loss = self.ops.squash(d_losses)
def train_continuous(self, BATCH, isw, cell_state):
    """One SAC(-with-V) training step for continuous (or gumbel-discrete) actions.

    Computes critic (q1/q2/v), actor, and optionally temperature losses under a
    persistent GradientTape, then applies the three optimizers in order:
    actor, critic, alpha.

    Args:
        BATCH: Batch namedtuple-like with obs, obs_, action, reward, done.
        isw: Importance-sampling weights for prioritized replay.
        cell_state: Recurrent cell state for the representation net.

    Returns:
        Tuple (mean TD error, summaries dict).
    """
    with tf.device(self.device):
        with tf.GradientTape(persistent=True) as tape:
            feat, _ = self._representation_net(BATCH.obs, cell_state=cell_state)
            v = self.v_net.value_net(feat)
            v_target, _ = self.v_target_net(BATCH.obs_, cell_state=cell_state)
            if self.is_continuous:
                # Reparameterized squashed-Gaussian sample.
                mu, log_std = self.actor_net.value_net(feat)
                pi, log_pi = squash_rsample(mu, log_std)
                entropy = gaussian_entropy(log_std)
            else:
                # Gumbel-softmax with straight-through one-hot estimator.
                logits = self.actor_net.value_net(feat)
                logp_all = tf.nn.log_softmax(logits)
                gumbel_noise = tf.cast(self.gumbel_dist.sample(
                    BATCH.action.shape), dtype=tf.float32)
                _pi = tf.nn.softmax(
                    (logp_all + gumbel_noise) / self.discrete_tau)
                _pi_true_one_hot = tf.one_hot(tf.argmax(_pi, axis=-1),
                                              self.a_dim)
                _pi_diff = tf.stop_gradient(_pi_true_one_hot - _pi)
                pi = _pi_diff + _pi
                log_pi = tf.reduce_sum(tf.multiply(logp_all, pi), axis=1,
                                       keepdims=True)
                entropy = -tf.reduce_mean(
                    tf.reduce_sum(tf.exp(logp_all) * logp_all,
                                  axis=1, keepdims=True))
            q1, q2 = self.q_net.get_value(feat, BATCH.action)
            q1_pi, q2_pi = self.q_net.get_value(feat, pi)
            # Bootstrapped target return from the target V network.
            dc_r = tf.stop_gradient(BATCH.reward + self.gamma * v_target *
                                    (1 - BATCH.done))
            # V target: min of twin Qs minus entropy bonus (no grad through it).
            v_from_q_stop = tf.stop_gradient(
                tf.minimum(q1_pi, q2_pi) - self.alpha * log_pi)
            td_v = v - v_from_q_stop
            td_error1 = q1 - dc_r
            td_error2 = q2 - dc_r
            q1_loss = tf.reduce_mean(tf.square(td_error1) * isw)
            q2_loss = tf.reduce_mean(tf.square(td_error2) * isw)
            v_loss_stop = tf.reduce_mean(tf.square(td_v) * isw)
            critic_loss = 0.5 * q1_loss + 0.5 * q2_loss + 0.5 * v_loss_stop
            actor_loss = -tf.reduce_mean(q1_pi - self.alpha * log_pi)
            if self.auto_adaption:
                # Temperature loss for automatic entropy tuning.
                alpha_loss = -tf.reduce_mean(
                    self.alpha * tf.stop_gradient(log_pi + self.target_entropy))
        # Apply actor first, then critic (persistent tape allows two gradients).
        actor_grads = tape.gradient(actor_loss,
                                    self.actor_net.trainable_variables)
        self.optimizer_actor.apply_gradients(
            zip(actor_grads, self.actor_net.trainable_variables))
        critic_grads = tape.gradient(
            critic_loss,
            self.q_net.trainable_variables + self.v_net.trainable_variables)
        self.optimizer_critic.apply_gradients(
            zip(
                critic_grads,
                self.q_net.trainable_variables + self.v_net.trainable_variables))
        if self.auto_adaption:
            alpha_grad = tape.gradient(alpha_loss, self.log_alpha)
            self.optimizer_alpha.apply_gradients([(alpha_grad, self.log_alpha)])
        self.global_step.assign_add(1)
        summaries = dict(
            [['LOSS/actor_loss', actor_loss],
             ['LOSS/q1_loss', q1_loss],
             ['LOSS/q2_loss', q2_loss],
             ['LOSS/v_loss', v_loss_stop],
             ['LOSS/critic_loss', critic_loss],
             ['Statistics/log_alpha', self.log_alpha],
             ['Statistics/alpha', self.alpha],
             ['Statistics/entropy', entropy],
             ['Statistics/q_min', tf.reduce_min(tf.minimum(q1, q2))],
             ['Statistics/q_mean', tf.reduce_mean(tf.minimum(q1, q2))],
             ['Statistics/q_max', tf.reduce_max(tf.maximum(q1, q2))],
             ['Statistics/v_mean', tf.reduce_mean(v)]])
        if self.auto_adaption:
            summaries.update({'LOSS/alpha_loss': alpha_loss})
        return (td_error1 + td_error2) / 2, summaries
def minimum(a, b):
    """Element-wise minimum of `a` and `b` (thin wrapper over `tf.minimum`).

    Args:
        a: First tensor (or tensor-convertible value).
        b: Second tensor, broadcast-compatible with `a`.

    Returns:
        The `tf.minimum(a, b)` tensor.

    Fix: the original computed and printed the result but never returned it,
    so every caller received None. The debug print is kept for compatibility.
    """
    y = tf.minimum(a, b)
    print(y)
    return y
def train_discrete(self, BATCH, isw, cell_state):
    """One SAC training step for discrete action spaces (soft policy over logits).

    Builds critic (twin-Q + V) and actor losses under a persistent tape and
    applies critic, actor, then temperature optimizers.

    Args:
        BATCH: Batch with obs, obs_, action (one-hot), reward, done.
        isw: Importance-sampling weights for prioritized replay.
        cell_state: Recurrent cell state for the representation net.

    Returns:
        Tuple (mean TD error, summaries dict).
    """
    with tf.device(self.device):
        with tf.GradientTape(persistent=True) as tape:
            feat, _ = self._representation_net(BATCH.obs,
                                               cell_state=cell_state)
            v = self.v_net.value_net(feat)  # [B, 1]
            v_target, _ = self.v_target_net(
                BATCH.obs_, cell_state=cell_state)  # [B, 1]
            q1_all, q2_all = self.q_net.get_value(feat)  # [B, A]

            def q_function(x):
                # Select the Q of the taken (one-hot) action.
                return tf.reduce_sum(x * BATCH.action, axis=-1,
                                     keepdims=True)  # [B, 1]
            q1 = q_function(q1_all)
            q2 = q_function(q2_all)
            logits = self.actor_net.value_net(feat)  # [B, A]
            logp_all = tf.nn.log_softmax(logits)  # [B, A]
            entropy = -tf.reduce_sum(tf.exp(logp_all) * logp_all,
                                     axis=1, keepdims=True)  # [B, 1]
            q_all = self.q_net.get_min(feat)  # [B, A]
            # NOTE(review): this reduce_sum has no axis, so it sums over the
            # whole [B, A] tensor before the (no-op) reduce_mean — confirm
            # whether axis=-1 was intended.
            actor_loss = -tf.reduce_mean(
                tf.reduce_sum((q_all - self.alpha * logp_all)
                              * tf.exp(logp_all))  # [B, A] => [B,]
            )
            # Bootstrapped return target from the target V network.
            dc_r = tf.stop_gradient(BATCH.reward + self.gamma * v_target *
                                    (1 - BATCH.done))
            # V regresses toward min over twin expected Qs under the policy.
            td_v = v - tf.stop_gradient(
                tf.minimum(
                    tf.reduce_sum(tf.exp(logp_all) * q1_all, axis=-1),
                    tf.reduce_sum(tf.exp(logp_all) * q2_all, axis=-1)))
            td_error1 = q1 - dc_r
            td_error2 = q2 - dc_r
            q1_loss = tf.reduce_mean(tf.square(td_error1) * isw)
            q2_loss = tf.reduce_mean(tf.square(td_error2) * isw)
            v_loss_stop = tf.reduce_mean(tf.square(td_v) * isw)
            critic_loss = 0.5 * q1_loss + 0.5 * q2_loss + 0.5 * v_loss_stop
            if self.auto_adaption:
                # Temperature loss drives entropy toward target_entropy.
                corr = tf.stop_gradient(self.target_entropy - entropy)
                # corr = tf.stop_gradient(tf.reduce_sum((logp_all - self.a_dim) * tf.exp(logp_all), axis=-1))  #[B, A] => [B,]
                alpha_loss = -tf.reduce_mean(self.alpha * corr)
        # Critic first, then actor, then (optionally) temperature.
        critic_grads = tape.gradient(
            critic_loss,
            self.q_net.trainable_variables + self.v_net.trainable_variables)
        self.optimizer_critic.apply_gradients(
            zip(
                critic_grads,
                self.q_net.trainable_variables + self.v_net.trainable_variables))
        actor_grads = tape.gradient(actor_loss,
                                    self.actor_net.trainable_variables)
        self.optimizer_actor.apply_gradients(
            zip(actor_grads, self.actor_net.trainable_variables))
        if self.auto_adaption:
            alpha_grad = tape.gradient(alpha_loss, self.log_alpha)
            self.optimizer_alpha.apply_gradients([(alpha_grad, self.log_alpha)])
        self.global_step.assign_add(1)
        summaries = dict([['LOSS/actor_loss', actor_loss],
                          ['LOSS/q1_loss', q1_loss],
                          ['LOSS/q2_loss', q2_loss],
                          ['LOSS/v_loss', v_loss_stop],
                          ['LOSS/critic_loss', critic_loss],
                          ['Statistics/log_alpha', self.log_alpha],
                          ['Statistics/alpha', self.alpha],
                          ['Statistics/entropy', tf.reduce_mean(entropy)],
                          ['Statistics/v_mean', tf.reduce_mean(v)]])
        if self.auto_adaption:
            summaries.update({'LOSS/alpha_loss': alpha_loss})
        return (td_error1 + td_error2) / 2, summaries
import tflearn from oxnnet.data_loader import StandardDataLoaderPowerDoppler from oxnnet.record import RecordWriter, PowerDopplerProcessTup, RecordReader from oxnnet.full_inferer import PowerDopplerFullInferer from oxnnet.feats_writer import StandardFeatsWriter train_eval_test_no = [75, 15, 10] segment_size_in = np.array([64] * 3) segment_size_out = segment_size_in crop_by = 0 stride = np.array([32] * 3, dtype=np.int) data_loader = StandardDataLoaderPowerDoppler(stride, segment_size_in) #https://github.com/caglar/noisy_units/blob/master/codes/tf/nunits.py HardTanh = lambda x: tf.minimum(tf.maximum(x, -1.), 1.) lin_sigmoid = lambda x: 0.25 * x + 0.5 # Sigmoid = lambda x, use_noise=0: T.nnet.sigmoid(x) HardSigmoid = lambda x, angle=0.25: tf.maximum( tf.minimum(angle * x + 0.5, 1.0), 0.0) HardSigmoid = lambda x: tf.minimum(tf.maximum(lin_sigmoid(x), 0.), 1.) def NTanh(x, use_noise, alpha=1.05, c=0.5, half_normal=False): """ Noisy Hard Tanh Units: NAN without learning p ---------------------------------------------------- Arguments: x: tensorflow tensor variable, input of the function. use_noise: bool, whether to add noise or not to the activations, this is in particular useful for the test time, in order to disable the noise injection.
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Builds a CNN+MLP Gaussian policy head: either a single shared network
    emitting [mean, log_std] concatenated, or separate mean and (adaptive or
    constant) log_std networks. The std parameter is then clamped and
    converted according to the configured parameterization.

    Args:
        state_input (tf.Tensor): Place holder for state input.
        name (str): Inner model name, also the variable scope of the
            inner model, if exist. One example is
            garage.tf.models.Sequential.

    Return:
        tf.Tensor: Sampled action.
        tf.Tensor: Mean.
        tf.Tensor: Parameterized log_std.
        tf.Tensor: log_std.
        garage.tf.distributions.DiagonalGaussian: Policy distribution.
    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share an CNN
            # Output bias init: zeros for the mean half, init_std_param for
            # the log_std half of the concatenated output.
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_conv = cnn(
                input_var=state_input,
                filter_dims=self._filter_dims,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                num_filters=self._num_filters,
                strides=self._strides,
                padding=self._padding,
                name='mean_std_cnn')
            mean_std_network = mlp(
                mean_std_conv,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                # First half of the output is the mean.
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                # Second half is the log_std.
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_conv = cnn(input_var=state_input,
                            filter_dims=self._filter_dims,
                            hidden_nonlinearity=self._hidden_nonlinearity,
                            hidden_w_init=self._hidden_w_init,
                            hidden_b_init=self._hidden_b_init,
                            num_filters=self._num_filters,
                            strides=self._strides,
                            padding=self._padding,
                            name='mean_cnn')
            mean_network = mlp(
                mean_conv,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)

            # std network
            if self._adaptive_std:
                # State-dependent log_std via its own CNN+MLP.
                log_std_conv = cnn(
                    input_var=state_input,
                    filter_dims=self._std_filter_dims,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    num_filters=self._std_num_filters,
                    strides=self._std_strides,
                    padding=self._std_padding,
                    name='log_std_cnn')
                log_std_network = mlp(
                    log_std_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                # State-independent learnable (or fixed) log_std parameter.
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        # Clamp the raw std parameter into the configured range.
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:  # we know it must be softplus here
            # log(softplus(param)) = log(log(1 + e^param)).
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    dist = DiagonalGaussian(self._output_dim)
    # Reparameterized sample: mean + eps * std.
    rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:])
    action_var = rnd * tf.exp(log_std_var) + mean_var

    return action_var, mean_var, log_std_var, std_param, dist
def huber_loss(error, delta):
    """Mean Huber loss: quadratic within `delta` of zero, linear beyond it.

    Args:
        error: Tensor of raw errors (prediction - target).
        delta: Transition point between the quadratic and linear regimes.

    Returns:
        Scalar tensor: mean of the per-element Huber losses.
    """
    magnitude = tf.abs(error)
    # Split |error| into the capped quadratic part and the linear excess.
    capped = tf.minimum(magnitude, delta)
    excess = magnitude - capped
    per_element = 0.5 * capped**2 + delta * excess
    return tf.reduce_mean(per_element)
def deg2rad(x):
    """Convert degrees to a wrapped, signed radian angle.

    Degrees are first converted to radians and wrapped into [0, 2*pi); the
    result is then folded to the signed distance to the nearest multiple of
    2*pi (magnitude min(a, 2*pi - a), sign positive below pi, negative above).

    Args:
        x: Tensor of angles in degrees.

    Returns:
        Tensor of signed wrapped angles in radians.
    """
    radians = x * np.pi / 180.0
    wrapped = tf.mod(radians, tf.constant(2 * np.pi, dtype=radians.dtype))
    folded = tf.minimum(wrapped, 2 * np.pi - wrapped)
    return folded * tf.sign(np.pi - wrapped)
def sigmoid_hard(self, x):
    """Piecewise-linear ("hard") sigmoid: clamp 0.25*x + 0.5 into [0, 1]."""
    linear = 0.25 * x + 0.5
    lower_clamped = tf.maximum(0.0, linear)
    return tf.minimum(1.0, lower_clamped)
# Margin-based hinge pieces over logits: zero once the target logit beats the
# best non-target logit by `margin`, quadratic just inside the margin,
# linear beyond it (CW-attack-style loss shape).
l_1 = 0.0
l_2 = ((max_non_target_cls_logits - (target_cls_logits - margin))**2) / margin
l_3 = max_non_target_cls_logits - (target_cls_logits - margin)
# max_non_correct_cls = tf.reduce_max(softmax-tf.one_hot(labels,NUM_CLASSES),axis=-1)
# margin =0.05
# l_1= 0.0
# l_2 = ((max_non_correct_cls-(corret_cls_prob-margin))**2)/margin
# l_3 =max_non_correct_cls -(corret_cls_prob-margin)
# prob_diff = corret_cls_prob - max_non_correct_cls
ce_loss = tf.maximum(l_1, tf.minimum(l_2, l_3))
# ce_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=model_logits))
# ce_loss = 10*tf.maximum(0.0, - (max_non_correct_cls -corret_cls_prob) +0.05)
# ce_loss = 10(corret_cls_prob)
# NOTE(review): the margin loss computed above is immediately overwritten here,
# so only the plain cross-entropy is actually used — confirm this is intended.
ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=model_logits)
# ce_loss_mean = tf.reduce_mean(labels_coeff *ce_loss)
ce_loss_mean = tf.reduce_mean(ce_loss)
# beta_0: scalar loss weight, overridable at session run time (defaults to 1).
beta_0_default = tf.constant(1, dtype=tf.float32)
beta_0 = tf.placeholder_with_default(beta_0_default, name='beta_0', shape=beta_0_default.shape)
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
    """Generates a collection of bounding boxes to be used as anchors.

    The number of anchors generated for a single grid with shape MxM where we
    place k boxes over each grid center is k*M^2 and thus the total number of
    anchors is the sum over all grids. In our box_specs_list example
    (see the constructor docstring), we would place two boxes over each grid
    point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
    thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
    output anchors follows the order of how the grid sizes and box_specs are
    specified (with box_spec index varying the fastest, followed by width
    index, then height index, then grid index).

    Args:
        feature_map_shape_list: list of pairs of convnet layer resolutions in the
            format [(height_0, width_0), (height_1, width_1), ...]. For example,
            setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
            correspond to an 8x8 layer followed by a 7x7 layer.
        im_height: the height of the image to generate the grid for. If both
            im_height and im_width are 1, the generated anchors default to
            absolute coordinates, otherwise normalized coordinates are produced.
        im_width: the width of the image to generate the grid for. If both
            im_height and im_width are 1, the generated anchors default to
            absolute coordinates, otherwise normalized coordinates are produced.

    Returns:
        boxes_list: a list of BoxLists each holding anchor boxes corresponding to
            the input feature map shapes.

    Raises:
        ValueError: if feature_map_shape_list, box_specs_list do not have the same
            length.
        ValueError: if feature_map_shape_list does not consist of pairs of
            integers
    """
    # Validate that one (height, width) pair was supplied per box spec.
    if not (isinstance(feature_map_shape_list, list)
            and len(feature_map_shape_list) == len(self._box_specs)):
        raise ValueError('feature_map_shape_list must be a list with the same '
                         'length as self._box_specs')
    if not all([
        isinstance(list_item, tuple) and len(list_item) == 2
        for list_item in feature_map_shape_list
    ]):
        raise ValueError('feature_map_shape_list must be a list of pairs.')
    im_height = tf.to_float(im_height)
    im_width = tf.to_float(im_width)
    # Default strides/offsets: one anchor center per feature-map cell,
    # offset to the cell center; explicit values are normalized by image size.
    if not self._anchor_strides:
        anchor_strides = [(1.0 / tf.to_float(pair[0]),
                           1.0 / tf.to_float(pair[1]))
                          for pair in feature_map_shape_list]
    else:
        anchor_strides = [(tf.to_float(stride[0]) / im_height,
                           tf.to_float(stride[1]) / im_width)
                          for stride in self._anchor_strides]
    if not self._anchor_offsets:
        anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
                          for stride in anchor_strides]
    else:
        anchor_offsets = [(tf.to_float(offset[0]) / im_height,
                           tf.to_float(offset[1]) / im_width)
                          for offset in self._anchor_offsets]
    for arg, arg_name in zip([anchor_strides, anchor_offsets],
                             ['anchor_strides', 'anchor_offsets']):
        if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
            raise ValueError('%s must be a list with the same length '
                             'as self._box_specs' % arg_name)
        if not all([
            isinstance(list_item, tuple) and len(list_item) == 2
            for list_item in arg
        ]):
            raise ValueError('%s must be a list of pairs.' % arg_name)
    anchor_grid_list = []
    # Scale the base anchor so it stays square relative to the shorter image
    # side when coordinates are normalized.
    min_im_shape = tf.minimum(im_height, im_width)
    scale_height = min_im_shape / im_height
    scale_width = min_im_shape / im_width
    base_anchor_size = [
        scale_height * self._base_anchor_size[0],
        scale_width * self._base_anchor_size[1]
    ]
    for feature_map_index, (grid_size, scales, aspect_ratios, stride,
                            offset) in enumerate(
                                zip(feature_map_shape_list, self._scales,
                                    self._aspect_ratios, anchor_strides,
                                    anchor_offsets)):
        tiled_anchors = grid_anchor_generator.tile_anchors(
            grid_height=grid_size[0],
            grid_width=grid_size[1],
            scales=scales,
            aspect_ratios=aspect_ratios,
            base_anchor_size=base_anchor_size,
            anchor_stride=stride,
            anchor_offset=offset)
        if self._clip_window is not None:
            # Clip (without dropping) anchors that fall outside the window.
            tiled_anchors = box_list_ops.clip_to_window(
                tiled_anchors, self._clip_window, filter_nonoverlapping=False)
        num_anchors_in_layer = tiled_anchors.num_boxes_static()
        if num_anchors_in_layer is None:
            num_anchors_in_layer = tiled_anchors.num_boxes()
        # Tag every anchor with the index of the feature map it came from.
        anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
        tiled_anchors.add_field('feature_map_index', anchor_indices)
        anchor_grid_list.append(tiled_anchors)
    return anchor_grid_list
def setup_model(self):
    """Build the PPO training graph: policies, clipped surrogate loss,
    optimizer, and summaries.

    Constructs current and old policy networks, the PPO clipped-surrogate
    objective with entropy penalty and value loss, the old<-new sync op, an
    MPI Adam optimizer, and the loss/gradient functions used during learning.
    """
    print("setup_model : ppo_imitation.py -> PPOImitation.setup_model()")
    traceback.print_stack()

    with SetVerbosity(self.verbose):
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.set_random_seed(self.seed)
            self.sess = tf_util.make_session(num_cpu=self.n_cpu_tf_sess,
                                             graph=self.graph)

            # Construct network for new policy
            self.policy_pi = self.policy(self.sess, self.observation_space,
                                         self.action_space, self.n_envs, 1,
                                         None, reuse=False,
                                         **self.policy_kwargs)

            # Network for old policy
            with tf.variable_scope("oldpi", reuse=False):
                old_pi = self.policy(self.sess, self.observation_space,
                                     self.action_space, self.n_envs, 1,
                                     None, reuse=False, **self.policy_kwargs)

            with tf.variable_scope("loss", reuse=False):
                # Target advantage function (if applicable)
                atarg = tf.placeholder(dtype=tf.float32, shape=[None])

                # Empirical return
                ret = tf.placeholder(dtype=tf.float32, shape=[None])

                # learning rate multiplier, updated with schedule
                lrmult = tf.placeholder(name='lrmult', dtype=tf.float32,
                                        shape=[])

                # Annealed cliping parameter epislon
                clip_param = self.clip_param * lrmult

                obs_ph = self.policy_pi.obs_ph
                action_ph = self.policy_pi.pdtype.sample_placeholder([None])

                # KL(old || new) and entropy, averaged over the batch.
                kloldnew = old_pi.proba_distribution.kl(
                    self.policy_pi.proba_distribution)
                ent = self.policy_pi.proba_distribution.entropy()
                meankl = tf.reduce_mean(kloldnew)
                meanent = tf.reduce_mean(ent)
                pol_entpen = (-self.entcoeff) * meanent

                # pnew / pold
                ratio = tf.exp(
                    self.policy_pi.proba_distribution.logp(action_ph) -
                    old_pi.proba_distribution.logp(action_ph))

                # surrogate from conservative policy iteration
                surr1 = ratio * atarg
                surr2 = tf.clip_by_value(ratio, 1.0 - clip_param,
                                         1.0 + clip_param) * atarg
                # Fraction of samples where the ratio was clipped (diagnostic).
                clip_frac = tf.reduce_mean(
                    tf.to_float(tf.greater(tf.abs(ratio - 1.0), clip_param)))

                # PPO's pessimistic surrogate (L^CLIP)
                pol_surr = - tf.reduce_mean(tf.minimum(surr1, surr2))
                vf_loss = tf.reduce_mean(
                    tf.square(self.policy_pi.value_flat - ret))
                total_loss = pol_surr + pol_entpen + vf_loss
                losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
                self.loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl",
                                   "ent"]

                tf.summary.scalar('entropy_loss', pol_entpen)
                tf.summary.scalar('policy_gradient_loss', pol_surr)
                tf.summary.scalar('value_function_loss', vf_loss)
                tf.summary.scalar('approximate_kullback-leibler', meankl)
                tf.summary.scalar('clip_factor', clip_param)
                tf.summary.scalar('loss', total_loss)
                tf.summary.scalar('clip_frac', clip_frac)

                self.params = tf_util.get_trainable_vars("model")

                # Op that copies current-policy variables into the old policy.
                self.assign_old_eq_new = tf_util.function(
                    [], [],
                    updates=[tf.assign(oldv, newv) for (oldv, newv) in
                             zipsame(tf_util.get_globals_vars("oldpi"),
                                     tf_util.get_globals_vars("model"))])

            with tf.variable_scope("Adam_mpi", reuse=False):
                self.adam = MpiAdam(self.params, epsilon=self.adam_epsilon,
                                    sess=self.sess)

            with tf.variable_scope("input_info", reuse=False):
                tf.summary.scalar('discounted_rewards', tf.reduce_mean(ret))
                tf.summary.scalar('learning_rate',
                                  tf.reduce_mean(self.optim_stepsize))
                tf.summary.scalar('advantage', tf.reduce_mean(atarg))
                tf.summary.scalar('clip_range',
                                  tf.reduce_mean(self.clip_param))

                if self.full_tensorboard_log:
                    tf.summary.histogram('discounted_rewards', ret)
                    tf.summary.histogram('learning_rate', self.optim_stepsize)
                    tf.summary.histogram('advantage', atarg)
                    tf.summary.histogram('clip_range', self.clip_param)
                    if tf_util.is_image(self.observation_space):
                        tf.summary.image('observation', obs_ph)
                    else:
                        tf.summary.histogram('observation', obs_ph)

            self.step = self.policy_pi.step
            self.proba_step = self.policy_pi.proba_step
            self.initial_state = self.policy_pi.initial_state

            tf_util.initialize(sess=self.sess)

            self.summary = tf.summary.merge_all()

            # Functions returning [summary, flat gradient] + losses, and losses
            # alone, for use in the learn() loop.
            self.lossandgrad = tf_util.function(
                [obs_ph, old_pi.obs_ph, action_ph, atarg, ret, lrmult],
                [self.summary,
                 tf_util.flatgrad(total_loss, self.params)] + losses)
            self.compute_losses = tf_util.function(
                [obs_ph, old_pi.obs_ph, action_ph, atarg, ret, lrmult],
                losses)

    return
def define_model(self):
    """Define the fitting graph: iteration budget, then a `tf.while_loop`
    that optimizes the model parameters, plus assign ops for the results.

    The while_loop carries two generations of parameter state (current and
    best-so-far, each: r, dist_param, b1/b2 visit-time params, two gradient
    buffers, a log-likelihood scalar, a step counter, and a scratch vector),
    followed by step bookkeeping (required steps, last alpha, max LL, extra
    steps taken). `parallel_iterations=1` keeps iterations strictly sequential.
    """
    self.non_zero_samples_num = tf.shape(self.training_ints)[0]
    self.total_samples_num = self.non_zero_samples_num
    # Never use a mini-batch larger than the data set itself.
    mini_batch_size = tf.minimum(self.total_samples_num, self.mini_batch_size)
    if self.factor_max_iter:
        # Scale the iteration budget by dataset/mini-batch ratio, floored at
        # abs_max_iters.
        self.max_iters = tf.maximum(self.abs_max_iters, tf.cast(tf.ceil(
            tf.cast(self.given_max_iters, tf.float64) *
            (tf.cast(self.total_samples_num, tf.float64) /
             tf.cast(mini_batch_size, tf.float64))), tf.int32))
    else:
        self.max_iters = tf.constant(self.given_max_iters)
    self.non_zero_batch_size = tf.cast(mini_batch_size, tf.int32)
    # Total parameter count: r + dist_param + the two visit-time vectors.
    n_total_p = 2 + 2 * self.n_vis_t_params
    new_r, new_dist_param, new_b1_v_params, new_b2_v_params, \
        _, _, _, _, _, _, _, _, _, _, _, _, _, _, req_step, last_alpha, \
        self.max_ll, self.extra_took_steps = \
        tf.while_loop(self.model_cond, self.general_model_body, [
            self.r, self.dist_param, self.b1_v_params, self.b2_v_params,
            tf.zeros([n_total_p], dtype=tf.float64),
            tf.zeros([n_total_p], dtype=tf.float64),
            tf.constant(-10000.0, dtype=tf.float64),
            tf.constant(0, dtype=tf.int32),
            tf.zeros([n_total_p], dtype=tf.float64),
            self.r, self.dist_param, self.b1_v_params, self.b2_v_params,
            tf.zeros([n_total_p], dtype=tf.float64),
            tf.zeros([n_total_p], dtype=tf.float64),
            tf.constant(-20000.0, dtype=tf.float64),
            tf.constant(0, dtype=tf.int32),
            tf.zeros([n_total_p], dtype=tf.float64),
            tf.constant(0, dtype=tf.int32),
            self.set_alpha_init,
            tf.constant(-20000.0, dtype=tf.float64),
            tf.constant(0, dtype=tf.int32)
        ], shape_invariants=[
            tf.TensorShape([]), tf.TensorShape([]),
            tf.TensorShape([self.n_vis_t_params]),
            tf.TensorShape([self.n_vis_t_params]),
            tf.TensorShape([n_total_p]),
            tf.TensorShape([n_total_p]),
            tf.TensorShape([]), tf.TensorShape([]),
            tf.TensorShape([n_total_p]),
            tf.TensorShape([]), tf.TensorShape([]),
            tf.TensorShape([self.n_vis_t_params]),
            tf.TensorShape([self.n_vis_t_params]),
            tf.TensorShape([n_total_p]),
            tf.TensorShape([n_total_p]),
            tf.TensorShape([]), tf.TensorShape([]),
            tf.TensorShape([n_total_p]),
            tf.TensorShape([]), tf.TensorShape([]),
            tf.TensorShape([]), tf.TensorShape([])
        ], parallel_iterations=1)
    # Write the optimized values back into the model variables.
    self.r_assign = self.r.assign(new_r)
    self.f_assign = self.dist_param.assign(new_dist_param)
    self.req_step = req_step
    self.last_alpha = last_alpha
    if self.equal_v_params:
        # Shared visit-time params: only b1 is real; b2 gets a dummy assign.
        self.b1_v_params_assign = self.b1_v_params.assign(new_b1_v_params)
        self.b2_v_params_assign = tf.zeros([], dtype=tf.int32)
    else:
        self.b1_v_params_assign = self.b1_v_params.assign(new_b1_v_params)
        self.b2_v_params_assign = self.b2_v_params.assign(new_b2_v_params)
def xdet_model_fn(features, labels, mode, params):
    """Our model_fn for ResNet to be used with our Estimator.

    Builds the X-Det detection graph (backbone + detection head), performs
    hard-negative mining, and returns an EstimatorSpec. Only EVAL mode is
    supported — any other mode raises ValueError at the end.

    Args:
        features: input image batch fed to the backbone.
        labels: dict with 'num_anchors_list', 'targets' (a list packing
            glabels/gtargets/gscores per feature layer followed by raw
            eval-time tensors), and 'decode_fn' to turn regression outputs
            into boxes.
        mode: tf.estimator.ModeKeys value.
        params: dict of hyper-parameters ('model_scope', 'resnet_size',
            'data_format', 'num_classes', 'negative_ratio',
            'weight_decay', ...).
    """
    num_anchors_list = labels['num_anchors_list']
    num_feature_layers = len(num_anchors_list)

    # labels['targets'] packs per-layer targets first, then (from the end)
    # the raw evaluation tensors. NOTE(review): layout inferred from the
    # negative indices below — confirm against the input_fn.
    shape = labels['targets'][-1]
    if mode != tf.estimator.ModeKeys.TRAIN:
        org_image = labels['targets'][-2]
        isdifficult = labels['targets'][-3]
        bbox_img = labels['targets'][-4]
        gbboxes_raw = labels['targets'][-5]
        glabels_raw = labels['targets'][-6]

    # Per-layer ground truth; [0] because this head uses a single layer.
    glabels = labels['targets'][:num_feature_layers][0]
    gtargets = labels['targets'][num_feature_layers : 2 * num_feature_layers][0]
    gscores = labels['targets'][2 * num_feature_layers : 3 * num_feature_layers][0]

    with tf.variable_scope(params['model_scope'], default_name = None, values = [features], reuse=tf.AUTO_REUSE):
        backbone = xdet_body_v3.xdet_resnet_v3(params['resnet_size'], params['data_format'])
        body_cls_output, body_regress_output = backbone(inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))
        cls_pred, location_pred = xdet_body_v3.xdet_head(body_cls_output, body_regress_output,
                                                         params['num_classes'], num_anchors_list[0],
                                                         (mode == tf.estimator.ModeKeys.TRAIN),
                                                         data_format=params['data_format'])

    # Convert NCHW predictions to NHWC so downstream reshapes are uniform.
    if params['data_format'] == 'channels_first':
        cls_pred = tf.transpose(cls_pred, [0, 2, 3, 1])
        location_pred = tf.transpose(location_pred, [0, 2, 3, 1])
        #org_image = tf.transpose(org_image, [0, 2, 3, 1])

    # batch size is 1 — drop the leading batch dimension everywhere.
    shape = tf.squeeze(shape, axis = 0)
    glabels = tf.squeeze(glabels, axis = 0)
    gtargets = tf.squeeze(gtargets, axis = 0)
    gscores = tf.squeeze(gscores, axis = 0)
    cls_pred = tf.squeeze(cls_pred, axis = 0)
    location_pred = tf.squeeze(location_pred, axis = 0)
    if mode != tf.estimator.ModeKeys.TRAIN:
        org_image = tf.squeeze(org_image, axis = 0)
        isdifficult = tf.squeeze(isdifficult, axis = 0)
        gbboxes_raw = tf.squeeze(gbboxes_raw, axis = 0)
        glabels_raw = tf.squeeze(glabels_raw, axis = 0)
        bbox_img = tf.squeeze(bbox_img, axis = 0)

    # Decode regression outputs into absolute boxes.
    bboxes_pred = labels['decode_fn'](location_pred)#(tf.reshape(location_pred, location_pred.get_shape().as_list()[:-1] + [-1, 4]))#(location_pred)#
    # Evaluation metrics plus an op that saves images with drawn boxes.
    # NOTE(review): uses the eval-only tensors above, so this function is
    # effectively eval-only (consistent with the ValueError at the end).
    eval_ops, save_image_op = bboxes_eval(org_image, shape, bbox_img, cls_pred,
                                          bboxes_pred, glabels_raw, gbboxes_raw,
                                          isdifficult, params['num_classes'])
    _ = tf.identity(save_image_op, name='save_image_with_bboxes_op')

    # Flatten anchors so masking works on a single axis.
    cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
    location_pred = tf.reshape(location_pred, [-1, 4])
    glabels = tf.reshape(glabels, [-1])
    gscores = tf.reshape(gscores, [-1])
    gtargets = tf.reshape(gtargets, [-1, 4])

    # raw mask for positive > 0.5, and for negetive < 0.3
    # each positive examples has one label
    positive_mask = glabels > 0#tf.logical_and(glabels > 0, gscores > params['match_threshold'])
    fpositive_mask = tf.cast(positive_mask, tf.float32)
    n_positives = tf.reduce_sum(fpositive_mask)
    # negtive examples are those max_overlap is still lower than neg_threshold, note that some positive may also has lower jaccard
    # note those gscores is 0 is either be ignored during anchors encode or anchors have 0 overlap with all ground truth
    #negtive_mask = tf.logical_and(tf.logical_and(tf.logical_not(tf.logical_or(positive_mask, glabels < 0)), gscores < params['neg_threshold']), gscores > 0.)
    negtive_mask = tf.logical_and(tf.equal(glabels, 0), gscores > 0.)
    #negtive_mask = tf.logical_and(tf.logical_and(tf.logical_not(positive_mask), gscores < params['neg_threshold']), gscores > 0.)
    #negtive_mask = tf.logical_and(gscores < params['neg_threshold'], tf.logical_not(positive_mask))
    fnegtive_mask = tf.cast(negtive_mask, tf.float32)
    n_negtives = tf.reduce_sum(fnegtive_mask)

    # Keep negative_ratio negatives per positive, capped by availability.
    n_neg_to_select = tf.cast(params['negative_ratio'] * n_positives, tf.int32)
    n_neg_to_select = tf.minimum(n_neg_to_select, tf.cast(n_negtives, tf.int32))

    # hard negative mining for classification: pick the negatives with the
    # lowest background probability (hardest to classify as background).
    predictions_for_bg = tf.nn.softmax(cls_pred)[:, 0]
    prob_for_negtives = tf.where(negtive_mask,
                                 0. - predictions_for_bg,
                                 # ignore all the positives
                                 0. - tf.ones_like(predictions_for_bg))
    topk_prob_for_bg, _ = tf.nn.top_k(prob_for_negtives, k=n_neg_to_select)
    selected_neg_mask = prob_for_negtives > topk_prob_for_bg[-1]

    # # random select negtive examples for classification
    # selected_neg_mask = tf.random_uniform(tf.shape(gscores), minval=0, maxval=1.) < tf.where(
    #                                                                     tf.greater(n_negtives, 0),
    #                                                                     tf.divide(tf.cast(n_neg_to_select, tf.float32), n_negtives),
    #                                                                     tf.zeros_like(tf.cast(n_neg_to_select, tf.float32)),
    #                                                                     name='rand_select_negtive')

    # include both selected negtive and all positive examples
    final_mask = tf.stop_gradient(tf.logical_or(tf.logical_and(negtive_mask, selected_neg_mask), positive_mask))
    total_examples = tf.reduce_sum(tf.cast(final_mask, tf.float32))

    # add mask for glabels and cls_pred here
    glabels = tf.boolean_mask(tf.clip_by_value(glabels, 0, FLAGS.num_classes), tf.stop_gradient(final_mask))
    cls_pred = tf.boolean_mask(cls_pred, tf.stop_gradient(final_mask))
    # Localization is trained only on positives.
    location_pred = tf.boolean_mask(location_pred, tf.stop_gradient(positive_mask))
    gtargets = tf.boolean_mask(gtargets, tf.stop_gradient(positive_mask))

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    # tf.cond guards against an empty selection when there are no positives.
    cross_entropy = tf.cond(n_positives > 0.,
                            lambda: tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred),
                            lambda: 0.)
    #cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred)
    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    loc_loss = tf.cond(n_positives > 0.,
                       lambda: modified_smooth_l1(location_pred, tf.stop_gradient(gtargets), sigma=1.),
                       lambda: tf.zeros_like(location_pred))
    #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1))
    loc_loss = tf.identity(loc_loss, name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    # Force the image-saving op to run whenever the loss is evaluated.
    with tf.control_dependencies([save_image_op]):
        # Add weight decay to the loss. We exclude the batch norm variables because
        # doing so leads to a small improvement in accuracy.
        loss = cross_entropy + loc_loss + params['weight_decay'] * tf.add_n(
            [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'batch_normalization' not in v.name])
        total_loss = tf.identity(loss, name='total_loss')

    predictions = {
        'classes': tf.argmax(cls_pred, axis=-1),
        'probabilities': tf.reduce_max(tf.nn.softmax(cls_pred, name='softmax_tensor'), axis=-1),
        'bboxes_predict': tf.reshape(bboxes_pred, [-1, 4]),
        'saved_image_index': save_image_op }

    summary_hook = tf.train.SummarySaverHook(save_secs=FLAGS.save_summary_steps,
                                             output_dir=FLAGS.model_dir,
                                             summary_op=tf.summary.merge_all())

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                          evaluation_hooks = [summary_hook],
                                          loss=loss, eval_metric_ops=eval_ops)#=eval_ops)
    else:
        raise ValueError('This script only support predict mode!')
def call(self, inputs):
    """Pyramid ROI pooling: crop each box from its assigned pyramid level.

    Args:
        inputs: list of
            [0] boxes  — [batch, num_boxes, (y1, x1, y2, x2)] normalized
            [1] image_meta — packed image metadata (see compose_image_meta())
            [2:] feature maps, each [batch, height, width, channels]

    Returns:
        Pooled regions, [batch, num_boxes, pool_h, pool_w, channels],
        in the same box order as the input.
    """
    # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords
    boxes = inputs[0]

    # Image meta
    # Holds details about the image. See compose_image_meta()
    image_meta = inputs[1]

    # Feature Maps. List of feature maps from different level of the
    # feature pyramid. Each is [batch, height, width, channels]
    feature_maps = inputs[2:]

    # Assign each ROI to a level in the pyramid based on the ROI area.
    y1, x1, y2, x2 = tf.split(boxes, 4, axis=2)
    h = y2 - y1
    w = x2 - x1
    # Use shape of first image. Images in a batch must have the same size.
    image_shape = parse_image_meta_graph(image_meta)['image_shape'][0]
    # Equation 1 in the Feature Pyramid Networks paper. Account for
    # the fact that our coordinates are normalized here.
    # e.g. a 224x224 ROI (in pixels) maps to P4
    image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32)
    roi_level = log2_graph(tf.sqrt(h * w) / (128.0 / tf.sqrt(image_area)))
    # Clamp the assigned level to [2, 4] (this variant uses 3 levels).
    roi_level = tf.minimum(4, tf.maximum(
        2, 3 + tf.cast(tf.round(roi_level), tf.int32)))
    roi_level = tf.squeeze(roi_level, 2)

    # Loop through levels and apply ROI pooling to each. Levels 2 to 4
    # (range(2, 5)), matching the clamp above; assumes len(feature_maps) == 3.
    pooled = []
    box_to_level = []
    for i, level in enumerate(range(2, 5)):
        ix = tf.where(tf.equal(roi_level, level))
        level_boxes = tf.gather_nd(boxes, ix)

        # Box indices for crop_and_resize.
        box_indices = tf.cast(ix[:, 0], tf.int32)

        # Keep track of which box is mapped to which level
        box_to_level.append(ix)

        # Stop gradient propogation to ROI proposals
        level_boxes = tf.stop_gradient(level_boxes)
        box_indices = tf.stop_gradient(box_indices)

        # Crop and Resize
        # From Mask R-CNN paper: "We sample four regular locations, so
        # that we can evaluate either max or average pooling. In fact,
        # interpolating only a single value at each bin center (without
        # pooling) is nearly as effective."
        #
        # Here we use the simplified approach of a single value per bin,
        # which is how it's done in tf.crop_and_resize()
        # Result: [batch * num_boxes, pool_height, pool_width, channels]
        pooled.append(tf.image.crop_and_resize(
            feature_maps[i], level_boxes, box_indices, self.pool_shape,
            method="bilinear"))

    # Pack pooled features into one tensor
    pooled = tf.concat(pooled, axis=0)

    # Pack box_to_level mapping into one array and add another
    # column representing the order of pooled boxes
    box_to_level = tf.concat(box_to_level, axis=0)
    box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1)
    box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range],
                             axis=1)

    # Rearrange pooled features to match the order of the original boxes
    # Sort box_to_level by batch then box index
    # TF doesn't have a way to sort by two columns, so merge them and sort.
    sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1]
    ix = tf.nn.top_k(sorting_tensor, k=tf.shape(
        box_to_level)[0]).indices[::-1]
    ix = tf.gather(box_to_level[:, 2], ix)
    pooled = tf.gather(pooled, ix)

    # Re-add the batch dimension
    shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0)
    pooled = tf.reshape(pooled, shape)
    return pooled
def cutmix(data_dict, cutmix_lambda, rng_seed_for_testing=None):
    """
    implementation of cutmix, https://arxiv.org/abs/1905.04899
    There is a major difference in our implementation. While the authors proposed sampling cutmix_lambda
    from a uniform distribution of [0, 1), we found that this over-regularised our models. Instead, we use
    a fixed value for lambda.
    We denote the images from the un-permuted minibatch as the 'base' images. We denote the patches we are
    pasting in as coming from the 'shuffled' images
    :param data_dict: data dictionary with the 'image' and 'label' for the batch
    :param cutmix_lambda: approximate proportion of the output image that the 'base' image makes up
    :param rng_seed_for_testing: seed for testing purposes
    :return: data_dict with new fields 'cutmix_label', 'cutmix_lambda' and (if mixup) 'cutmix_label2',
        all of which are to be used in the loss function
    """
    assert 0. <= cutmix_lambda <= 1., "cutmix lambda must be between 0. and 1."
    input_images = data_dict['image']
    batch_size = int(input_images.shape[0])
    channels = int(input_images.shape[-1])
    # Images must be a rank-4 batch (NHWC — see h/w extraction below).
    assert tf.keras.backend.ndim(input_images) == 4
    assert batch_size > 1, "cutmix must have batch size > 1"

    # do the shuffling by 'bumping' the array along by TWO. Random shuffling will end up with many cases of the image
    # being shuffled with itself (because we normally have small batches).
    # note that we roll by 2 instead of 1, because if we are also using mixup, we do not want the base image to be
    # cut AND mixed with the same shuffled image
    permute_batch_op = cutmix_permute_batch_op(batch_size)
    shuffled_batch = permute_batch_op(input_images)

    cutmix_lambda = tf.cast(cutmix_lambda, tf.float32)
    # NHWC
    h = int(input_images.shape[1])
    w = int(input_images.shape[2])

    # coordinates for the centre of the cutout box
    r_x = tf.random.uniform([1], maxval=w, seed=rng_seed_for_testing)
    r_y = tf.random.uniform([1], maxval=h, seed=rng_seed_for_testing)

    # box will have same aspect ratio as the image itself
    r_w = w * tf.sqrt(1. - cutmix_lambda)
    r_h = h * tf.sqrt(1. - cutmix_lambda)

    # bounding box corner coordinates, clipped to the image and rounded to
    # integer pixels ([0] drops the length-1 dimension from the uniforms).
    x1 = tf.cast(tf.round(tf.maximum(r_x - r_w / 2., 0.)), tf.int32)[0]
    x2 = tf.cast(tf.round(tf.minimum(r_x + r_w / 2., w)), tf.int32)[0]
    y1 = tf.cast(tf.round(tf.maximum(r_y - r_h / 2., 0.)), tf.int32)[0]
    y2 = tf.cast(tf.round(tf.minimum(r_y + r_h / 2., h)), tf.int32)[0]

    # creating the box of logicals (1=shuffled image, 0=original image)
    x = tf.range(w)
    y = tf.range(h)
    in_x_range = tf.cast(tf.logical_and(x >= x1, x < x2), tf.int32)
    in_y_range = tf.cast(tf.logical_and(y >= y1, y < y2), tf.int32)

    # y is related to the height, so shape is [None, len(in_y_range), len(in_x_range), None]
    mask = tf.cast(in_x_range * in_y_range[:, None], tf.bool)

    # proportion of the area that is 'zeros' (corresponding to original image).
    # This is the *realized* lambda after clipping, which can differ from the
    # requested cutmix_lambda when the box is clipped at the image edge.
    cutmix_value = 1. - tf.reduce_mean(tf.cast(mask, tf.float32))

    # get mask ready: broadcast [H, W] -> [N, H, W, C]
    mask = tf.tile(mask[None, ..., None], [batch_size, 1, 1, channels])

    # same lambda value for each member of the minibatch
    cutmix_lambda = tf.fill([batch_size], value=tf.cast(cutmix_value, input_images.dtype))

    # where mask == 1, choose the shuffled batch. else unshuffled
    output_images = tf.where(mask, shuffled_batch, input_images)

    data_dict['image'] = output_images
    # Labels are permuted with the SAME op so patch pixels and patch labels
    # stay aligned.
    data_dict['cutmix_label'] = permute_batch_op(data_dict['label'])
    data_dict['cutmix_lambda'] = cutmix_lambda

    # mixup has been done
    if 'label_mixed_up' in data_dict:
        # need to store all the labels that will apply to a given image (that has already been mixed up)
        data_dict['cutmix_label2'] = permute_batch_op(data_dict['label_mixed_up'])
    return data_dict
def refine_detection_graph(rois, probs, deltas, window, config):
    """Refine proposals into final detections for one image.

    Applies class-specific box deltas, clips to the image window, filters
    background and low-confidence boxes, runs per-class NMS, and keeps the
    top-scoring detections.

    Args:
        rois: proposal boxes.
        probs: [N, num_classes] class probabilities per ROI.
        deltas: [N, num_classes, 4] per-class box refinements.
        window: image window to clip boxes to.
        config: object with BBOX_STD_DEV, DETECTION_MIN_CONFIDENCE,
            DETECTION_MAX_INSTANCES, DETECTION_NMS_THRESHOLD.

    Returns:
        [DETECTION_MAX_INSTANCES, (y1, x1, y2, x2, class_id, score)],
        zero-padded when there are fewer detections.
    """
    # Class ID per ROI
    class_ids = tf.argmax(probs, axis=1)
    # Class probebility of top class of each ROI
    indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    ## gather will collect slices while gather_nd collect points/slices
    class_scores = tf.gather_nd(probs, indices)
    # Class-specific bounding box deltas [N, num_classes, (dy, dx, log(dh), log(dw))]
    deltas_specific = tf.gather_nd(deltas, indices)
    refined_rois = apply_box_deltas_graph(rois, deltas_specific*config.BBOX_STD_DEV)
    refined_rois = clip_to_window(window, refined_rois)
    refined_rois = tf.to_int32(tf.rint(refined_rois))  # round and cast to int

    # Filter out bg(0) boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    if config.DETECTION_MIN_CONFIDENCE:
        conf_keep = tf.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        # Intersect the two index sets (sets API needs a leading dim).
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # Apply per-class NMS
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    # tf.unique: return a tuple of (unique values, indices)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        # apply nms for a given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))
        # apply nms, return 1D array with indices
        class_keep = tf.image.non_max_suppression(
            tf.to_float(tf.gather(pre_nms_rois, ixs)),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=config.DETECTION_MAX_INSTANCES,
            iou_threshold=config.DETECTION_NMS_THRESHOLD
        )
        # Map back from local (per-class) indices to indices into `keep`.
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        # Pad with -1 so every class returns a fixed-length vector; the -1
        # sentinel rows are stripped after map_fn below.
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT', constant_values=-1)
        # Set a static shape so map_fn() can infer its output shape.
        class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
        return class_keep

    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int64)
    nms_keep = tf.reshape(nms_keep, [-1])  # return 1D array
    # Drop the -1 padding sentinels added in nms_keep_map.
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep>-1)[:,0])
    # Intersect with `keep` so only boxes that survived BOTH the
    # confidence filter and NMS remain.
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]

    # Keep top detections
    roi_count = config.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    # tf.nn.top_k: return a tuple of (values, indices)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    detections = tf.concat([
        tf.to_float(tf.gather(refined_rois, keep)),
        tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
        tf.to_float(tf.gather(class_scores, keep))[..., tf.newaxis]
        ], axis=1)

    # Pad with zeros if detections < DETECTION_MAX_INSTANCES
    gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
def ssd_losses_old(logits, localisations,
                   gclasses, glocalisations, gscores,
                   match_threshold=0.5,
                   negative_ratio=3.,
                   alpha=1.,
                   label_smoothing=0.,
                   device='/cpu:0',
                   scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.device(device):
        with tf.name_scope(scope, 'ssd_losses'):
            l_cross_pos = []
            l_cross_neg = []
            l_loc = []
            # One loss block per feature layer.
            for i in range(len(logits)):
                dtype = logits[i].dtype
                with tf.name_scope('block_%i' % i):
                    # Sizing weight: number of anchor cells in this layer
                    # (dims 1-3 of the rank-5 logits).
                    wsize = tfe.get_shape(logits[i], rank=5)
                    wsize = wsize[1] * wsize[2] * wsize[3]
                    # Positive mask: anchors matched above the threshold.
                    pmask = gscores[i] > match_threshold
                    fpmask = tf.cast(pmask, dtype)
                    n_positives = tf.reduce_sum(fpmask)
                    # Select some random negative entries.
                    # n_entries = np.prod(gclasses[i].get_shape().as_list())
                    # r_positive = n_positives / n_entries
                    # r_negative = negative_ratio * n_positives / (n_entries - n_positives)
                    # Negative mask.
                    no_classes = tf.cast(pmask, tf.int32)
                    predictions = slim.softmax(logits[i])
                    # gscores > -0.5 excludes entries flagged as "ignore".
                    nmask = tf.logical_and(tf.logical_not(pmask),
                                           gscores[i] > -0.5)
                    fnmask = tf.cast(nmask, dtype)
                    # Background probability for negatives; 1.0 elsewhere so
                    # non-negatives are never selected as hard negatives.
                    nvalues = tf.where(nmask,
                                       predictions[:, :, :, :, 0],
                                       1. - fnmask)
                    nvalues_flat = tf.reshape(nvalues, [-1])
                    # Number of negative entries to select: at least
                    # negative_ratio per positive, with floor heuristics,
                    # capped by the number of available negatives.
                    n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                    n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                    n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                    max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                    n_neg = tf.minimum(n_neg, max_neg_entries)

                    # Hard mining: lowest background scores are the hardest.
                    val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                    max_hard_pred = -val[-1]
                    # Final negative mask.
                    nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
                    fnmask = tf.cast(nmask, dtype)

                    # Add cross-entropy loss.
                    with tf.name_scope('cross_entropy_pos'):
                        fpmask = wsize * fpmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=gclasses[i])
                        loss = tf.losses.compute_weighted_loss(loss, fpmask)
                        l_cross_pos.append(loss)

                    with tf.name_scope('cross_entropy_neg'):
                        fnmask = wsize * fnmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=no_classes)
                        loss = tf.losses.compute_weighted_loss(loss, fnmask)
                        l_cross_neg.append(loss)

                    # Add localization loss: smooth L1, L2, ...
                    with tf.name_scope('localization'):
                        # Weights Tensor: positive mask + random negative.
                        weights = tf.expand_dims(alpha * fpmask, axis=-1)
                        loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i])
                        loss = tf.losses.compute_weighted_loss(loss, weights)
                        l_loc.append(loss)

            # Additional total losses...
            with tf.name_scope('total'):
                total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
                total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
                total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy')
                total_loc = tf.add_n(l_loc, 'localization')

                # Add to EXTRA LOSSES TF.collection
                tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
                tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
                tf.add_to_collection('EXTRA_LOSSES', total_cross)
                tf.add_to_collection('EXTRA_LOSSES', total_loc)
def enjoy(
        env,
        policy_func,
        *,
        timesteps_per_actorbatch,  # timesteps per actor per update
        clip_param,
        entcoeff,  # clipping parameter epsilon, entropy coeff
        optim_epochs,
        optim_stepsize,
        optim_batchsize,  # optimization hypers
        gamma,
        lam,  # advantage estimation
        max_timesteps=0,
        max_episodes=0,
        max_iters=0,
        max_seconds=0,  # time constraint
        callback=None,  # you can do anything in the callback, since it takes locals(), globals()
        adam_epsilon=1e-5,
        schedule='constant',  # annealing for stepsize parameters (epsilon and adam)
        save_name=None,
        save_per_acts=3,
        sensor=False,
        reload_name=None):
    """Build the PPO (clipped surrogate) graph, optionally restore a saved
    model, and run rollout/optimization iterations on `env` until one of the
    four time constraints (exactly one must be set) is hit.

    `policy_func(name, ob_space, ac_space)` must construct a policy network;
    `sensor=True` switches observations to `env.sensor_space`.
    """
    # Setup losses and stuff
    # ----------------------------------------
    if sensor:
        ob_space = env.sensor_space
    else:
        ob_space = env.observation_space
    ac_space = env.action_space
    pi = policy_func("pi", ob_space, ac_space)  # Construct network for new policy
    oldpi = policy_func("oldpi", ob_space, ac_space)  # Network for old policy
    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule
    clip_param = clip_param * lrmult  # Annealed cliping parameter epislon

    ob = U.get_placeholder_cached(name="ob")
    ac = pi.pdtype.sample_placeholder([None])

    # KL(old || new) and entropy, used for monitoring and the entropy bonus.
    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    pol_entpen = (-entcoeff) * meanent

    ratio = tf.exp(pi.pd.logp(ac) - oldpi.pd.logp(ac))  # pnew / pold
    surr1 = ratio * atarg  # surrogate from conservative policy iteration
    surr2 = tf.clip_by_value(ratio, 1.0 - clip_param,
                             1.0 + clip_param) * atarg  #
    pol_surr = -tf.reduce_mean(tf.minimum(
        surr1, surr2))  # PPO's pessimistic surrogate (L^CLIP)
    vf_loss = tf.reduce_mean(tf.square(pi.vpred - ret))
    total_loss = pol_surr + pol_entpen + vf_loss
    losses = [pol_surr, pol_entpen, vf_loss, meankl, meanent]
    loss_names = ["pol_surr", "pol_entpen", "vf_loss", "kl", "ent"]

    var_list = pi.get_trainable_variables()
    # Returns the individual losses plus the flattened gradient in one call.
    lossandgrad = U.function([ob, ac, atarg, ret, lrmult],
                             losses + [U.flatgrad(total_loss, var_list)])
    adam = MpiAdam(var_list, epsilon=adam_epsilon)

    # Copies current policy weights into the "old" policy before each update.
    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv, newv) in zipsame(oldpi.get_variables(),
                                        pi.get_variables())
        ])
    compute_losses = U.function([ob, ac, atarg, ret, lrmult], losses)

    U.initialize()
    adam.sync()

    if reload_name:
        saver = tf.train.Saver()
        saver.restore(tf.get_default_session(), reload_name)
        print("Loaded model successfully.")

    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(pi, env, timesteps_per_actorbatch,
                                     stochastic=True, sensor=sensor)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards

    assert sum(
        [max_iters > 0, max_timesteps > 0, max_episodes > 0,
         max_seconds > 0]) == 1, "Only one time constraint permitted"

    while True:
        if callback:
            callback(locals(), globals())
        # Stop on whichever single time constraint was configured.
        if max_timesteps and timesteps_so_far >= max_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            break

        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(1.0 - float(timesteps_so_far) / max_timesteps, 0)
        else:
            raise NotImplementedError

        logger.log("********** Iteration %i ************" % iters_so_far)

        seg = seg_gen.__next__()
        add_vtarg_and_adv(seg, gamma, lam)

        # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
        ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
            "tdlamret"]
        vpredbefore = seg["vpred"]  # predicted value function before udpate
        atarg = (atarg - atarg.mean()
                 ) / atarg.std()  # standardized advantage function estimate
        d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret),
                    shuffle=not pi.recurrent)
        optim_batchsize = optim_batchsize or ob.shape[0]

        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(ob)  # update running mean/std for policy

        assign_old_eq_new()  # set old parameter values to new parameter values
        logger.log("Optimizing...")
        logger.log(fmt_row(13, loss_names))
        # Here we do a bunch of optimization epochs over the data
        for _ in range(optim_epochs):
            losses = [
            ]  # list of tuples, each of which gives the loss for a minibatch
            for batch in d.iterate_once(optim_batchsize):
                *newlosses, g = lossandgrad(batch["ob"], batch["ac"],
                                            batch["atarg"], batch["vtarg"],
                                            cur_lrmult)
                adam.update(g, optim_stepsize * cur_lrmult)
                losses.append(newlosses)
def angle_delta(x, y):
    """Smallest angular separation between `x` and `y`, in radians.

    The raw difference is wrapped into [0, 2*pi) and the shorter of the
    two arcs around the circle is returned, so the result lies in [0, pi].
    Broadcasts elementwise like the underlying TF ops.
    """
    two_pi = tf.constant(2 * np.pi, dtype=x.dtype)
    wrapped = tf.mod(tf.subtract(x, y), two_pi)
    # Going the other way around the circle covers (2*pi - wrapped) radians.
    complement = tf.constant([2 * np.pi], dtype=x.dtype) - wrapped
    return tf.minimum(complement, wrapped)
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    """Define the SSD loss components and add them to the TF loss collection.

    Unlike `ssd_losses_old`, all feature layers are flattened and
    concatenated first, so masking and hard negative mining run once over
    every anchor in the batch.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # And concat the crap!
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        # gscores > -0.5 excludes entries flagged as "ignore".
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        # Background probability for candidate negatives; 1.0 elsewhere so
        # non-negatives can never be picked by top_k below.
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select: negative_ratio per positive
        # plus batch_size as a floor, capped by availability.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        # Lowest background scores are the hardest negatives.
        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=gclasses)
            loss = tf.div(tf.reduce_sum(loss * fpmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        with tf.name_scope('cross_entropy_neg'):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=no_classes)
            loss = tf.div(tf.reduce_sum(loss * fnmask), batch_size, name='value')
            tf.losses.add_loss(loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            loss = custom_layers.abs_smooth(localisations - glocalisations)
            loss = tf.div(tf.reduce_sum(loss * weights), batch_size, name='value')
            tf.losses.add_loss(loss)
def Friend_relu(x):
    """ReLU with its output additionally capped at 255.0.

    Clamps `x` elementwise into [0, 255] — negative values become 0 via
    the ReLU, values above the ceiling are clipped to 255.
    """
    ceiling = tf.constant([255.0])
    rectified = tf.nn.relu(x)
    return tf.minimum(rectified, ceiling)
def _train(self, experience, num_updates, mini_batch_size,
           mini_batch_length, update_counter_every_mini_batch,
           should_summarize):
    """Train using experience.

    Preprocesses the experience nest, reshapes it into
    [num_chunks, mini_batch_length, ...] chunks, then runs `num_updates`
    passes of shuffled mini-batch updates via `self._update`.

    Args:
        experience: nest of [batch, length, ...] tensors from the replayer.
        num_updates: number of full passes over the experience.
        mini_batch_size: chunks per update step (falsy -> whole batch).
        mini_batch_length: timesteps per chunk (falsy -> full length).
        update_counter_every_mini_batch: if True, bump the global counter
            (and summarize) on every mini-batch instead of only the last.
        should_summarize: whether to write training summaries at all.

    Returns:
        Total number of timesteps processed (batch * length * updates).
    """
    experience = nest_utils.params_to_distributions(
        experience, self.experience_spec)
    experience = self.transform_timestep(experience)
    experience = self.preprocess_experience(experience)
    experience = nest_utils.distributions_to_params(experience)

    length = experience.step_type.shape[1]
    if isinstance(self.exp_replayer, CyclicOneTimeExperienceReplayer):
        # cycle_one_time replayer first mini_batch length == unroll_length
        # and later ones == unroll_length + 1, so that all timesteps are
        # used in training.
        assert (mini_batch_length == length
                or mini_batch_length + 1 == length)
        mini_batch_length = length
    mini_batch_length = (mini_batch_length or length)
    assert length % mini_batch_length == 0, (
        "length=%s not a multiple of mini_batch_length=%s" %
        (length, mini_batch_length))

    if len(tf.nest.flatten(
            self.train_state_spec)) > 0 and not self._use_rollout_state:
        if mini_batch_length == 1:
            logging.fatal(
                "Should use TrainerConfig.use_rollout_state=True "
                "for off-policy training of RNN when minibatch_length==1.")
        else:
            common.warning_once(
                "Consider using TrainerConfig.use_rollout_state=True "
                "for off-policy training of RNN.")

    # Split each [B, T, ...] tensor into [-1, mini_batch_length, ...] chunks.
    experience = tf.nest.map_structure(
        lambda x: tf.reshape(
            x, common.concat_shape([-1, mini_batch_length],
                                   tf.shape(x)[2:])), experience)

    batch_size = tf.shape(experience.step_type)[0]
    mini_batch_size = (mini_batch_size or batch_size)

    def _make_time_major(nest):
        """Put the time dim to axis=0."""
        return tf.nest.map_structure(lambda x: common.transpose2(x, 0, 1),
                                     nest)

    scope = get_current_scope()

    for u in tf.range(num_updates):
        # Reshuffle chunk order each pass when sub-sampling mini-batches.
        if mini_batch_size < batch_size:
            indices = tf.random.shuffle(
                tf.range(tf.shape(experience.step_type)[0]))
            experience = tf.nest.map_structure(
                lambda x: tf.gather(x, indices), experience)
        for b in tf.range(0, batch_size, mini_batch_size):
            if update_counter_every_mini_batch:
                common.get_global_counter().assign_add(1)
            is_last_mini_batch = tf.logical_and(
                tf.equal(u, num_updates - 1),
                tf.greater_equal(b + mini_batch_size, batch_size))
            # Only summarize on the final mini-batch (or all of them when
            # the counter advances per mini-batch).
            do_summary = tf.logical_or(is_last_mini_batch,
                                       update_counter_every_mini_batch)
            common.enable_summary(do_summary)
            batch = tf.nest.map_structure(
                lambda x: x[b:tf.minimum(batch_size, b + mini_batch_size)],
                experience)
            batch = _make_time_major(batch)
            # Tensorflow graph mode loses the original name scope here. We
            # need to restore the original name scope
            with tf.name_scope(scope):
                training_info, loss_info, grads_and_vars = self._update(
                    batch,
                    # Weight a possibly-short trailing mini-batch by its
                    # actual size relative to mini_batch_size.
                    weight=tf.cast(tf.shape(batch.step_type)[1], tf.float32)
                    / float(mini_batch_size))
            if should_summarize:
                if do_summary:
                    # Putting `if do_summary` under the above `with` statement
                    # does not help. Somehow `if` statement will also lose
                    # the original name scope.
                    with tf.name_scope(scope):
                        self.summarize_train(training_info, loss_info,
                                             grads_and_vars)

    train_steps = batch_size * mini_batch_length * num_updates
    return train_steps
def ppo(workload_file, model_path, ac_kwargs=None, seed=0,
        traj_per_epoch=4000, epochs=50, gamma=0.99, clip_ratio=0.2,
        pi_lr=3e-4, vf_lr=1e-3, train_pi_iters=80, train_v_iters=80,
        lam=0.97, max_ep_len=1000, target_kl=0.01, logger_kwargs=None,
        save_freq=10, pre_trained=0, trained_model=None, attn=False,
        shuffle=False, backfil=False, skip=False, score_type=0,
        batch_job_slice=0, sched_algo=4):
    """Run PPO training on the HPC job-scheduling environment.

    Builds (or restores, when ``pre_trained``) a TF1 actor-critic graph,
    collects ``traj_per_epoch`` trajectories per epoch from ``HPCEnvSkip``,
    and updates the policy/value networks with the clipped PPO objective.

    Args:
        workload_file: workload trace passed to ``env.my_init``.
        model_path: scheduler file passed to ``env.my_init``.
        ac_kwargs: optional dict of kwargs for ``actor_critic`` (a fresh dict
            is used when None). NOTE: this dict is augmented in place with
            'action_space' and 'attn', matching the original behavior.
        seed: random seed for TF/numpy/env.
        traj_per_epoch, epochs: sampling/optimization schedule.
        gamma, lam: discount / GAE-lambda for the PPO buffer.
        clip_ratio, target_kl: PPO clipping and early-stop KL threshold.
        pi_lr, vf_lr, train_pi_iters, train_v_iters: optimizer settings.
        max_ep_len: unused here; kept for interface compatibility.
        logger_kwargs: kwargs for ``EpochLogger`` (fresh dict when None).
        save_freq: save model state every this many epochs.
        pre_trained, trained_model: restore a saved graph instead of building.
        attn/shuffle/backfil/skip/score_type/batch_job_slice/sched_algo:
            environment configuration flags.
    """
    # Fix for the mutable-default-argument pitfall: the previous defaults
    # (`ac_kwargs=dict()`, `logger_kwargs=dict()`) were shared across calls,
    # and ac_kwargs is mutated below, leaking state between invocations.
    if ac_kwargs is None:
        ac_kwargs = {}
    if logger_kwargs is None:
        logger_kwargs = {}

    logger = EpochLogger(**logger_kwargs)
    logger.save_config(locals())

    tf.set_random_seed(seed)
    np.random.seed(seed)

    env = HPCEnvSkip(shuffle=shuffle, backfil=backfil, skip=skip,
                     job_score_type=score_type,
                     batch_job_slice=batch_job_slice, build_sjf=False,
                     sched_algo=sched_algo)
    env.seed(seed)
    env.my_init(workload_file=workload_file, sched_file=model_path)

    obs_dim = env.observation_space.shape
    act_dim = env.action_space.shape

    # Share information about action space with policy architecture
    ac_kwargs['action_space'] = env.action_space
    ac_kwargs['attn'] = attn

    # Inputs to computation graph
    buf = PPOBuffer(obs_dim, act_dim, traj_per_epoch * JOB_SEQUENCE_SIZE,
                    gamma, lam)

    if pre_trained:
        # Restore a previously exported graph and look up its tensors by name.
        sess = tf.Session()
        model = restore_tf_graph(sess, trained_model)
        logger.log('load pre-trained model')
        # Count variables
        var_counts = tuple(count_vars(scope) for scope in ['pi', 'v'])
        logger.log('\nNumber of parameters: \t pi: %d, \t v: %d\n' % var_counts)

        x_ph = model['x']
        a_ph = model['a']
        mask_ph = model['mask']
        adv_ph = model['adv']
        ret_ph = model['ret']
        logp_old_ph = model['logp_old_ph']

        pi = model['pi']
        v = model['v']
        # logits = model['logits']
        out = model['out']
        logp = model['logp']
        logp_pi = model['logp_pi']
        pi_loss = model['pi_loss']
        v_loss = model['v_loss']
        approx_ent = model['approx_ent']
        approx_kl = model['approx_kl']
        clipfrac = model['clipfrac']
        clipped = model['clipped']

        # Optimizers were stored in collections when the model was first built.
        train_pi = tf.get_collection("train_pi")[0]
        train_v = tf.get_collection("train_v")[0]

        # Need all placeholders in *this* order later (to zip with data from buffer)
        all_phs = [x_ph, a_ph, mask_ph, adv_ph, ret_ph, logp_old_ph]

        # Every step, get: action, value, and logprob
        get_action_ops = [pi, v, logp_pi, out]
    else:
        x_ph, a_ph = placeholders_from_spaces(env.observation_space,
                                              env.action_space)
        # mask_ph masks out invalid actions; here it covers [no_skip, skip].
        mask_ph = placeholder(env.action_space.n)
        adv_ph, ret_ph, logp_old_ph = placeholders(None, None, None)

        # Main outputs from computation graph
        pi, logp, logp_pi, v, out = actor_critic(x_ph, a_ph, mask_ph,
                                                 **ac_kwargs)

        # Need all placeholders in *this* order later (to zip with data from buffer)
        all_phs = [x_ph, a_ph, mask_ph, adv_ph, ret_ph, logp_old_ph]

        # Every step, get: action, value, and logprob
        get_action_ops = [pi, v, logp_pi, out]

        # Count variables
        var_counts = tuple(count_vars(scope) for scope in ['pi', 'v'])
        logger.log('\nNumber of parameters: \t pi: %d, \t v: %d\n' % var_counts)

        # PPO objectives
        ratio = tf.exp(logp - logp_old_ph)  # pi(a|s) / pi_old(a|s)
        # Clipped surrogate objective (PPO-clip).
        min_adv = tf.where(adv_ph > 0, (1 + clip_ratio) * adv_ph,
                           (1 - clip_ratio) * adv_ph)
        pi_loss = -tf.reduce_mean(tf.minimum(ratio * adv_ph, min_adv))
        v_loss = tf.reduce_mean((ret_ph - v)**2)

        # Info (useful to watch during learning)
        # a sample estimate for KL-divergence, easy to compute
        approx_kl = tf.reduce_mean(logp_old_ph - logp)
        # a sample estimate for entropy, also easy to compute
        approx_ent = tf.reduce_mean(-logp)
        clipped = tf.logical_or(ratio > (1 + clip_ratio),
                                ratio < (1 - clip_ratio))
        clipfrac = tf.reduce_mean(tf.cast(clipped, tf.float32))

        # Optimizers
        train_pi = tf.train.AdamOptimizer(learning_rate=pi_lr).minimize(pi_loss)
        train_v = tf.train.AdamOptimizer(learning_rate=vf_lr).minimize(v_loss)

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # Stash the train ops so a restored graph can find them (see above).
        tf.add_to_collection("train_pi", train_pi)
        tf.add_to_collection("train_v", train_v)

    # Setup model saving
    logger.setup_tf_saver(sess,
                          inputs={
                              'x': x_ph,
                              'a': a_ph,
                              'adv': adv_ph,
                              'mask': mask_ph,
                              'ret': ret_ph,
                              'logp_old_ph': logp_old_ph
                          },
                          outputs={
                              'pi': pi,
                              'v': v,
                              'out': out,
                              'pi_loss': pi_loss,
                              'logp': logp,
                              'logp_pi': logp_pi,
                              'v_loss': v_loss,
                              'approx_ent': approx_ent,
                              'approx_kl': approx_kl,
                              'clipped': clipped,
                              'clipfrac': clipfrac
                          })

    def update():
        """One PPO update from the data currently in the buffer."""
        inputs = {k: v for k, v in zip(all_phs, buf.get())}
        pi_l_old, v_l_old, ent = sess.run([pi_loss, v_loss, approx_ent],
                                          feed_dict=inputs)

        # Training: policy steps with early stopping on KL divergence.
        for i in range(train_pi_iters):
            _, kl = sess.run([train_pi, approx_kl], feed_dict=inputs)
            kl = mpi_avg(kl)
            if kl > 1.5 * target_kl:
                logger.log(
                    'Early stopping at step %d due to reaching max kl.' % i)
                break
        logger.store(StopIter=i)
        for _ in range(train_v_iters):
            sess.run(train_v, feed_dict=inputs)

        # Log changes from update
        pi_l_new, v_l_new, kl, cf = sess.run(
            [pi_loss, v_loss, approx_kl, clipfrac], feed_dict=inputs)
        logger.store(LossPi=pi_l_old,
                     LossV=v_l_old,
                     KL=kl,
                     Entropy=ent,
                     ClipFrac=cf,
                     DeltaLossPi=(pi_l_new - pi_l_old),
                     DeltaLossV=(v_l_new - v_l_old))

    [o, co], r, d, ep_ret, ep_len, show_ret, sjf, f1, skip_count = env.reset(
    ), 0, False, 0, 0, 0, 0, 0, 0

    # Main loop: collect experience in env and update/log each epoch
    # (the duplicate dead `start_time` assignment before env.reset was removed)
    start_time = time.time()
    for epoch in range(epochs):
        t = 0
        discard_times = 0
        while True:
            # Action mask: [no_skip, skip]; currently both always allowed.
            lst = [1, 1]
            a, v_t, logp_t, output = sess.run(
                get_action_ops,
                feed_dict={
                    x_ph: o.reshape(1, -1),
                    mask_ph: np.array(lst).reshape(1, -1)
                })

            # Discard over-long trajectories (or a full buffer) by rewinding
            # the buffer pointer to the start of the current path.
            if buf.ptr - buf.path_start_idx >= 10 * JOB_SEQUENCE_SIZE or buf.ptr >= buf.max_size:
                discard_times += 1
                buf.ptr = buf.path_start_idx
                [
                    o, co
                ], r, d, ep_ret, ep_len, show_ret, sjf, f1, skip_count = env.reset(
                ), 0, False, 0, 0, 0, 0, 0, 0
                continue

            # save and log
            buf.store(o, None, a, np.array(lst), r, v_t, logp_t)
            logger.store(VVals=v_t)
            if a[0] == 1:
                skip_count += 1
            o, r, d, r2, sjf_t, f1_t = env.step(a[0])
            ep_ret += r
            ep_len += 1
            show_ret += r2
            sjf += sjf_t
            f1 += f1_t

            if d:
                t += 1
                buf.finish_path(r)
                logger.store(EpRet=ep_ret,
                             EpLen=ep_len,
                             ShowRet=show_ret,
                             SJF=sjf,
                             F1=f1,
                             SkipRatio=skip_count / ep_len)
                [
                    o, co
                ], r, d, ep_ret, ep_len, show_ret, sjf, f1, skip_count = env.reset(
                ), 0, False, 0, 0, 0, 0, 0, 0
                if t >= traj_per_epoch:
                    break

        # Save model
        if (epoch % save_freq == 0) or (epoch == epochs - 1):
            logger.save_state({'env': env}, None)

        # Perform PPO update!
        update()

        # Log info about epoch
        logger.log_tabular('Epoch', epoch)
        logger.log_tabular('EpRet', with_min_and_max=True)
        logger.log_tabular('EpLen', with_min_and_max=True)
        logger.log_tabular('VVals', with_min_and_max=True)
        logger.log_tabular('TotalEnvInteracts',
                           (epoch + 1) * traj_per_epoch * JOB_SEQUENCE_SIZE)
        logger.log_tabular('LossPi', average_only=True)
        logger.log_tabular('LossV', average_only=True)
        logger.log_tabular('DeltaLossPi', average_only=True)
        logger.log_tabular('DeltaLossV', average_only=True)
        logger.log_tabular('Entropy', average_only=True)
        logger.log_tabular('KL', average_only=True)
        logger.log_tabular('ClipFrac', average_only=True)
        logger.log_tabular('StopIter', average_only=True)
        logger.log_tabular('ShowRet', average_only=True)
        logger.log_tabular('SJF', average_only=True)
        logger.log_tabular('F1', average_only=True)
        logger.log_tabular('SkipRatio', average_only=True)
        logger.log_tabular('Time', time.time() - start_time)
        logger.dump_tabular()
def MovingAvgQuantize(inputs, min_var, max_var, per_channel=False,
                      ema_decay=0.999, name_prefix='MovingAvgQuantize',
                      is_training=True, num_bits=8, narrow_range=False,
                      symmetric=False):
    """Adds a layer that collects quantization ranges as EMAs of input ranges.

    MovingAvgQuantize uses the provided 'min' and 'max' variables, representing
    the interval used for quantization and clamping. During training the
    variables are updated with an exponential moving average of the observed
    batch min/max; during eval they are used as-is.

    Args:
        inputs: a tensor containing values to be quantized.
        min_var: variable holding the lower end of the quantization range;
            updated via EMA when is_training.
        max_var: variable holding the upper end of the quantization range;
            updated via EMA when is_training.
        per_channel: (default False) a boolean specifying whether to use
            different quantization ranges per output channel.
        ema_decay: EMA decay parameter.
        name_prefix: name_prefix for created nodes.
        is_training: Whether the op is applied to a training or eval graph.
        num_bits: Number of bits to use for quantization, must be between 2 and 8.
        narrow_range: Whether to use the narrow quantization range
            [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
        symmetric: If true, use symmetric quantization limits instead of
            training the minimum and maximum of each quantization range
            separately.

    Returns:
        a tensor containing quantized values.
    """
    with tf.name_scope(name_prefix):
        input_shape = inputs.get_shape()
        input_dim = len(input_shape)
        if not is_training:
            # Eval graph: quantize with the stored ranges, no EMA updates.
            return _FakeQuantWithMinMaxVars(inputs,
                                            min_var,
                                            max_var,
                                            per_channel=per_channel,
                                            num_bits=num_bits,
                                            narrow_range=narrow_range)
        if per_channel:
            # Reduce over all axes except the last (channel) axis.
            # NOTE(review): reduce_dims is only set for 2-D and 4-D inputs;
            # a per_channel 3-D input would leave it unbound — confirm callers
            # never pass 3-D tensors here.
            if input_dim == 2:
                reduce_dims = [0]
            elif input_dim == 4:
                reduce_dims = [0, 1, 2]

        if per_channel:
            if input_dim >= 2:
                batch_min = tf.math.reduce_min(inputs,
                                               axis=reduce_dims,
                                               name='BatchMin')
            else:
                batch_min = inputs
        else:
            batch_min = tf.math.reduce_min(inputs, name='BatchMin')

        if per_channel:
            if input_dim >= 2:
                batch_max = tf.math.reduce_max(inputs,
                                               axis=reduce_dims,
                                               name='BatchMax')
            else:
                batch_max = inputs
        else:
            batch_max = tf.math.reduce_max(inputs, name='BatchMax')

        if symmetric:
            if narrow_range:
                min_max_ratio = -1
            else:
                # In two's complement notation, the negative range is slightly larger
                # than the positive range.
                min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits)

            # TFLite requires that 0.0 is always in the [min; max] range. Because
            # batch_min <= batch_max, it follows that range_min <= 0 <= range_max.
            range_min = tf.minimum(batch_min, batch_max / min_max_ratio)
            range_max = tf.maximum(batch_max, batch_min * min_max_ratio)
        else:
            # TFLite requires that 0.0 is always in the [min; max] range.
            range_min = tf.minimum(batch_min, 0.0)
            range_max = tf.maximum(batch_max, 0.0)

        # Track the observed ranges with exponential moving averages.
        assign_min = moving_averages.assign_moving_average(min_var,
                                                           range_min,
                                                           ema_decay,
                                                           zero_debias=False,
                                                           name='AssignMinEma')
        assign_max = moving_averages.assign_moving_average(max_var,
                                                           range_max,
                                                           ema_decay,
                                                           zero_debias=False,
                                                           name='AssignMaxEma')
        # Quantizing through the assign ops makes the EMA updates run as a
        # side effect of the fake-quant op.
        return _FakeQuantWithMinMaxVars(inputs,
                                        assign_min,
                                        assign_max,
                                        per_channel=per_channel,
                                        num_bits=num_bits,
                                        narrow_range=narrow_range)
def _add_interpretation_graph(self):
    """Interpret NN output.

    Splits the raw prediction tensor ``self.preds`` along its last axis into
    per-anchor class probabilities, confidence scores, and bbox deltas, then
    decodes the deltas against the anchor boxes into clipped image-space
    detection boxes, computes per-anchor IOUs against the ground-truth boxes,
    and combines class probabilities with confidences into final scores.
    """
    mc = self.mc

    with tf.variable_scope('interpret_output') as scope:
        preds = self.preds

        # probability: first num_class_probs channels are per-anchor class
        # logits; softmax over classes.
        num_class_probs = mc.ANCHOR_PER_GRID * mc.CLASSES
        self.pred_class_probs = tf.reshape(
            tf.nn.softmax(
                tf.reshape(preds[:, :, :, :num_class_probs],
                           [-1, mc.CLASSES])),
            [mc.BATCH_SIZE, mc.ANCHORS, mc.CLASSES],
            name='pred_class_probs')
        print("pred_class_probs shape: ", self.pred_class_probs.get_shape())

        # confidence: next ANCHOR_PER_GRID channels, squashed with sigmoid.
        num_confidence_scores = mc.ANCHOR_PER_GRID + num_class_probs
        self.pred_conf = tf.sigmoid(tf.reshape(
            preds[:, :, :, num_class_probs:num_confidence_scores],
            [mc.BATCH_SIZE, mc.ANCHORS]),
                                    name='pred_confidence_score')
        print("pred_confidence_score: ", self.pred_conf.get_shape())

        # bbox_delta: remaining channels, 4 values per anchor.
        self.pred_box_delta = tf.reshape(preds[:, :, :,
                                               num_confidence_scores:],
                                         [mc.BATCH_SIZE, mc.ANCHORS, 4],
                                         name='bbox_delta')
        print("bbox_delta: ", self.pred_box_delta.get_shape())

        # number of object. Used to normalize bbox and classification loss
        self.num_objects = tf.reduce_sum(self.input_mask, name='num_objects')

    with tf.variable_scope('bbox') as scope:
        with tf.variable_scope('stretching'):
            delta_x, delta_y, delta_w, delta_h = tf.unstack(
                self.pred_box_delta, axis=2)

            anchor_x = mc.ANCHOR_BOX[:, 0]
            anchor_y = mc.ANCHOR_BOX[:, 1]
            anchor_w = mc.ANCHOR_BOX[:, 2]
            anchor_h = mc.ANCHOR_BOX[:, 3]

            # Decode deltas: centers shift proportionally to anchor size,
            # width/height scale by a (thresholded) exponential of the delta.
            box_center_x = tf.identity(anchor_x + delta_x * anchor_w,
                                       name='bbox_cx')
            box_center_y = tf.identity(anchor_y + delta_y * anchor_h,
                                       name='bbox_cy')
            box_width = tf.identity(anchor_w *
                                    util.safe_exp(delta_w, mc.EXP_THRESH),
                                    name='bbox_width')
            box_height = tf.identity(anchor_h *
                                     util.safe_exp(delta_h, mc.EXP_THRESH),
                                     name='bbox_height')

            self._activation_summary(delta_x, 'delta_x')
            self._activation_summary(delta_y, 'delta_y')
            self._activation_summary(delta_w, 'delta_w')
            self._activation_summary(delta_h, 'delta_h')

            self._activation_summary(box_center_x, 'bbox_cx')
            self._activation_summary(box_center_y, 'bbox_cy')
            self._activation_summary(box_width, 'bbox_width')
            self._activation_summary(box_height, 'bbox_height')

        with tf.variable_scope('trimming'):
            xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
                [box_center_x, box_center_y, box_width, box_height])

            # The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
            # pixels. Same for y.
            xmins = tf.minimum(tf.maximum(0.0, xmins),
                               mc.IMAGE_WIDTH - 1.0,
                               name='bbox_xmin')
            self._activation_summary(xmins, 'box_xmin')

            ymins = tf.minimum(tf.maximum(0.0, ymins),
                               mc.IMAGE_HEIGHT - 1.0,
                               name='bbox_ymin')
            self._activation_summary(ymins, 'box_ymin')

            xmaxs = tf.maximum(tf.minimum(mc.IMAGE_WIDTH - 1.0, xmaxs),
                               0.0,
                               name='bbox_xmax')
            self._activation_summary(xmaxs, 'box_xmax')

            ymaxs = tf.maximum(tf.minimum(mc.IMAGE_HEIGHT - 1.0, ymaxs),
                               0.0,
                               name='bbox_ymax')
            self._activation_summary(ymaxs, 'box_ymax')

            # Back to [cx, cy, w, h], stacked as [BATCH, ANCHORS, 4].
            self.det_boxes = tf.transpose(tf.stack(
                util.bbox_transform_inv([xmins, ymins, xmaxs, ymaxs])),
                                          (1, 2, 0),
                                          name='bbox')

    with tf.variable_scope('IOU'):

        def _tensor_iou(box1, box2):
            # Elementwise IOU of [xmin, ymin, xmax, ymax] box lists; masked so
            # anchors without an assigned ground truth contribute 0.
            with tf.variable_scope('intersection'):
                xmin = tf.maximum(box1[0], box2[0], name='xmin')
                ymin = tf.maximum(box1[1], box2[1], name='ymin')
                xmax = tf.minimum(box1[2], box2[2], name='xmax')
                ymax = tf.minimum(box1[3], box2[3], name='ymax')

                w = tf.maximum(0.0, xmax - xmin, name='inter_w')
                h = tf.maximum(0.0, ymax - ymin, name='inter_h')
                intersection = tf.multiply(w, h, name='intersection')

            with tf.variable_scope('union'):
                w1 = tf.subtract(box1[2], box1[0], name='w1')
                h1 = tf.subtract(box1[3], box1[1], name='h1')
                w2 = tf.subtract(box2[2], box2[0], name='w2')
                h2 = tf.subtract(box2[3], box2[1], name='h2')

                union = w1 * h1 + w2 * h2 - intersection

            # EPSILON guards against division by zero for degenerate boxes.
            return intersection/(union+mc.EPSILON) \
                * tf.reshape(self.input_mask, [mc.BATCH_SIZE, mc.ANCHORS])

        # Stored via Variable.assign — presumably so the IOUs can be read back
        # without re-feeding inputs; confirm against how self.ious is consumed.
        self.ious = self.ious.assign(
            _tensor_iou(
                util.bbox_transform(tf.unstack(self.det_boxes, axis=2)),
                util.bbox_transform(tf.unstack(self.box_input, axis=2))))
        self._activation_summary(self.ious, 'conf_score')

    with tf.variable_scope('probability') as scope:
        self._activation_summary(self.pred_class_probs, 'class_probs')

        # Final per-class score = class probability * objectness confidence.
        probs = tf.multiply(self.pred_class_probs,
                            tf.reshape(self.pred_conf,
                                       [mc.BATCH_SIZE, mc.ANCHORS, 1]),
                            name='final_class_prob')

        self._activation_summary(probs, 'final_class_prob')

        self.det_probs = tf.reduce_max(probs, 2, name='score')
        self.det_class = tf.argmax(probs, 2, name='class_idx')
def Input_Kernel(input_data, Midi_low, Midi_high, time_init):
    """Expand raw note states into the per-note input feature vector.

    Arguments:
        input_data: size = [batch_size x num_notes x num_timesteps x 2]
            (the input data represents that at the previous timestep of what
            we are trying to predict); last axis is (play, articulate).
        Midi_low: integer, lowest MIDI pitch in the note axis.
        Midi_high: integer, highest MIDI pitch in the note axis.
        time_init: integer representing where the 'beat' component begins for
            the batch.

    Returns:
        Note_State_Expand: size = [batch_size x num_notes x num_timesteps x 80]
            concatenation of [midi index (1), pitch class one-hot (12),
            vicinity (50), context (12), beat (4), zero (1)].
    """
    # Capture input_data dimensions (batch_size and num_timesteps are
    # variable length)
    batch_size = tf.shape(input_data)[0]
    num_notes = input_data.get_shape()[1].value
    num_timesteps = tf.shape(input_data)[2]

    # MIDI note number (only a function of the note index)
    Midi_indices = tf.squeeze(
        tf.range(start=Midi_low, limit=Midi_high + 1, delta=1))
    x_Midi = tf.ones(
        (batch_size, num_timesteps, 1, num_notes)) * tf.cast(Midi_indices,
                                                             dtype=tf.float32)
    x_Midi = tf.transpose(
        x_Midi,
        perm=[0, 3, 1, 2])  # shape = batch_size, num_notes, num_timesteps, 1

    # part_pitchclass (only a function of the note index): midi % 12 one-hot
    Midi_pitchclasses = tf.squeeze(x_Midi % 12, axis=3)
    x_pitch_class = tf.one_hot(tf.cast(Midi_pitchclasses, dtype=tf.uint8),
                               depth=12)

    # part_prev_vicinity: fold time into the batch axis so a conv1d over the
    # note axis can gather each note's +/-12 semitone neighborhood.
    input_flatten = tf.transpose(input_data, perm=[0, 2, 1, 3])
    input_flatten = tf.reshape(
        input_flatten,
        [batch_size * num_timesteps, num_notes, 2
         ])  # channel for play and channel for articulate
    input_flatten_p = tf.slice(input_flatten, [0, 0, 0], size=[-1, -1, 1])
    input_flatten_a = tf.slice(input_flatten, [0, 0, 1], size=[-1, -1, 1])

    # reverse identity kernel (25 = 12 semitones either side + the note itself)
    filt_vicinity = tf.expand_dims(tf.eye(25), axis=1)

    # 1D convolutional filter for each play and articulate arrays
    vicinity_p = tf.nn.conv1d(input_flatten_p,
                              filt_vicinity,
                              stride=1,
                              padding='SAME')
    vicinity_a = tf.nn.conv1d(input_flatten_a,
                              filt_vicinity,
                              stride=1,
                              padding='SAME')

    # concatenate back together and restack such that play-articulate numbers
    # alternate
    vicinity = tf.stack([vicinity_p, vicinity_a], axis=3)
    vicinity = tf.unstack(vicinity, axis=2)
    vicinity = tf.concat(vicinity, axis=2)

    # reshape by major dimensions, THEN swap axes
    x_vicinity = tf.reshape(vicinity,
                            shape=[batch_size, num_timesteps, num_notes, 50])
    x_vicinity = tf.transpose(x_vicinity, perm=[0, 2, 1, 3])

    # part_prev_context
    input_flatten_p_bool = tf.minimum(
        input_flatten_p,
        1)  # 1 if note is played, 0 if not played. Don't care about articulation

    # kernel: tiled 12x12 identity counts played notes per pitch class
    filt_context = tf.expand_dims(tf.tile(tf.eye(12),
                                          multiples=[(num_notes // 12) * 2, 1]),
                                  axis=1)

    context = tf.nn.conv1d(input_flatten_p_bool,
                           filt_context,
                           stride=1,
                           padding='SAME')
    x_context = tf.reshape(context,
                           shape=[batch_size, num_timesteps, num_notes, 12])
    x_context = tf.transpose(x_context, perm=[0, 2, 1, 3])

    # beat (only a function of the time axis index plus the time_init value):
    # 4-bit binary encoding of the timestep position.
    Time_indices = tf.range(time_init, num_timesteps + time_init)
    x_Time = tf.reshape(tf.tile(Time_indices,
                                multiples=[batch_size * num_notes]),
                        shape=[batch_size, num_notes, num_timesteps, 1])
    x_beat = tf.cast(tf.concat(
        [x_Time % 2, x_Time // 2 % 2, x_Time // 4 % 2, x_Time // 8 % 2],
        axis=-1),
                     dtype=tf.float32)

    # zero: constant padding feature channel
    x_zero = tf.zeros([batch_size, num_notes, num_timesteps, 1])

    # Final Vector: 1 + 12 + 50 + 12 + 4 + 1 = 80 features per note/timestep
    Note_State_Expand = tf.concat(
        [x_Midi, x_pitch_class, x_vicinity, x_context, x_beat, x_zero],
        axis=-1)

    return Note_State_Expand
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
        logits: (list of) predictions logits Tensors;
        localisations: (list of) localisations Tensors;
        gclasses: (list of) groundtruth labels Tensors;
        glocalisations: (list of) groundtruth localisations Tensors;
        gscores: (list of) groundtruth score Tensors;
        match_threshold: min groundtruth score for an anchor to count as
            positive;
        negative_ratio: target ratio of mined negatives to positives;
        alpha: weight of the localization loss;
        label_smoothing: unused here — kept for interface compatibility.

    Per-block loss tensors are accumulated through
    tf.losses.compute_weighted_loss (which registers them in the TF losses
    collection); the summed totals are also added to the 'EXTRA_LOSSES'
    collection. Nothing is returned.
    """
    with tf.name_scope(scope, 'ssd_losses'):
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        # One loss block per feature-map scale.
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                pmask = gscores[
                    i] > match_threshold  # treat as positive (matched) if score is greater than some threshold !!!
                fpmask = tf.cast(pmask, dtype)
                n_positives = tf.reduce_sum(fpmask)

                # Negative mask: anchors that are neither positive nor
                # ignored (gscore <= -0.5 marks ignored entries — presumably;
                # confirm against the encoder).
                no_classes = tf.cast(pmask, tf.int32)
                predictions = slim.softmax(logits[i])
                nmask = tf.logical_and(
                    tf.logical_not(pmask),  # treat rest as negative
                    gscores[i] > -0.5)
                fnmask = tf.cast(nmask, dtype)
                # For negatives keep the class-0 probability; elsewhere use 1.
                # so those entries never rank as "hard" negatives below.
                nvalues = tf.where(nmask,
                                   predictions[:, :, :, :, 0],
                                   1. - fnmask)
                nvalues_flat = tf.reshape(nvalues, [-1])
                # Number of negative entries to select (hard negative mining):
                # at least negative_ratio * positives, with floors based on
                # tensor size, capped by the number of available negatives.
                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                n_neg = tf.minimum(n_neg, max_neg_entries)

                # top_k of -nvalues picks the negatives with the LOWEST
                # background probability, i.e. the hardest ones.
                val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                minval = val[-1]
                # Final negative mask: only the mined hard negatives remain.
                nmask = tf.logical_and(nmask, -nvalues > minval)
                fnmask = tf.cast(nmask, dtype)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=gclasses[i])
                    loss = tf.losses.compute_weighted_loss(
                        loss,
                        fpmask)  # use positive mask for cross entropy positive
                    l_cross_pos.append(loss)  # positive cross entropy loss

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=no_classes)
                    loss = tf.losses.compute_weighted_loss(
                        loss,
                        fnmask)  # use negative mask for cross entropy negative
                    l_cross_neg.append(loss)  # negative cross entropy loss

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(
                        alpha * fpmask, axis=-1
                    )  # alpha is just 1 here ... (see p.5 of paper end of paragraph "Training objective")
                    loss = custom_layers.abs_smooth(
                        localisations[i] - glocalisations[i]
                    )  # smooth L1 loss (see eq 2 on p.5 of paper !)
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)  # localization loss

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(
                total_cross_pos, total_cross_neg, 'cross_entropy'
            )  # add positive and negative cross entropies to get total cross entropy
            total_loc = tf.add_n(l_loc, 'localization')  # localiation loss

            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
def main():
    """Train a point-cloud segmentation model (ShellConv-style TF1 script).

    Parses CLI args, imports the model and its setting module dynamically,
    builds the training/eval graph, then alternates training over the listed
    HDF5 files with per-epoch validation, checkpointing each epoch.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_ckpt', '-l',
                        help='Path to a check point file for load')
    parser.add_argument('--save_folder', '-s', default='log/seg',
                        help='Path to folder for saving check points and summary')
    parser.add_argument('--model', '-m', default='shellconv',
                        help='Model to use')
    # FIX: removed a stray "', " token that made this call a syntax error.
    parser.add_argument('--setting', '-x', help='Setting to use')
    parser.add_argument('--log',
                        help='Log to FILE in save folder; use - for stdout (default is log.txt)',
                        metavar='FILE', default='log.txt')
    args = parser.parse_args()

    # One run folder per (model, setting, timestamp).
    time_string = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    root_folder = os.path.join(args.save_folder,
                               '%s_%s_%s' % (args.model, args.setting, time_string))
    if not os.path.exists(root_folder):
        os.makedirs(root_folder)

    global LOG_FOUT
    if args.log != '-':
        LOG_FOUT = open(os.path.join(root_folder, args.log), 'w')

    # Import the model module and its setting module (settings live in a
    # sub-folder named after the model).
    model = importlib.import_module(args.model)
    setting_path = os.path.join(os.path.dirname(__file__), args.model)
    sys.path.append(setting_path)
    setting = importlib.import_module(args.setting)

    num_epochs = setting.num_epochs
    batch_size = setting.batch_size
    sample_num = setting.sample_num
    step_val = setting.step_val
    label_weights_list = setting.label_weights
    rotation_range = setting.rotation_range
    rotation_range_val = setting.rotation_range_val
    scaling_range = setting.scaling_range
    scaling_range_val = setting.scaling_range_val
    jitter = setting.jitter
    jitter_val = setting.jitter_val

    # Training data may be a single h5 list file or a list of list files.
    is_list_of_h5_list = data_utils.is_h5_list(setting.filelist)
    if is_list_of_h5_list:
        seg_list = [setting.filelist]  # for train
    else:
        seg_list = data_utils.load_seg_list(setting.filelist)  # for train

    data_val, _, data_num_val, label_val, _ = data_utils.load_seg(
        setting.filelist_val)
    if data_val.shape[-1] > 3:
        data_val = data_val[:, :, :3]  # only use the xyz coordinates
    point_num = data_val.shape[1]
    num_val = data_val.shape[0]
    batch_num_val = num_val // batch_size

    ######################################################################
    # Placeholders
    indices = tf.placeholder(tf.int32, shape=(None, sample_num, 2),
                             name="indices")
    xforms = tf.placeholder(tf.float32, shape=(None, 3, 3), name="xforms")
    rotations = tf.placeholder(tf.float32, shape=(None, 3, 3),
                               name="rotations")
    jitter_range = tf.placeholder(tf.float32, shape=(1), name="jitter_range")
    global_step = tf.Variable(0, trainable=False, name='global_step')
    is_training = tf.placeholder(tf.bool, name='is_training')

    pts_fts = tf.placeholder(tf.float32,
                             shape=(None, point_num, setting.data_dim),
                             name='pts_fts')
    labels_seg = tf.placeholder(tf.int64, shape=(None, point_num),
                                name='labels_seg')
    labels_weights = tf.placeholder(tf.float32, shape=(None, point_num),
                                    name='labels_weights')
    ######################################################################

    # Sample points (and the matching labels/weights) then augment them.
    points_sampled = tf.gather_nd(pts_fts, indices=indices,
                                  name='pts_fts_sampled')
    points_augmented = pf.augment(points_sampled, xforms, jitter_range)
    labels_sampled = tf.gather_nd(labels_seg, indices=indices,
                                  name='labels_sampled')
    labels_weights_sampled = tf.gather_nd(labels_weights, indices=indices,
                                          name='labels_weight_sampled')

    # Batch-norm decay ramps from 0.5 toward 0.99 as training progresses.
    bn_decay_exp_op = tf.train.exponential_decay(0.5, global_step,
                                                 setting.decay_steps, 0.5,
                                                 staircase=True)
    bn_decay_op = tf.minimum(0.99, 1 - bn_decay_exp_op)

    logits_op = model.get_model(points_augmented, is_training,
                                setting.sconv_params, setting.sdconv_params,
                                setting.fc_params,
                                sampling=setting.sampling,
                                weight_decay=setting.weight_decay,
                                bn_decay=bn_decay_op,
                                part_num=setting.num_class)

    predictions = tf.argmax(logits_op, axis=-1, name='predictions')
    loss_op = tf.losses.sparse_softmax_cross_entropy(
        labels=labels_sampled, logits=logits_op,
        weights=labels_weights_sampled)

    # Streaming metrics; reset_metrics_op re-initializes only their locals.
    with tf.name_scope('metrics'):
        loss_mean_op, loss_mean_update_op = tf.metrics.mean(loss_op)
        t_1_acc_op, t_1_acc_update_op = tf.metrics.accuracy(
            labels_sampled, predictions, weights=labels_weights_sampled)
        t_1_per_class_acc_op, t_1_per_class_acc_update_op = \
            tf.metrics.mean_per_class_accuracy(labels_sampled, predictions,
                                               setting.num_class,
                                               weights=labels_weights_sampled)
    reset_metrics_op = tf.variables_initializer(
        [var for var in tf.local_variables()
         if var.name.split('/')[0] == 'metrics'])

    _ = tf.summary.scalar('loss/train', tensor=loss_mean_op,
                          collections=['train'])
    _ = tf.summary.scalar('t_1_acc/train', tensor=t_1_acc_op,
                          collections=['train'])
    _ = tf.summary.scalar('t_1_per_class_acc/train',
                          tensor=t_1_per_class_acc_op, collections=['train'])

    _ = tf.summary.scalar('loss/val', tensor=loss_mean_op,
                          collections=['val'])
    _ = tf.summary.scalar('t_1_acc/val', tensor=t_1_acc_op,
                          collections=['val'])
    _ = tf.summary.scalar('t_1_per_class_acc/val',
                          tensor=t_1_per_class_acc_op, collections=['val'])

    # Exponentially decayed learning rate, clipped at a floor.
    lr_exp_op = tf.train.exponential_decay(setting.learning_rate_base,
                                           global_step, setting.decay_steps,
                                           setting.decay_rate, staircase=True)
    lr_clip_op = tf.maximum(lr_exp_op, setting.learning_rate_min)
    _ = tf.summary.scalar('learning_rate', tensor=lr_clip_op,
                          collections=['train'])
    reg_loss = setting.weight_decay * tf.losses.get_regularization_loss()
    if setting.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=lr_clip_op)
    elif setting.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=lr_clip_op,
                                               momentum=setting.momentum,
                                               use_nesterov=True)
    # Run batch-norm moving-average updates with each train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss_op + reg_loss,
                                      global_step=global_step)

    saver = tf.train.Saver(max_to_keep=None)

    folder_ckpt = os.path.join(root_folder, 'ckpts')
    if not os.path.exists(folder_ckpt):
        os.makedirs(folder_ckpt)

    folder_summary = os.path.join(root_folder, 'summary')
    if not os.path.exists(folder_summary):
        os.makedirs(folder_summary)

    parameter_num = np.sum([np.prod(v.shape.as_list())
                            for v in tf.trainable_variables()])
    print('{}-Parameter number: {:d}.'.format(datetime.now(), parameter_num))

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = False
    with tf.Session(config=config) as sess:
        summaries_op = tf.summary.merge_all('train')
        summaries_val_op = tf.summary.merge_all('val')
        summary_writer = tf.summary.FileWriter(folder_summary, sess.graph)

        sess.run(tf.global_variables_initializer())

        # Load the model: explicit checkpoint wins, else resume the latest.
        if args.load_ckpt is not None:
            saver.restore(sess, args.load_ckpt)
            print('{}-Checkpoint loaded from {}!'.format(
                datetime.now(), args.load_ckpt))
        else:
            latest_ckpt = tf.train.latest_checkpoint(folder_ckpt)
            if latest_ckpt:
                print('{}-Found checkpoint {}'.format(datetime.now(),
                                                      latest_ckpt))
                saver.restore(sess, latest_ckpt)
                print('{}-Checkpoint loaded from {} (Iter {})'.format(
                    datetime.now(), latest_ckpt, sess.run(global_step)))

        best_acc = 0
        best_epoch = 0
        for epoch in range(num_epochs):
            ############################### train #######################################
            # Shuffle train files
            np.random.shuffle(seg_list)
            for file_idx_train in range(len(seg_list)):
                print('----epoch:' + str(epoch) + '--train file:' +
                      str(file_idx_train) + '-----')
                filelist_train = seg_list[file_idx_train]
                data_train, _, data_num_train, label_train, _ = \
                    data_utils.load_seg(filelist_train)
                num_train = data_train.shape[0]
                if data_train.shape[-1] > 3:
                    data_train = data_train[:, :, :3]

                data_train, data_num_train, label_train = \
                    data_utils.grouped_shuffle([data_train, data_num_train,
                                                label_train])

                batch_num = (num_train + batch_size - 1) // batch_size

                for batch_idx_train in range(batch_num):
                    # Training
                    start_idx = (batch_size * batch_idx_train) % num_train
                    end_idx = min(start_idx + batch_size, num_train)
                    batch_size_train = end_idx - start_idx
                    points_batch = data_train[start_idx:end_idx, ...]
                    points_num_batch = data_num_train[start_idx:end_idx, ...]
                    labels_batch = label_train[start_idx:end_idx, ...]
                    weights_batch = np.array(label_weights_list)[labels_batch]

                    # Jitter the number of sampled points, clipped to a range.
                    offset = int(random.gauss(
                        0, sample_num * setting.sample_num_variance))
                    offset = max(offset, -sample_num * setting.sample_num_clip)
                    offset = min(offset, sample_num * setting.sample_num_clip)
                    sample_num_train = sample_num + offset

                    xforms_np, rotations_np = pf.get_xforms(
                        batch_size_train,
                        rotation_range=rotation_range,
                        scaling_range=scaling_range,
                        order=setting.rotation_order)
                    # Metrics are reset per batch, so the logged values are
                    # for this batch only.
                    sess.run(reset_metrics_op)
                    sess.run([train_op, loss_mean_update_op,
                              t_1_acc_update_op,
                              t_1_per_class_acc_update_op],
                             feed_dict={
                                 pts_fts: points_batch,
                                 indices: pf.get_indices(batch_size_train,
                                                         sample_num_train,
                                                         points_num_batch),
                                 xforms: xforms_np,
                                 rotations: rotations_np,
                                 jitter_range: np.array([jitter]),
                                 labels_seg: labels_batch,
                                 labels_weights: weights_batch,
                                 is_training: True,
                             })
                    loss, t_1_acc, t_1_per_class_acc, summaries, step = sess.run(
                        [loss_mean_op, t_1_acc_op, t_1_per_class_acc_op,
                         summaries_op, global_step])
                    summary_writer.add_summary(summaries, step)
                    log_string('{}-[Train]-Iter: {:06d} Loss: {:.4f} T-1 Acc: {:.4f} T-1 mAcc: {:.4f}'
                               .format(datetime.now(), step, loss, t_1_acc,
                                       t_1_per_class_acc))
                    sys.stdout.flush()
            ######################################################################

            filename_ckpt = os.path.join(folder_ckpt, 'epoch')
            saver.save(sess, filename_ckpt, global_step=epoch)
            print('{}-Checkpoint saved to {}!'.format(datetime.now(),
                                                      filename_ckpt))

            # Validation: metrics accumulate over the whole val set.
            sess.run(reset_metrics_op)
            for batch_val_idx in range(batch_num_val):
                start_idx = batch_size * batch_val_idx
                end_idx = min(start_idx + batch_size, num_val)
                batch_size_val = end_idx - start_idx
                points_batch = data_val[start_idx:end_idx, ...]
                points_num_batch = data_num_val[start_idx:end_idx, ...]
                labels_batch = label_val[start_idx:end_idx, ...]
                weights_batch = np.array(label_weights_list)[labels_batch]

                xforms_np, rotations_np = pf.get_xforms(
                    batch_size_val,
                    rotation_range=rotation_range_val,
                    scaling_range=scaling_range_val,
                    order=setting.rotation_order)
                sess.run([loss_mean_update_op, t_1_acc_update_op,
                          t_1_per_class_acc_update_op],
                         feed_dict={
                             pts_fts: points_batch,
                             indices: pf.get_indices(batch_size_val,
                                                     sample_num,
                                                     points_num_batch),
                             xforms: xforms_np,
                             rotations: rotations_np,
                             jitter_range: np.array([jitter_val]),
                             labels_seg: labels_batch,
                             labels_weights: weights_batch,
                             is_training: False,
                         })
            loss_val, t_1_acc_val, t_1_per_class_acc_val, summaries_val, step = sess.run(
                [loss_mean_op, t_1_acc_op, t_1_per_class_acc_op,
                 summaries_val_op, global_step])
            summary_writer.add_summary(summaries_val, step)

            if t_1_per_class_acc_val > best_acc:
                best_acc = t_1_per_class_acc_val
                best_epoch = epoch

            log_string('{}-[Val ]-Average: Loss: {:.4f} T-1 Acc: {:.4f} T-1 mAcc: {:.4f} best epoch: {} Current epoch: {}'
                       .format(datetime.now(), loss_val, t_1_acc_val,
                               t_1_per_class_acc_val, best_epoch, epoch))
            sys.stdout.flush()
            ######################################################################
        print('{}-Done!'.format(datetime.now()))
def ExtractLocalFeatures(image, image_scales, max_feature_num, abs_thres, iou,
                         attention_model_fn, stride_factor):
  """Extract local features for input image.

  Args:
    image: image tensor of type tf.uint8 with shape [h, w, channels].
    image_scales: 1D float tensor which contains float scales used for image
      pyramid construction.
    max_feature_num: int tensor denotes the maximum selected feature points.
    abs_thres: float tensor denotes the score threshold for feature selection.
    iou: float scalar denotes the iou threshold for NMS.
    attention_model_fn: model function. Follows the signature:
      * Args:
        * `images`: Image tensor which is re-scaled.
      * Returns:
        * `attention_prob`: attention map after the non-linearity.
        * `feature_map`: feature map after ResNet convolution.
    stride_factor: integer accounting for striding after block3.

  Returns:
    boxes: [N, 4] float tensor which denotes the selected receptive box. N is
      the number of final feature points which pass through keypoint selection
      and NMS steps.
    features: [N, depth] float tensor.
    feature_scales: [N] float tensor. It is the inverse of the input image
      scales such that larger image scales correspond to larger image regions,
      which is compatible with keypoints detected with other techniques, for
      example Congas.
    scores: [N, 1] float tensor denotes the attention score.
  """
  # [h, w] of the original image, as floats, for scaled-size computation.
  original_image_shape_float = tf.gather(
      tf.dtypes.cast(tf.shape(image), tf.float32), [0, 1])

  image_tensor = gld.NormalizeImages(
      image, pixel_value_offset=128.0, pixel_value_scale=128.0)
  # Add a batch dimension: the model function expects [1, h, w, channels].
  image_tensor = tf.expand_dims(image_tensor, 0, name='image/expand_dims')

  # Hard code the feature depth and receptive field parameters for now.
  rf, stride, padding = [291.0, 16.0 * stride_factor, 145.0]
  feature_depth = 1024

  def _ProcessSingleScale(scale_index, boxes, features, scales, scores):
    """Resizes the image and run feature extraction and keypoint selection.

       This function will be passed into tf.while_loop() and be called
       repeatedly. The input boxes are collected from the previous iteration
       [0: scale_index -1]. We get the current scale by
       image_scales[scale_index], and run resize image, feature extraction and
       keypoint selection. Then we will get a new set of selected_boxes for
       current scale. In the end, we concat the previous boxes with current
       selected_boxes as the output.

    Args:
      scale_index: A valid index in the image_scales.
      boxes: Box tensor with the shape of [N, 4].
      features: Feature tensor with the shape of [N, depth].
      scales: Scale tensor with the shape of [N].
      scores: Attention score tensor with the shape of [N].

    Returns:
      scale_index: The next scale index for processing.
      boxes: Concatenated box tensor with the shape of [K, 4]. K >= N.
      features: Concatenated feature tensor with the shape of [K, depth].
      scales: Concatenated scale tensor with the shape of [K].
      scores: Concatenated score tensor with the shape of [K].
    """
    scale = tf.gather(image_scales, scale_index)
    new_image_size = tf.dtypes.cast(
        tf.round(original_image_shape_float * scale), tf.int32)
    resized_image = tf.image.resize(image_tensor, new_image_size)

    attention_prob, feature_map = attention_model_fn(resized_image)
    # Drop the batch dimension added above.
    attention_prob = tf.squeeze(attention_prob, axis=[0])
    feature_map = tf.squeeze(feature_map, axis=[0])

    # One receptive-field box per feature-map cell, in resized-image space.
    rf_boxes = feature_extractor.CalculateReceptiveBoxes(
        tf.shape(feature_map)[0],
        tf.shape(feature_map)[1], rf, stride, padding)

    # Re-project back to the original image space.
    rf_boxes = tf.divide(rf_boxes, scale)
    attention_prob = tf.reshape(attention_prob, [-1])
    feature_map = tf.reshape(feature_map, [-1, feature_depth])

    # Use attention score to select feature vectors.
    indices = tf.reshape(tf.where(attention_prob >= abs_thres), [-1])
    selected_boxes = tf.gather(rf_boxes, indices)
    selected_features = tf.gather(feature_map, indices)
    selected_scores = tf.gather(attention_prob, indices)
    # Keypoint scale is the inverse of the image scale (see docstring).
    selected_scales = tf.ones_like(selected_scores, tf.float32) / scale

    # Concat with the previous result from different scales.
    boxes = tf.concat([boxes, selected_boxes], 0)
    features = tf.concat([features, selected_features], 0)
    scales = tf.concat([scales, selected_scales], 0)
    scores = tf.concat([scores, selected_scores], 0)

    return scale_index + 1, boxes, features, scales, scores

  # Empty accumulators; rows are appended per scale inside the loop.
  output_boxes = tf.zeros([0, 4], dtype=tf.float32)
  output_features = tf.zeros([0, feature_depth], dtype=tf.float32)
  output_scales = tf.zeros([0], dtype=tf.float32)
  output_scores = tf.zeros([0], dtype=tf.float32)

  # Process the first scale separately, the following scales will reuse the
  # graph variables.
  (_, output_boxes, output_features, output_scales,
   output_scores) = _ProcessSingleScale(0, output_boxes, output_features,
                                        output_scales, output_scores)

  i = tf.constant(1, dtype=tf.int32)
  num_scales = tf.shape(image_scales)[0]
  keep_going = lambda j, b, f, scales, scores: tf.less(j, num_scales)

  # The leading (row) dimensions grow across iterations, hence the [None, ...]
  # shape invariants; back_prop is disabled since this is inference-only.
  (_, output_boxes, output_features, output_scales,
   output_scores) = tf.while_loop(
       cond=keep_going,
       body=_ProcessSingleScale,
       loop_vars=[
           i, output_boxes, output_features, output_scales, output_scores
       ],
       shape_invariants=[
           i.get_shape(),
           tf.TensorShape([None, 4]),
           tf.TensorShape([None, feature_depth]),
           tf.TensorShape([None]),
           tf.TensorShape([None])
       ],
       back_prop=False)

  # Bundle per-box attributes so NMS keeps them aligned with the boxes.
  feature_boxes = box_list.BoxList(output_boxes)
  feature_boxes.add_field('features', output_features)
  feature_boxes.add_field('scales', output_scales)
  feature_boxes.add_field('scores', output_scores)

  nms_max_boxes = tf.minimum(max_feature_num, feature_boxes.num_boxes())
  final_boxes = box_list_ops.non_max_suppression(feature_boxes, iou,
                                                 nms_max_boxes)

  return final_boxes.get(), final_boxes.get_field(
      'features'), final_boxes.get_field('scales'), tf.expand_dims(
          final_boxes.get_field('scores'), 1)
def get_loss(logits, \
        mask_label, center_label, \
        heading_class_label, heading_residual_label, \
        size_class_label, size_residual_label, \
        end_points, reg_weight=0.001):
    """Compute the total multi-task loss for 3D box estimation.

    Combines segmentation-mask, center, heading (class + residual),
    size (class + residual) and corner losses. Also computes 2D/3D IoU
    metrics via a py_func for monitoring (stored into end_points, no
    gradient flows through them).

    Args:
        logits: BxNxC segmentation logits.
        mask_label: BxN integer point mask labels.
        center_label: Bx3 ground-truth box centers.
        heading_class_label: B integer heading-bin labels.
        heading_residual_label: B heading residuals (radians).
        size_class_label: B integer size-cluster labels.
        size_residual_label: Bx3 size residuals.
        end_points: dict of network outputs; 'iou2ds'/'iou3ds' are added.
        reg_weight: kept for interface compatibility — unused here.

    Returns:
        Scalar total loss tensor.
    """
    # 3D instance segmentation (point mask) loss.
    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=mask_label))
    tf.summary.scalar('3d mask loss', mask_loss)

    # Center regression: Huber on the L2 distance, for the final center and
    # the stage-1 (T-Net) center. Different deltas follow the original setup.
    center_dist = tf.norm(center_label - end_points['center'], axis=-1)
    center_loss = huber_loss(center_dist, delta=2.0)
    tf.summary.scalar('center loss', center_loss)
    stage1_center_dist = tf.norm(center_label - end_points['stage1_center'],
                                 axis=-1)
    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)
    tf.summary.scalar('stage1 center loss', stage1_center_loss)

    # Heading: bin classification plus residual regression. The residual is
    # normalized by the half bin width (pi/NUM_HEADING_BIN) and only the
    # ground-truth bin's predicted residual is penalized (one-hot select).
    heading_class_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['heading_scores'], labels=heading_class_label))
    tf.summary.scalar('heading class loss', heading_class_loss)
    hcls_onehot = tf.one_hot(heading_class_label,
                             depth=NUM_HEADING_BIN,
                             on_value=1, off_value=0, axis=-1)  # BxNUM_HEADING_BIN
    heading_residual_normalized_label = \
        heading_residual_label / (np.pi / NUM_HEADING_BIN)
    heading_residual_normalized_loss = huber_loss(
        tf.reduce_sum(end_points['heading_residuals_normalized'] *
                      tf.to_float(hcls_onehot), axis=1) -
        heading_residual_normalized_label, delta=1.0)
    tf.summary.scalar('heading residual normalized loss',
                      heading_residual_normalized_loss)

    # Size: cluster classification plus residual regression, with the residual
    # normalized by the ground-truth cluster's mean size.
    size_class_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_points['size_scores'], labels=size_class_label))
    tf.summary.scalar('size class loss', size_class_loss)
    scls_onehot = tf.one_hot(size_class_label,
                             depth=NUM_SIZE_CLUSTER,
                             on_value=1, off_value=0, axis=-1)  # BxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims(tf.to_float(scls_onehot), -1),
                                [1, 1, 3])  # BxNUM_SIZE_CLUSTERx3
    predicted_size_residual_normalized = tf.reduce_sum(
        end_points['size_residuals_normalized'] * scls_onehot_tiled,
        axis=[1])  # Bx3
    mean_size_arr_expand = tf.expand_dims(
        tf.constant(mean_size_arr, dtype=tf.float32), 0)  # 1xNUM_SIZE_CLUSTERx3
    mean_size_label = tf.reduce_sum(scls_onehot_tiled * mean_size_arr_expand,
                                    axis=[1])  # Bx3
    size_residual_label_normalized = size_residual_label / mean_size_label
    size_normalized_dist = tf.norm(
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)
    tf.summary.scalar('size residual normalized loss',
                      size_residual_normalized_loss)

    # 2D/3D IoU metrics computed in numpy; monitoring only (no gradient).
    iou2ds, iou3ds = tf.py_func(compute_box3d_iou,
                                [end_points['center'],
                                 end_points['heading_scores'],
                                 end_points['heading_residuals'],
                                 end_points['size_scores'],
                                 end_points['size_residuals'],
                                 center_label,
                                 heading_class_label,
                                 heading_residual_label,
                                 size_class_label,
                                 size_residual_label],
                                [tf.float32, tf.float32])
    tf.summary.scalar('iou_2d', tf.reduce_mean(iou2ds))
    tf.summary.scalar('iou_3d', tf.reduce_mean(iou3ds))
    end_points['iou2ds'] = iou2ds
    end_points['iou3ds'] = iou3ds

    # Corner loss: distance between predicted box corners (selected via the
    # joint GT heading/size one-hot mask) and GT corners; the flipped
    # (heading + pi) GT box is also tried since boxes are symmetric.
    corners_3d = get_box3d_corners(end_points['center'],
                                   end_points['heading_residuals'],
                                   end_points['size_residuals'])  # (B,NH,NS,8,3)
    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2),
                      [1, 1, NUM_SIZE_CLUSTER]) * \
              tf.tile(tf.expand_dims(scls_onehot, 1),
                      [1, NUM_HEADING_BIN, 1])  # (B,NH,NS)
    corners_3d_pred = tf.reduce_sum(
        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask, -1), -1)) *
        corners_3d, axis=[1, 2])  # (B,8,3)

    heading_bin_centers = tf.constant(
        np.arange(0, 2 * np.pi, 2 * np.pi / NUM_HEADING_BIN),
        dtype=tf.float32)  # (NH,)
    heading_label = tf.expand_dims(heading_residual_label, 1) + \
        tf.expand_dims(heading_bin_centers, 0)  # (B,NH)
    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot) * heading_label, 1)

    mean_sizes = tf.expand_dims(
        tf.constant(mean_size_arr, dtype=tf.float32), 0)  # (1,NS,3)
    size_label = mean_sizes + \
        tf.expand_dims(size_residual_label, 1)  # (1,NS,3) + (B,1,3) = (B,NS,3)
    size_label = tf.reduce_sum(
        tf.expand_dims(tf.to_float(scls_onehot), -1) * size_label,
        axis=[1])  # (B,3)

    corners_3d_gt = get_box3d_corners_helper(
        center_label, heading_label, size_label)  # (B,8,3)
    corners_3d_gt_flip = get_box3d_corners_helper(
        center_label, heading_label + np.pi, size_label)  # (B,8,3)
    corners_dist = tf.minimum(
        tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
        tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    corners_loss = huber_loss(corners_dist, delta=1.0)
    tf.summary.scalar('corners loss', corners_loss)

    # Weighted sum: residual losses are up-weighted by 20, box-level losses
    # scaled by 0.1, matching the original training configuration.
    return mask_loss + \
        (center_loss + heading_class_loss + size_class_loss +
         heading_residual_normalized_loss * 20 +
         size_residual_normalized_loss * 20 +
         stage1_center_loss) * 0.1 + \
        corners_loss