def obtain_localizations_and_softmax(self, net_output):
    """Split the raw network output and decode every predicted quantity.

    Args:
        net_output: network output, annotated by the original code as
            (batch_size, nboxes, ?).

    Returns:
        Tuple ``(localizations_dec, softmax, pc, dc, cm)``. ``pc``, ``dc``
        and ``cm`` are ``None`` whenever the matching
        ``self.opts.predict_pc`` / ``predict_dc`` / ``predict_cm`` flag is
        falsy.
    """
    opts = self.opts
    # The same three flags are handed to every output_encoding splitter.
    head_flags = (opts.predict_pc, opts.predict_dc, opts.predict_cm)

    # Split net output:
    locs_enc = output_encoding.get_loc_enc(net_output)  # (batch_size, nboxes, 4)
    logits = output_encoding.get_logits(net_output, *head_flags)  # (batch_size, nboxes, nclasses)
    pc_enc = output_encoding.get_pc_enc(net_output, *head_flags)  # (batch_size, nboxes) or None
    dc_enc = output_encoding.get_dc_enc(net_output, *head_flags)  # (batch_size, nboxes) or None
    cm_enc = output_encoding.get_cm_enc(net_output, *head_flags)  # (batch_size, nboxes) or None

    # Decode:
    localizations_dec = self.decode_boxes_wrt_orig_tf(locs_enc)  # (batch_size, nboxes, 4)
    softmax = tf.nn.softmax(logits, axis=-1)  # (batch_size, nboxes, nclasses)
    # Optional heads are only decoded when they are being predicted.
    pc = CommonEncoding.decode_pc_or_dc_tf(pc_enc) if opts.predict_pc else None
    dc = CommonEncoding.decode_pc_or_dc_tf(dc_enc) if opts.predict_dc else None
    cm = CommonEncoding.decode_cm_tf(cm_enc) if opts.predict_cm else None

    return localizations_dec, softmax, pc, dc, cm
def write_debug_info(self, *arg):
    """Save one debug crop and one info file per (image, anchor position).

    For every flat anchor position the anchor window is cut out of the input
    batch of the grid that position belongs to, the predicted box is drawn
    on it when the prediction passes the filter described below, and the
    result is written to ``<debug_dir>/batch<K>/img<I>/pos<P>.png`` next to
    ``pos<P>_info.txt``.

    Args:
        *arg: ``[net_output, inputs0, ..., inputsN]``. Per the original
            annotations ``net_output`` is
            (batch_size, nboxes, n_channels_last) and every ``inputsX`` is
            (batch_size, height, width, 3).

    Returns:
        The first positional argument (``net_output``), as received.
    """
    net_output = arg[0]  # (batch_size, nboxes, n_channels_last)
    inputs_all_sizes = list(arg[1:])
    batch_size = net_output.shape[0]

    opts = self.opts
    head_flags = (opts.predict_pc, opts.predict_dc, opts.predict_cm)
    localizations_enc = output_encoding.get_loc_enc(net_output)  # (batch_size, nboxes, 4)
    logits = output_encoding.get_logits(net_output, *head_flags)  # (batch_size, nboxes, nclasses)
    softmax = tools.softmax_np(logits)  # (batch_size, nboxes, nclasses)
    cm_pred_enc = output_encoding.get_cm_enc(net_output, *head_flags)  # (batch_size, nboxes) or None
    cm_pred = CommonEncoding.decode_cm_np(cm_pred_enc)  # (batch_size, nboxes) or None

    # One fresh directory per call, with one sub-directory per image.
    self.batch_count_debug += 1
    batch_dir = os.path.join(self.debug_dir, 'batch' + str(self.batch_count_debug))
    os.makedirs(batch_dir)
    dir_images = [os.path.join(batch_dir, 'img' + str(k)) for k in range(batch_size)]
    for image_dir in dir_images:
        os.makedirs(image_dir)

    for pos in range(self.n_boxes):
        grid_idx = self.get_grid_idx_from_flat_position(pos)
        inputs_this_box = inputs_all_sizes[grid_idx]  # (batch_size, height, width, 3)
        # anchor_coords: [xmin, ymin, xmax, ymax]
        anchor_coords = self.get_anchor_coords_wrt_its_input(pos, True)
        rows = slice(anchor_coords[1], anchor_coords[3])
        cols = slice(anchor_coords[0], anchor_coords[2])
        pos_tag = 'pos' + str(pos)

        for img_idx, dir_img in enumerate(dir_images):
            # Copy so that drawing never touches the input batch.
            # Annotated originally as (receptive_field_size, receptive_field_size, 3).
            img = inputs_this_box[img_idx, rows, cols].copy()
            img = tools.add_mean_again(img)
            path_to_save = os.path.join(dir_img, pos_tag + '.png')

            coords_enc = localizations_enc[img_idx, pos, :]  # (4)
            coords_dec = CommonEncoding.decode_boxes_wrt_anchor_np(coords_enc, self.opts)  # (4)

            scores = softmax[img_idx, pos, :]
            if self.th_conf is None:
                # No threshold: draw whenever the arg-max class is not background.
                predicted_class = np.argmax(scores)
                draw_box = predicted_class != self.background_id
            else:
                # Threshold given: best class among all but the last entry,
                # drawn only if its confidence exceeds the threshold.
                predicted_class = np.argmax(scores[:-1])
                draw_box = scores[predicted_class] > self.th_conf

            if draw_box:
                conf = scores[predicted_class]
                # [class, 4 decoded coords, confidence] -> (6) given coords_dec is (4)
                bbox = np.concatenate(
                    [np.expand_dims(predicted_class, axis=0),
                     coords_dec,
                     np.expand_dims(conf, axis=0)],
                    axis=0)
                bbox = np.expand_dims(bbox, axis=0)  # (1, 6)
                img = tools.add_bounding_boxes_to_image2(
                    img, bbox, self.classnames, color=(127, 0, 127))

            cv2.imwrite(path_to_save, cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR))

            # Save info file:
            info_file_path = os.path.join(dir_img, pos_tag + '_info.txt')
            self.write_anchor_info_file(info_file_path, softmax, predicted_class, cm_pred,
                                        img_idx, pos, coords_enc, coords_dec)

    return net_output
def make_loss_and_metrics(self, net_output, labels_enc_reord):
    """Build the total loss and the metrics vector, logging each as a summary.

    The classification and localization terms are always present; the pc,
    dc and cm terms are added, in that order, only when the matching
    ``self.opts.predict_*`` flag is truthy.

    Args:
        net_output: annotated by the original code as (batch_size, ?).
        labels_enc_reord: annotated by the original code as
            (batch_size, n_labels).

    Returns:
        ``(total_loss, metrics)``. ``total_loss`` is the sum of every enabled
        loss term; ``metrics`` starts as ``[accuracy_conf, iou_mean]`` and
        gets one extra entry per enabled optional head.
    """
    opts = self.opts
    head_flags = (opts.predict_pc, opts.predict_dc, opts.predict_cm)

    # Split net output:
    locs_enc = output_encoding.get_loc_enc(net_output)  # (batch_size, 4)
    logits = output_encoding.get_logits(net_output, *head_flags)  # (batch_size, nclasses)
    pc_pred = output_encoding.get_pc_enc(net_output, *head_flags)  # (batch_size) or None
    dc_pred = output_encoding.get_dc_enc(net_output, *head_flags)  # (batch_size) or None
    cm_pred = output_encoding.get_cm_enc(net_output, *head_flags)  # (batch_size) or None

    # Split the encoded labels:
    match_raw = gt_encoding.get_mask_match(labels_enc_reord)  # (batch_size)
    neutral_raw = gt_encoding.get_mask_neutral(labels_enc_reord)  # (batch_size)
    gt_class_ids = gt_encoding.get_class_id(labels_enc_reord)
    gt_coords = gt_encoding.get_coords_enc(labels_enc_reord)
    pc_label = gt_encoding.get_pc_enc(labels_enc_reord)
    dc_label = gt_encoding.get_dc_enc(labels_enc_reord)
    cm_label = gt_encoding.get_cm_enc(labels_enc_reord)

    # The masks are thresholded at 0.5 to obtain boolean tensors.
    is_match = tf.greater(match_raw, 0.5)
    is_neutral = tf.greater(neutral_raw, 0.5)
    zeros = tf.zeros_like(is_match, dtype=tf.float32)  # (batch_size)
    n_positives = tf.reduce_sum(tf.cast(is_match, tf.int32), name='n_positives')  # ()

    # Classification term.
    conf_loss, accuracy_conf = classification_loss_and_metric(
        logits, is_match, is_neutral, gt_class_ids, zeros, n_positives,
        opts.negative_ratio)
    tf.summary.scalar('losses/conf_loss', conf_loss)
    tf.summary.scalar('metrics/accuracy_conf', accuracy_conf)
    total_loss = conf_loss

    # Localization term.
    loc_loss, iou_mean = localization_loss_and_metric(
        locs_enc, is_match, is_neutral, gt_coords, zeros, opts.loc_loss_factor, opts)
    tf.summary.scalar('losses/loc_loss', loc_loss)
    tf.summary.scalar('metrics/iou_mean', iou_mean)
    total_loss += loc_loss

    metrics = tf.stack([accuracy_conf, iou_mean])  # (2)

    # Optional heads: (enabled, loss summary name, metric summary name, builder).
    # Builders are lambdas so nothing is evaluated for a disabled head.
    optional_heads = (
        (opts.predict_pc, 'losses/pc_loss', 'metrics/pc_mean_err',
         lambda: pc_loss_and_metric(pc_pred, pc_label, is_match, is_neutral, zeros,
                                    opts.pc_loss_factor)),
        (opts.predict_dc, 'losses/dc_loss', 'metrics/dc_mean_err',
         lambda: dc_loss_and_metric(dc_pred, dc_label, is_match, is_neutral, zeros,
                                    opts.dc_loss_factor)),
        (opts.predict_cm, 'losses/cm_loss', 'metrics/cm_mean_err',
         lambda: cm_loss_and_metric(cm_pred, cm_label, is_match, zeros,
                                    opts.cm_loss_factor)),
    )
    for enabled, loss_name, metric_name, build in optional_heads:
        if not enabled:
            continue
        head_loss, head_metric = build()
        tf.summary.scalar(loss_name, head_loss)
        tf.summary.scalar(metric_name, head_metric)
        total_loss += head_loss
        metrics = tf.concat([metrics, tf.expand_dims(head_metric, axis=0)], axis=0)

    # total_loss: ()
    # metrics: (n_metrics)
    return total_loss, metrics
def write_eval_debug_info(self, metrics, inputs_reord, labels_enc_reord, net_output, filenames):
    """Write every evaluated crop, with GT and predicted boxes, to disk.

    Each crop is saved as a PNG in ``self.neutral_dir``,
    ``self.correct_class_dir`` or ``self.wrong_class_dir`` together with an
    ``*_info.txt`` file, the running counters ``n_matches``,
    ``n_backgrounds`` and ``n_neutrals`` are updated, and their shares are
    printed at the end.

    Args:
        metrics: returned untouched.
        inputs_reord: annotated by the original code as
            (batch_size, input_image_size, input_image_size, 3).
        labels_enc_reord: annotated as (batch_size, n_labels).
        net_output: annotated as (batch_size, ?).
        filenames: annotated as (n_images_per_batch); every entry must
            support ``.decode`` (i.e. be a bytes object).

    Returns:
        ``metrics``, as received.

    Raises:
        Exception: if ``self.classnames`` is ``None``.
    """
    opts = self.opts
    head_flags = (opts.predict_pc, opts.predict_dc, opts.predict_cm)

    # Split net output:
    locs_enc = output_encoding.get_loc_enc(net_output)  # (batch_size, 4)
    logits = output_encoding.get_logits(net_output, *head_flags)  # (batch_size, nclasses)
    pc_pred_enc = output_encoding.get_pc_enc(net_output, *head_flags)  # (batch_size) or None
    dc_pred_enc = output_encoding.get_dc_enc(net_output, *head_flags)  # (batch_size) or None
    cm_pred_enc = output_encoding.get_cm_enc(net_output, *head_flags)  # (batch_size) or None
    pc_gt_enc = gt_encoding.get_pc_enc(labels_enc_reord)  # (batch_size)
    dc_gt_enc = gt_encoding.get_dc_enc(labels_enc_reord)  # (batch_size)
    cm_gt_enc = gt_encoding.get_cm_enc(labels_enc_reord)  # (batch_size)

    softmax = tools.softmax_np(logits)  # (batch_size, nclasses)
    pc_pred = CommonEncoding.decode_pc_np(pc_pred_enc)
    dc_pred = CommonEncoding.decode_dc_np(dc_pred_enc)
    cm_pred = CommonEncoding.decode_cm_np(cm_pred_enc)
    pc_gt = CommonEncoding.decode_pc_np(pc_gt_enc)
    dc_gt = CommonEncoding.decode_dc_np(dc_gt_enc)
    cm_gt = CommonEncoding.decode_cm_np(cm_gt_enc)

    if self.classnames is None:
        raise Exception('classnames must be specified if debugging.')

    self.batch_count_eval_debug += 1
    batch_size = opts.n_crops_per_image * opts.n_images_per_batch

    # Repeat every file name once per crop so it lines up with the crop axis.
    filenames_ext = np.tile(np.expand_dims(filenames, axis=-1),
                            [1, opts.n_crops_per_image])  # (n_images_per_batch, n_crops_per_image)
    # Shape is passed positionally: the `newshape` keyword is deprecated in
    # recent NumPy releases, while the positional form works on all versions.
    filenames_reord = np.reshape(filenames_ext, (batch_size,))  # (batch_size)

    mask_match = gt_encoding.get_mask_match(labels_enc_reord) > 0.5  # (batch_size)
    mask_neutral = gt_encoding.get_mask_neutral(labels_enc_reord) > 0.5  # (batch_size)

    batch_tag = 'batch' + str(self.batch_count_eval_debug)

    # Localizations:
    for i in range(batch_size):
        name = filenames_reord[i].decode(sys.getdefaultencoding())
        crop_tag = batch_tag + '_' + name + '_crop' + str(i)
        try:
            crop = inputs_reord[i, ...]  # (input_image_size, input_image_size, 3)
            crop = tools.add_mean_again(crop)
            self.draw_dc_circle(crop)
            label_enc = labels_enc_reord[i, :]
            gt_class = int(gt_encoding.get_class_id(label_enc))
            is_match = mask_match[i]
            is_neutral = mask_neutral[i]
            predicted_class = np.argmax(logits[i, :])

            # Pick the output folder and file name, updating the counters.
            if is_neutral:
                self.n_neutrals += 1
                file_name = crop_tag + '_NEUTRAL_PRED-' + \
                    self.classnames[predicted_class] + '.png'
                crop_dir = self.neutral_dir
            else:
                if gt_class == self.background_id:
                    self.n_backgrounds += 1
                else:
                    self.n_matches += 1
                file_name = crop_tag + '_GT-' + \
                    self.classnames[gt_class] + '_PRED-' + self.classnames[predicted_class] + '.png'
                if gt_class == predicted_class:
                    crop_dir = self.correct_class_dir
                else:
                    crop_dir = self.wrong_class_dir
            crop_path = os.path.join(crop_dir, file_name)

            # Ground-truth box.
            if gt_class != self.background_id:
                gt_coords_enc = gt_encoding.get_coords_enc(label_enc)
                gt_coords_dec = CommonEncoding.decode_boxes_wrt_anchor_np(gt_coords_enc, opts)
                class_and_coords = np.concatenate(
                    [np.expand_dims(gt_class, axis=0), gt_coords_dec], axis=0)  # (5)
                class_and_coords = np.expand_dims(class_and_coords, axis=0)  # (1, 5)
                crop = tools.add_bounding_boxes_to_image(crop, class_and_coords, color=(255, 0, 0))
            else:
                assert not is_match, 'is_match is True, and gt_class it background_id.'

            # Predicted box, in a different colour.
            if predicted_class != self.background_id:
                predicted_coords_enc = locs_enc[i, :]
                predicted_coords_dec = CommonEncoding.decode_boxes_wrt_anchor_np(
                    predicted_coords_enc, opts)
                class_and_coords = np.concatenate(
                    [np.expand_dims(predicted_class, axis=0), predicted_coords_dec],
                    axis=0)  # (5)
                class_and_coords = np.expand_dims(class_and_coords, axis=0)  # (1, 5)
                crop = tools.add_bounding_boxes_to_image(crop, class_and_coords, color=(127, 0, 127))

            cv2.imwrite(crop_path, cv2.cvtColor(crop.astype(np.uint8), cv2.COLOR_RGB2BGR))

            # Save info file:
            info_file_path = os.path.join(crop_dir, crop_tag + '_info.txt')
            self.write_crop_info_file(info_file_path, softmax, predicted_class, gt_class,
                                      is_match, is_neutral, pc_gt, dc_gt, cm_gt,
                                      pc_pred, dc_pred, cm_pred, i)
        except Exception:
            # Name the offending image, then let the error propagate.
            # KeyboardInterrupt / SystemExit are deliberately not caught so an
            # interruption is not reported as an image problem.
            print('Error with image ' + name)
            raise

    total_crops = self.n_matches + self.n_backgrounds + self.n_neutrals

    def percentage(count):
        # With nothing counted yet there is no meaningful share; report 0.0
        # instead of failing with a division by zero.
        if total_crops == 0:
            return 0.0
        return np.round(float(count) / total_crops * 100.0, 2)

    print('n_matches: ' + str(self.n_matches) + ' (' + str(percentage(self.n_matches)) + '%)')
    print('n_backgrounds: ' + str(self.n_backgrounds) + ' (' + str(percentage(self.n_backgrounds)) + '%)')
    print('n_neutrals: ' + str(self.n_neutrals) + ' (' + str(percentage(self.n_neutrals)) + '%)')

    return metrics