示例#1
0
    def obtain_localizations_and_softmax(self, net_output):
        """Split the raw network output and decode every predicted quantity.

        net_output: (batch_size, nboxes, ?)

        Returns the tuple (localizations_dec, softmax, pc, dc, cm). pc, dc and cm are
        None whenever the matching self.opts.predict_pc / predict_dc / predict_cm flag
        is falsy.
        """
        opts = self.opts
        # The three flags are always passed together to the output_encoding getters.
        head_flags = (opts.predict_pc, opts.predict_dc, opts.predict_cm)

        # Split net output:
        boxes_enc = output_encoding.get_loc_enc(net_output)  # (batch_size, nboxes, 4)
        class_logits = output_encoding.get_logits(net_output, *head_flags)  # (batch_size, nboxes, nclasses)
        pc_enc = output_encoding.get_pc_enc(net_output, *head_flags)  # (batch_size, nboxes) or None
        dc_enc = output_encoding.get_dc_enc(net_output, *head_flags)  # (batch_size, nboxes) or None
        cm_enc = output_encoding.get_cm_enc(net_output, *head_flags)  # (batch_size, nboxes) or None

        # Decode:
        boxes_dec = self.decode_boxes_wrt_orig_tf(boxes_enc)  # (batch_size, nboxes, 4)
        class_probs = tf.nn.softmax(class_logits, axis=-1)  # (batch_size, nboxes, nclasses)
        pc = CommonEncoding.decode_pc_or_dc_tf(pc_enc) if opts.predict_pc else None
        dc = CommonEncoding.decode_pc_or_dc_tf(dc_enc) if opts.predict_dc else None
        cm = CommonEncoding.decode_cm_tf(cm_enc) if opts.predict_cm else None

        return boxes_dec, class_probs, pc, dc, cm
示例#2
0
    def write_debug_info(self, *arg):
        """Dump one annotated anchor crop and one info file per (image, anchor position).

        arg: [net_output, inputs0, ..., inputsN]
        net_output: (batch_size, nboxes, 4+nclasses)
        inputsX: (batch_size, height, width, 3)

        A new directory 'batch<k>' is created under self.debug_dir on every call, with one
        'img<i>' sub-directory per image of the batch. The predicted box is drawn on the
        crop when the prediction is not background (self.th_conf is None) or when the best
        non-background confidence exceeds self.th_conf.

        Returns net_output untouched.
        """
        net_output = arg[0]  # (batch_size, nboxes, n_channels_last)
        inputs_all_sizes = list(arg[1:])
        n_images = net_output.shape[0]

        opts = self.opts
        head_flags = (opts.predict_pc, opts.predict_dc, opts.predict_cm)
        localizations_enc = output_encoding.get_loc_enc(net_output)  # (batch_size, nboxes, 4)
        softmax = tools.softmax_np(
            output_encoding.get_logits(net_output, *head_flags))  # (batch_size, nboxes, nclasses)
        cm_pred = CommonEncoding.decode_cm_np(
            output_encoding.get_cm_enc(net_output, *head_flags))  # (batch_size, nboxes) or None

        # One directory per call, holding one sub-directory per image.
        self.batch_count_debug += 1
        batch_dir = os.path.join(self.debug_dir, 'batch' + str(self.batch_count_debug))
        os.makedirs(batch_dir)
        dir_images = [os.path.join(batch_dir, 'img' + str(img_idx)) for img_idx in range(n_images)]
        for image_dir in dir_images:
            os.makedirs(image_dir)

        for pos in range(self.n_boxes):
            grid_idx = self.get_grid_idx_from_flat_position(pos)
            inputs_this_box = inputs_all_sizes[grid_idx]  # (batch_size, height, width, 3)
            anchor_coords = self.get_anchor_coords_wrt_its_input(pos, True)  # [xmin, ymin, xmax, ymax]
            cols = slice(anchor_coords[0], anchor_coords[2])  # xmin:xmax
            rows = slice(anchor_coords[1], anchor_coords[3])  # ymin:ymax
            pos_tag = 'pos' + str(pos)
            for img_idx, image_dir in enumerate(dir_images):
                img = inputs_this_box[img_idx, rows, cols].copy()  # (receptive_field_size, receptive_field_size, 3)
                img = tools.add_mean_again(img)
                coords_enc = localizations_enc[img_idx, pos, :]  # (4)
                coords_dec = CommonEncoding.decode_boxes_wrt_anchor_np(coords_enc, self.opts)  # (4)
                scores = softmax[img_idx, pos, :]  # (nclasses)

                if self.th_conf is None:
                    # No threshold: draw whenever the winning class is not background.
                    predicted_class = np.argmax(scores)
                    draw_box = predicted_class != self.background_id
                else:
                    # Threshold given: the last class is left out of the argmax.
                    predicted_class = np.argmax(scores[:-1])
                    draw_box = scores[predicted_class] > self.th_conf

                if draw_box:
                    conf = scores[predicted_class]
                    bbox = np.concatenate(
                        [np.expand_dims(predicted_class, axis=0), coords_dec, np.expand_dims(conf, axis=0)],
                        axis=0)  # (5)
                    bbox = np.expand_dims(bbox, axis=0)  # (1, 5)
                    img = tools.add_bounding_boxes_to_image2(img, bbox, self.classnames, color=(127, 0, 127))

                cv2.imwrite(os.path.join(image_dir, pos_tag + '.png'),
                            cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR))
                # Save info file:
                self.write_anchor_info_file(os.path.join(image_dir, pos_tag + '_info.txt'), softmax,
                                            predicted_class, cm_pred, img_idx, pos, coords_enc, coords_dec)
        return net_output
示例#3
0
    def make_loss_and_metrics(self, net_output, labels_enc_reord):
        """Build the total loss tensor and the vector of metrics, registering summaries.

        net_output: (batch_size, ?)
        labels_enc_reord: (batch_size, n_labels)

        The classification and localization terms are always included; the pc, dc and cm
        terms are added only when the matching self.opts.predict_* flag is set.

        Returns (total_loss, metrics):
          total_loss: ()
          metrics: (n_metrics), starting with [accuracy_conf, iou_mean]
        """
        opts = self.opts
        head_flags = (opts.predict_pc, opts.predict_dc, opts.predict_cm)

        def _log_scalars(loss_tag, loss_value, metric_tag, metric_value):
            # Register the loss summary first, then the metric summary.
            tf.summary.scalar(loss_tag, loss_value)
            tf.summary.scalar(metric_tag, metric_value)

        def _with_extra_metric(metric_vector, scalar_metric):
            # Append a scalar metric at the end of the metrics vector.
            return tf.concat([metric_vector, tf.expand_dims(scalar_metric, axis=0)], axis=0)

        # Split net output:
        pred_locs = output_encoding.get_loc_enc(net_output)  # (batch_size, 4)
        pred_logits = output_encoding.get_logits(net_output, *head_flags)  # (batch_size, nclasses)
        pred_pc = output_encoding.get_pc_enc(net_output, *head_flags)  # (batch_size) or None
        pred_dc = output_encoding.get_dc_enc(net_output, *head_flags)  # (batch_size) or None
        pred_cm = output_encoding.get_cm_enc(net_output, *head_flags)  # (batch_size) or None

        # Split ground truth:
        raw_mask_match = gt_encoding.get_mask_match(labels_enc_reord)  # (batch_size)
        raw_mask_neutral = gt_encoding.get_mask_neutral(labels_enc_reord)  # (batch_size)
        gt_class_ids = gt_encoding.get_class_id(labels_enc_reord)  # (batch_size)
        gt_coords = gt_encoding.get_coords_enc(labels_enc_reord)
        pc_label = gt_encoding.get_pc_enc(labels_enc_reord)  # (batch_size)
        dc_label = gt_encoding.get_dc_enc(labels_enc_reord)  # (batch_size)
        cm_label = gt_encoding.get_cm_enc(labels_enc_reord)  # (batch_size)

        # The masks come encoded as numbers; threshold them into booleans.
        is_match = tf.greater(raw_mask_match, 0.5)
        is_neutral = tf.greater(raw_mask_neutral, 0.5)

        zeros = tf.zeros_like(is_match, dtype=tf.float32)  # (batch_size)
        n_positives = tf.reduce_sum(tf.cast(is_match, tf.int32), name='n_positives')  # ()

        # Classification term.
        conf_loss, accuracy_conf = classification_loss_and_metric(
            pred_logits, is_match, is_neutral, gt_class_ids, zeros, n_positives, opts.negative_ratio)
        _log_scalars('losses/conf_loss', conf_loss, 'metrics/accuracy_conf', accuracy_conf)
        total_loss = conf_loss

        # Localization term.
        loc_loss, iou_mean = localization_loss_and_metric(
            pred_locs, is_match, is_neutral, gt_coords, zeros, opts.loc_loss_factor, opts)
        _log_scalars('losses/loc_loss', loc_loss, 'metrics/iou_mean', iou_mean)
        total_loss = total_loss + loc_loss

        metrics = tf.stack([accuracy_conf, iou_mean])  # (2)

        # Optional terms, each one adding its loss and appending its metric.
        if opts.predict_pc:
            pc_loss, pc_metric = pc_loss_and_metric(
                pred_pc, pc_label, is_match, is_neutral, zeros, opts.pc_loss_factor)
            _log_scalars('losses/pc_loss', pc_loss, 'metrics/pc_mean_err', pc_metric)
            total_loss = total_loss + pc_loss
            metrics = _with_extra_metric(metrics, pc_metric)

        if opts.predict_dc:
            dc_loss, dc_metric = dc_loss_and_metric(
                pred_dc, dc_label, is_match, is_neutral, zeros, opts.dc_loss_factor)
            _log_scalars('losses/dc_loss', dc_loss, 'metrics/dc_mean_err', dc_metric)
            total_loss = total_loss + dc_loss
            metrics = _with_extra_metric(metrics, dc_metric)

        if opts.predict_cm:
            # Unlike pc and dc, the cm loss does not take the neutral mask.
            cm_loss, cm_metric = cm_loss_and_metric(
                pred_cm, cm_label, is_match, zeros, opts.cm_loss_factor)
            _log_scalars('losses/cm_loss', cm_loss, 'metrics/cm_mean_err', cm_metric)
            total_loss = total_loss + cm_loss
            metrics = _with_extra_metric(metrics, cm_metric)

        return total_loss, metrics
示例#4
0
    def write_eval_debug_info(self, metrics, inputs_reord, labels_enc_reord,
                              net_output, filenames):
        """Save every crop of an evaluation batch as an annotated image plus an info file.

        For each crop, the ground-truth box is drawn in (255, 0, 0) when the ground-truth
        class is not background, and the predicted box in (127, 0, 127) when the predicted
        class is not background. The image goes to self.neutral_dir,
        self.correct_class_dir or self.wrong_class_dir, next to a '_info.txt' file written
        by self.write_crop_info_file. The counters self.n_matches, self.n_backgrounds and
        self.n_neutrals are updated and their running totals are printed.

        Args:
            metrics: returned as is; not inspected here.
            inputs_reord: (batch_size, input_image_size, input_image_size, 3)
            labels_enc_reord: (batch_size, n_labels)
            net_output: (batch_size, ?)
            filenames: (n_images_per_batch); each element must support .decode()
                (i.e. bytes).

        Returns:
            The metrics argument, unchanged.

        Raises:
            Exception: if self.classnames is None. Any exception raised while processing
                a crop is re-raised after printing the name of the offending image.
        """
        # Split net output:
        locs_enc = output_encoding.get_loc_enc(net_output)  # (batch_size, 4)
        logits = output_encoding.get_logits(
            net_output, self.opts.predict_pc, self.opts.predict_dc,
            self.opts.predict_cm)  # (batch_size, nclasses)
        pc_pred_enc = output_encoding.get_pc_enc(
            net_output, self.opts.predict_pc, self.opts.predict_dc,
            self.opts.predict_cm)  # (batch_size) or None
        dc_pred_enc = output_encoding.get_dc_enc(
            net_output, self.opts.predict_pc, self.opts.predict_dc,
            self.opts.predict_cm)  # (batch_size) or None
        cm_pred_enc = output_encoding.get_cm_enc(
            net_output, self.opts.predict_pc, self.opts.predict_dc,
            self.opts.predict_cm)  # (batch_size) or None

        pc_gt_enc = gt_encoding.get_pc_enc(labels_enc_reord)  # (batch_size)
        dc_gt_enc = gt_encoding.get_dc_enc(labels_enc_reord)  # (batch_size)
        cm_gt_enc = gt_encoding.get_cm_enc(labels_enc_reord)  # (batch_size)

        softmax = tools.softmax_np(logits)  # (batch_size, nclasses)
        pc_pred = CommonEncoding.decode_pc_np(pc_pred_enc)
        dc_pred = CommonEncoding.decode_dc_np(dc_pred_enc)
        cm_pred = CommonEncoding.decode_cm_np(cm_pred_enc)

        pc_gt = CommonEncoding.decode_pc_np(pc_gt_enc)
        dc_gt = CommonEncoding.decode_dc_np(dc_gt_enc)
        cm_gt = CommonEncoding.decode_cm_np(cm_gt_enc)

        if self.classnames is None:
            raise Exception('classnames must be specified if debugging.')
        self.batch_count_eval_debug += 1
        batch_size = self.opts.n_crops_per_image * self.opts.n_images_per_batch
        # Repeat every file name once per crop, so that entry i names the image of crop i.
        filenames_ext = np.tile(np.expand_dims(filenames, axis=-1),
                                [1, self.opts.n_crops_per_image
                                 ])  # (n_images_per_batch, n_crops_per_image)
        filenames_reord = np.reshape(filenames_ext,
                                     newshape=(batch_size))  # (batch_size)
        mask_match = gt_encoding.get_mask_match(
            labels_enc_reord) > 0.5  # (batch_size)
        mask_neutral = gt_encoding.get_mask_neutral(
            labels_enc_reord) > 0.5  # (batch_size)

        # Localizations:
        for i in range(batch_size):
            name = filenames_reord[i].decode(sys.getdefaultencoding())
            try:
                crop = inputs_reord[
                    i, ...]  # (input_image_size, input_image_size, 3)
                crop = tools.add_mean_again(crop)
                self.draw_dc_circle(crop)
                label_enc = labels_enc_reord[i, :]
                gt_class = int(gt_encoding.get_class_id(label_enc))
                is_match = mask_match[i]
                is_neutral = mask_neutral[i]
                predicted_class = np.argmax(logits[i, :])

                # Common prefix of the image file and of its info file.
                file_prefix = 'batch' + str(self.batch_count_eval_debug) + \
                              '_' + name + '_crop' + str(i)
                if is_neutral:
                    self.n_neutrals += 1
                    file_name = file_prefix + '_NEUTRAL_PRED-' + \
                                self.classnames[predicted_class] + '.png'
                    crop_dir = self.neutral_dir
                else:
                    if gt_class == self.background_id:
                        self.n_backgrounds += 1
                    else:
                        self.n_matches += 1
                    file_name = file_prefix + '_GT-' + self.classnames[gt_class] + \
                                '_PRED-' + self.classnames[predicted_class] + '.png'
                    if gt_class == predicted_class:
                        crop_dir = self.correct_class_dir
                    else:
                        crop_dir = self.wrong_class_dir
                crop_path = os.path.join(crop_dir, file_name)

                # Ground-truth box:
                if gt_class != self.background_id:
                    gt_coords_enc = gt_encoding.get_coords_enc(label_enc)
                    gt_coords_dec = CommonEncoding.decode_boxes_wrt_anchor_np(
                        gt_coords_enc, self.opts)
                    class_and_coords = np.concatenate(
                        [np.expand_dims(gt_class, axis=0), gt_coords_dec],
                        axis=0)  # (5)
                    class_and_coords = np.expand_dims(class_and_coords,
                                                      axis=0)  # (1, 5)
                    crop = tools.add_bounding_boxes_to_image(crop,
                                                             class_and_coords,
                                                             color=(255, 0, 0))
                else:
                    assert not is_match, 'is_match is True, and gt_class it background_id.'

                # Predicted box:
                if predicted_class != self.background_id:
                    predicted_coords_enc = locs_enc[i, :]
                    predicted_coords_dec = CommonEncoding.decode_boxes_wrt_anchor_np(
                        predicted_coords_enc, self.opts)
                    class_and_coords = np.concatenate(
                        [np.expand_dims(predicted_class, axis=0),
                         predicted_coords_dec],
                        axis=0)  # (5)
                    class_and_coords = np.expand_dims(class_and_coords,
                                                      axis=0)  # (1, 5)
                    crop = tools.add_bounding_boxes_to_image(crop,
                                                             class_and_coords,
                                                             color=(127, 0, 127))
                cv2.imwrite(
                    crop_path,
                    cv2.cvtColor(crop.astype(np.uint8), cv2.COLOR_RGB2BGR))
                # Save info file:
                info_file_path = os.path.join(crop_dir,
                                              file_prefix + '_info.txt')
                self.write_crop_info_file(info_file_path, softmax,
                                          predicted_class, gt_class, is_match,
                                          is_neutral, pc_gt, dc_gt, cm_gt,
                                          pc_pred, dc_pred, cm_pred, i)

            except Exception:
                # Report which image failed, then let the error propagate. Only
                # Exception is intercepted so that KeyboardInterrupt / SystemExit
                # are not mislabelled as an image error.
                print('Error with image ' + name)
                raise

        # Running totals over every call so far.
        total_crops = self.n_matches + self.n_backgrounds + self.n_neutrals
        counters = (('n_matches', self.n_matches),
                    ('n_backgrounds', self.n_backgrounds),
                    ('n_neutrals', self.n_neutrals))
        for label, count in counters:
            if total_crops > 0:
                percentage = np.round(float(count) / total_crops * 100.0, 2)
            else:
                # Nothing counted yet: report 0% instead of dividing by zero.
                percentage = 0.0
            print(label + ': ' + str(count) + ' (' + str(percentage) + '%)')

        return metrics