def viz_cam(model_file, data_dir):
    """Save class-activation-map (CAM) visualizations for val images.

    Runs the model over the validation set; for each correctly-classified
    image, projects the last conv feature maps through the linear layer's
    weights for the true class, and writes an (image | heatmap | blend)
    strip to ``cam{idx}-{classname}.jpg``. Stops after 500 images.

    Args:
        model_file (str): path to the trained checkpoint.
        data_dir (str): unused here; the data location comes from ``get_data``.
    """
    ds = get_data('val')
    pred_config = PredictConfig(
        model=Model(),
        session_init=SmartInit(model_file),
        input_names=['input', 'label'],
        output_names=['wrong-top1', 'group3new/bnlast/Relu', 'linearnew/W'],
        return_input=True)
    meta = dataset.ILSVRCMeta().get_synset_words_1000()

    pred = SimpleDatasetPredictor(pred_config, ds)
    cnt = 0
    for inp, outp in pred.get_result():
        images, labels = inp
        wrongs, convmaps, W = outp
        batch = wrongs.shape[0]
        for i in range(batch):
            if wrongs[i]:
                # only visualize correctly-classified samples
                continue
            # weights of the true class, as a 1 x C row vector
            weight = W[:, [labels[i]]].T
            convmap = convmaps[i, :, :, :]  # C x h x w
            # derive sizes from the tensors instead of hard-coding 512 / 14x14
            chan, fh, fw = convmap.shape
            mergedmap = np.matmul(
                weight, convmap.reshape((chan, -1))).reshape(fh, fw)
            # upscale the CAM to the input image's resolution
            # (cv2.resize takes (width, height))
            mergedmap = cv2.resize(
                mergedmap, (images[i].shape[1], images[i].shape[0]))
            heatmap = viz.intensity_to_rgb(mergedmap, normalize=True)
            blend = images[i] * 0.5 + heatmap * 0.5
            concat = np.concatenate((images[i], heatmap, blend), axis=1)

            classname = meta[labels[i]].split(',')[0]
            cv2.imwrite('cam{}-{}.jpg'.format(cnt, classname), concat)
            cnt += 1
            if cnt == 500:
                return
def get_data(train_or_test):
    """Build the 224x224 ImageNet dataflow for the given split.

    Args:
        train_or_test (str): 'train' or 'val'.

    Returns:
        A batched DataFlow; training data is additionally shuffled,
        augmented and prefetched via ZMQ.
    """
    isTrain = train_or_test == 'train'
    # shuffle exactly when training (idiomatic form of
    # `shuffle=True if isTrain else False`)
    ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=isTrain)
    meta = dataset.ILSVRCMeta()
    pp_mean = meta.get_per_pixel_mean()
    if isTrain:
        # TODO use the augmentor in GoogleNet
        augmentors = [
            imgaug.Resize((256, 256)),
            imgaug.Brightness(30, False),
            imgaug.Contrast((0.8, 1.2), True),
            # subtract the mean while still at 256x256, before cropping
            imgaug.MapImage(lambda x: x - pp_mean),
            imgaug.RandomCrop((224, 224)),
            imgaug.Flip(horiz=True),
        ]
    else:
        augmentors = [
            imgaug.Resize((256, 256)),
            imgaug.MapImage(lambda x: x - pp_mean),
            imgaug.CenterCrop((224, 224)),
        ]
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    # keep the remainder batch only for evaluation
    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
    if isTrain:
        ds = PrefetchDataZMQ(ds, 6)
    return ds
def get_imglist(dir, name):
    """
    Args:
        dir(str): directory which contains name
        name(str): 'train' or 'val'
    Returns:
        [(full filename, label)]
    """
    dir = os.path.join(dir, name)
    meta = dataset.ILSVRCMeta()
    structure = dataset.ILSVRCMeta.guess_dir_structure(dir)
    imglist = meta.get_image_list(name, structure)

    def _filter(fname):
        # png
        return 'n02105855_2933.JPEG' in fname

    ret = []
    for fname, label in imglist:
        if _filter(fname):
            logger.info("Image {} was filtered out.".format(fname))
        else:
            ret.append((os.path.join(dir, fname), label))
    return ret
def run_image(model, sess_init, inputs):
    """Classify image files and print each one's top-10 predictions.

    Args:
        model: the model description object to build the predictor from.
        sess_init: session initializer (e.g. a checkpoint loader).
        inputs (list[str]): paths of image files to classify.
    """
    pred_config = PredictConfig(
        model=model,
        session_init=sess_init,
        input_names=['input'],
        output_names=['output']
    )
    predictor = OfflinePredictor(pred_config)
    meta = dataset.ILSVRCMeta()
    words = meta.get_synset_words_1000()

    transformers = imgaug.AugmentorList(fbresnet_augmentor(isTrain=False))
    for f in inputs:
        assert os.path.isfile(f), f
        img = cv2.imread(f)
        # check the read succeeded *before* touching the array: the
        # original called .astype first, which raises AttributeError on a
        # failed read (imread returns None) before this assert ever ran
        assert img is not None
        img = img.astype('float32')
        img = transformers.augment(img)[np.newaxis, :, :, :]
        outputs = predictor(img)[0]
        prob = outputs[0]
        # indices of the 10 largest probabilities, descending
        ret = prob.argsort()[-10:][::-1]

        names = [words[i] for i in ret]
        print(f + ":")
        print(list(zip(names, prob[ret])))
def get_data(train_or_test):
    """Build the 299x299 Inception-style ImageNet dataflow.

    Args:
        train_or_test (str): 'train' or 'val'.

    Returns:
        A batched DataFlow; training data gets heavy augmentation and
        ZMQ prefetching, validation gets deterministic resize + crop.
    """
    isTrain = train_or_test == 'train'
    # shuffle exactly when training (idiomatic form of
    # `shuffle=True if isTrain else False`)
    ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=isTrain)
    meta = dataset.ILSVRCMeta()
    pp_mean = meta.get_per_pixel_mean()
    # resize the per-pixel mean to match the 299x299 network input
    pp_mean_299 = cv2.resize(pp_mean, (299, 299))

    if isTrain:
        class Resize(imgaug.ImageAugmentor):
            # Random-scale resize: the short side lands in [299, 340),
            # with independent ~±15% jitter on each axis.
            def __init__(self):
                self._init(locals())

            def _augment(self, img, _):
                h, w = img.shape[:2]
                size = 299
                scale = self.rng.randint(size, 340) * 1.0 / min(h, w)
                scaleX = scale * self.rng.uniform(0.85, 1.15)
                scaleY = scale * self.rng.uniform(0.85, 1.15)
                # clamp so each side stays >= 299 and never upscales past
                # the jittered target
                desSize = map(int, (max(size, min(w, scaleX * w)),
                                    max(size, min(h, scaleY * h))))
                dst = cv2.resize(img, tuple(desSize),
                                 interpolation=cv2.INTER_CUBIC)
                return dst

        augmentors = [
            Resize(),
            imgaug.Rotation(max_deg=10),
            imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
            imgaug.Brightness(30, True),
            imgaug.Gamma(),
            imgaug.Contrast((0.8, 1.2), True),
            imgaug.RandomCrop((299, 299)),
            imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
            imgaug.RandomApplyAug(imgaug.GaussianDeform(
                [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
                (299, 299), 0.2, 3), 0.1),
            imgaug.Flip(horiz=True),
            # mean subtraction last, after all geometric/photometric augs
            imgaug.MapImage(lambda x: x - pp_mean_299),
        ]
    else:
        def resize_func(im):
            # deterministic resize: short side -> 340 before center crop
            h, w = im.shape[:2]
            scale = 340.0 / min(h, w)
            desSize = map(int, (max(299, min(w, scale * w)),
                                max(299, min(h, scale * h))))
            im = cv2.resize(im, tuple(desSize),
                            interpolation=cv2.INTER_CUBIC)
            return im
        augmentors = [
            imgaug.MapImage(resize_func),
            imgaug.CenterCrop((299, 299)),
            imgaug.MapImage(lambda x: x - pp_mean_299),
        ]
    ds = AugmentImageComponent(ds, augmentors, copy=False)
    # keep the remainder batch only for evaluation
    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
    if isTrain:
        ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
    return ds
def get_data(train_or_test):
    """Return the ImageNet dataflow for the given split, using the
    fbresnet augmentors plus per-pixel mean subtraction on the center
    224x224 region of the mean image."""
    is_training = (train_or_test == 'train')
    augs = fbresnet_augmentor(is_training)
    mean = dataset.ILSVRCMeta().get_per_pixel_mean()
    # crop the mean to the central 224x224 window to match the augmented
    # image size (presumably the mean is 256x256 — verify upstream)
    augs.append(imgaug.MapImage(lambda x: x - mean[16:-16, 16:-16]))
    return get_imagenet_dataflow(args.data, train_or_test, BATCH_SIZE, augs)
def get_data(train_or_test):
    """Return the ImageNet dataflow for the given split, reading from
    TRAIN_LIST_PATH or VALID_LIST_PATH depending on the split."""
    is_training = (train_or_test == 'train')
    augs = fbresnet_augmentor(is_training)
    mean = dataset.ILSVRCMeta().get_per_pixel_mean()
    # crop the mean to the central 224x224 window to match the augmented
    # image size (presumably the mean is 256x256 — verify upstream)
    augs.append(imgaug.MapImage(lambda x: x - mean[16:-16, 16:-16]))
    if is_training:
        data_path = TRAIN_LIST_PATH
    else:
        data_path = VALID_LIST_PATH
    return get_imagenet_dataflow(data_path, train_or_test, BATCH_SIZE, augs)
def run_image(model, sess_init, inputs):
    """Classify image files with 256->224 preprocessing and print the
    top-10 predictions for each.

    Args:
        model: the model description object to build the predictor from.
        sess_init: session initializer (e.g. a checkpoint loader).
        inputs (list[str]): paths of image files to classify.
    """
    pred_config = PredictConfig(
        model=model,
        session_init=sess_init,
        input_names=['input'],
        output_names=['output']
    )
    predictor = OfflinePredictor(pred_config)
    meta = dataset.ILSVRCMeta()
    pp_mean = meta.get_per_pixel_mean()
    # center 224x224 window of the mean image, matching the crop below
    pp_mean_224 = pp_mean[16:-16, 16:-16, :]
    words = meta.get_synset_words_1000()

    def resize_func(im):
        # deterministic resize: short side -> 256 before center crop
        h, w = im.shape[:2]
        scale = 256.0 / min(h, w)
        desSize = map(int, (max(224, min(w, scale * w)),
                            max(224, min(h, scale * h))))
        im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
        return im
    transformers = imgaug.AugmentorList([
        imgaug.MapImage(resize_func),
        imgaug.CenterCrop((224, 224)),
        imgaug.MapImage(lambda x: x - pp_mean_224),
    ])
    for f in inputs:
        assert os.path.isfile(f)
        img = cv2.imread(f)
        # check the read succeeded *before* touching the array: the
        # original called .astype first, which raises AttributeError on a
        # failed read (imread returns None) before this assert ever ran
        assert img is not None
        img = img.astype('float32')
        img = transformers.augment(img)[np.newaxis, :, :, :]
        outputs = predictor(img)[0]
        prob = outputs[0]
        # indices of the 10 largest probabilities, descending
        ret = prob.argsort()[-10:][::-1]

        names = [words[i] for i in ret]
        print(f + ":")
        print(list(zip(names, prob[ret])))