示例#1
0
def repro_fig_4(gpu = None, interp = 'bicubic'):
    """Draw the zebra/elephant image next to its two class-specific heatmaps.

    Loads the BVLC GoogLeNet model (force-backward deploy prototxt), computes an
    'excitation_backprop' heatmap from 'loss3/classifier' down to 'pool2/3x3_s2'
    for each of the two target classes, and plots a 1x3 figure: the input image,
    the zebra map, then the elephant map.

    gpu: passed through unchanged to compute_heatmap.
    interp: passed to overlay_map and also used as imshow's interpolation.
    """
    prototxt = '/home/ruthfong/packages/caffe/models/bvlc_googlenet/deploy_force_backward.prototxt'
    weights = '/home/ruthfong/packages/caffe/models/bvlc_googlenet/bvlc_googlenet.caffemodel'
    net = caffe.Net(prototxt, weights, caffe.TEST)
    transformer = get_ILSVRC_net_transformer(net)

    top_layer = 'loss3/classifier'
    bottom_layer = 'pool2/3x3_s2'
    img_path = '/home/ruthfong/neural_coding/fnn_images/zeb-ele1.jpg'

    # Class indices in plotting order: zebra, then African elephant
    # (Indian elephant would be 385).
    class_ids = (340, 386)
    class_maps = [
        compute_heatmap(net = net, transformer = transformer, paths = img_path,
                        labels = class_id, heatmap_type = 'excitation_backprop',
                        topBlobName = top_layer, topLayerName = top_layer,
                        outputBlobName = bottom_layer, outputLayerName = bottom_layer,
                        gpu = gpu)
        for class_id in class_ids
    ]

    img = caffe.io.load_image(img_path)

    pylab.rcParams['figure.figsize'] = (12.0, 12.0)
    f, ax = plt.subplots(1, 3)
    ax[0].imshow(img)
    # Panels 1 and 2 show the raw (non-overlaid) maps, one per class.
    for panel, class_map in zip((ax[1], ax[2]), class_maps):
        rendered = overlay_map(img, class_map, overlay = False, interp = interp)
        panel.imshow(rendered, interpolation = interp)
示例#2
0
def repro_fig_3(gpu = None, interp = 'nearest'):
    """Plot the tabby-cat image followed by one heatmap per VGG16 pooling layer.

    Loads the VGG16 model (force-backward deploy prototxt) and, for each of
    'pool5' .. 'pool1', computes an 'excitation_backprop' heatmap from 'fc8'
    down to that layer for class index 281. The figure has one row: the input
    image first, then the heatmaps in the order pool5, pool4, pool3, pool2, pool1.

    gpu: passed through unchanged to compute_heatmap.
    interp: passed to overlay_map and also used as imshow's interpolation.
    """
    prototxt = '/home/ruthfong/packages/caffe/models/vgg16/VGG_ILSVRC_16_layers_deploy_force_backward.prototxt'
    weights = '/home/ruthfong/packages/caffe/models/vgg16/VGG_ILSVRC_16_layers.caffemodel'
    net = caffe.Net(prototxt, weights, caffe.TEST)
    transformer = get_ILSVRC_net_transformer(net)

    top_layer = 'fc8'
    pool_layers = ['pool5', 'pool4', 'pool3', 'pool2', 'pool1']
    tabby_label = 281

    img_path = '/home/ruthfong/neural_coding/images/tabby_cat_cropped.jpg'
    img = caffe.io.load_image(img_path)

    pylab.rcParams['figure.figsize'] = (12.0, 12.0)

    f, ax = plt.subplots(1, len(pool_layers) + 1)
    ax[0].imshow(img)

    # Column 0 holds the image, so heatmap columns start at 1.
    for col, layer in enumerate(pool_layers, 1):
        layer_map = compute_heatmap(net = net, transformer = transformer, paths = img_path,
                                    labels = tabby_label, heatmap_type = 'excitation_backprop',
                                    topBlobName = top_layer, topLayerName = top_layer,
                                    outputBlobName = layer, outputLayerName = layer,
                                    gpu = gpu)
        rendered = overlay_map(img, layer_map, overlay = False, interp = interp)
        ax[col].imshow(rendered, interpolation = interp)
def play_pointing_game(net,
                       transformer,
                       paths,
                       labels,
                       ann_paths,
                       heatmap_type,
                       labels_desc,
                       top_name='loss3/classifier-ft',
                       bottom_name='data',
                       norm_deg=np.inf,
                       batch_size=64,
                       gpu=None):
    """Score a heatmap method with the bounding-box pointing game.

    For every image the heatmap maximum (or the image centre when
    ``heatmap_type == 'center'``) counts as a hit if it falls inside any
    bounding box of the image's target class.

    Args:
        net, transformer, top_name, bottom_name, norm_deg, gpu: forwarded to
            ``compute_heatmap`` (not used when ``heatmap_type == 'center'``).
        paths: sequence of image paths (list or array).
        labels: sequence of integer class indices, one per image.
        ann_paths: sequence of annotation paths, one per image, read with
            ``load_objs``.
        heatmap_type: heatmap method name, or ``'center'`` for the baseline.
        labels_desc: class names indexed by label; used as keys into the
            mapping returned by ``load_objs``.
        batch_size: number of images per ``compute_heatmap`` call.

    Returns:
        ``(accs, num_hits, num_total, diff_accs, num_diff_hits,
        num_diff_total)``, each an array with one entry per class. Accuracy
        entries are NaN for classes with no (difficult) images.

    An image is "difficult" when its annotation contains more than one class
    and the summed target-box area is below a quarter of the image area.
    """
    # Coerce to arrays: `labels == i` and `paths[idx]` below silently
    # misbehave (or fail) on plain Python lists.
    paths = np.asarray(paths)
    labels = np.asarray(labels)

    num_imgs = len(paths)
    assert num_imgs == len(labels), 'paths and labels differ in length'
    assert num_imgs == len(ann_paths), 'paths and ann_paths differ in length'

    num_classes = len(labels_desc)
    num_hits = np.zeros(num_classes)
    num_total = np.array([np.sum(labels == i) for i in range(num_classes)])
    num_diff_hits = np.zeros(num_classes)
    num_diff_total = np.zeros(num_classes)
    num_batches = int(np.ceil(num_imgs / float(batch_size)))
    print(heatmap_type)

    for i in range(num_batches):
        start = time.time()
        # The last batch may be shorter than batch_size.
        idx = list(range(i * batch_size, min((i + 1) * batch_size, num_imgs)))

        if heatmap_type != 'center':
            heatmaps = compute_heatmap(net=net,
                                       transformer=transformer,
                                       paths=paths[idx],
                                       labels=labels[idx],
                                       heatmap_type=heatmap_type,
                                       topBlobName=top_name,
                                       topLayerName=top_name,
                                       outputBlobName=bottom_name,
                                       outputLayerName=bottom_name,
                                       norm_deg=norm_deg,
                                       gpu=gpu)

        for j, img_i in enumerate(idx):
            c = labels[img_i]
            # (height, width) of the original image.
            resize = caffe.io.load_image(paths[img_i]).shape[:2]
            if heatmap_type == 'center':
                # Ordered (width / 2, height / 2) to match the box checks below.
                max_coords = (resize[1] / float(2), resize[0] / float(2))
            else:
                max_coords = get_maximum_from_heatmap(heatmaps[j],
                                                      resize=resize)

            objs = load_objs(ann_paths[img_i])
            target_objs = objs[labels_desc[c]]
            # A distractor exists when the annotation lists more than one class.
            # set() keeps this correct on Python 3, where keys() is a view that
            # np.unique would treat as a single object.
            exists_distractor = len(set(objs.keys())) > 1

            is_hit = False
            bb_area = 0
            for bb_coords in target_objs:
                # NOTE(review): boxes appear to be (min0, min1, max0, max1) in
                # the same axis order as max_coords - confirm against load_objs.
                is_hit = is_hit or (bb_coords[0] <= max_coords[0]
                                    and bb_coords[1] <= max_coords[1]
                                    and bb_coords[2] >= max_coords[0]
                                    and bb_coords[3] >= max_coords[1])
                bb_area += (bb_coords[2] - bb_coords[0]) * (bb_coords[3] -
                                                            bb_coords[1])
                # The total box area only matters when a distractor exists, so
                # stop early once we have a hit on a distractor-free image.
                if is_hit and not exists_distractor:
                    break

            is_diff = exists_distractor and bb_area < 0.25 * np.prod(resize)
            if is_hit:
                num_hits[c] += 1
                num_diff_hits[c] += 1 if is_diff else 0
            num_diff_total[c] += 1 if is_diff else 0
        print('%d/%d: %.4f' % (i, num_batches, time.time() - start))

    # Classes without any (difficult) images give 0/0: keep the NaN result but
    # do not emit a RuntimeWarning for it.
    with np.errstate(divide='ignore', invalid='ignore'):
        accs = np.true_divide(num_hits, num_total)
        diff_accs = np.true_divide(num_diff_hits, num_diff_total)
    return (accs, num_hits, num_total, diff_accs, num_diff_hits,
            num_diff_total)
示例#4
0
def evalPointingGame(cocoAnn,
                     cat,
                     caffeNet,
                     imgDir,
                     transformer,
                     heatmapType,
                     topName='loss3/classifier',
                     bottomName='data',
                     normDeg=np.inf,
                     naiveMax=True,
                     maxImgs=None,
                     maskDir=None,
                     gpu=None):
    """Run the pointing game for one COCO category and return its accuracies.

    For each image of category ``cat`` a point is chosen (image centre, or the
    maximum of an attention map resized to the image) and counted as a hit if a
    radius-15 circle around it intersects any segmentation of that category.

    Args:
        cocoAnn: COCO annotation object providing ``getImgIds``, ``loadImgs``,
            ``getAnnIds`` and ``loadAnns``.
        cat: category dict with at least ``'id'`` and ``'name'``.
        caffeNet, transformer, topName, bottomName, normDeg, gpu: forwarded to
            the heatmap computation.
        imgDir: directory containing the image files.
        heatmapType: ``'center'``, ``'mask'`` (load ``1 - mask`` from
            ``maskDir``), or a method name understood by ``compute_heatmap``.
        naiveMax: if True use the argmax; otherwise use the mean location of
            all pixels that attain the maximum.
        maxImgs: optional cap on the number of images evaluated.
        maskDir: directory of ``<image stem>_<label>.npy`` masks; required for
            ``heatmapType == 'mask'``.

    Returns:
        ``(accuracy, accuracyDiff)``: hit rate over all evaluated images and
        over "difficult" images only (total annotated area below a quarter of
        the image and more than one category present). Either is None when no
        image contributed to it.
    """
    imgIds = cocoAnn.getImgIds(catIds=cat['id'])
    imgList = cocoAnn.loadImgs(ids=imgIds)
    hit = 0
    miss = 0
    hitDiff = 0
    missDiff = 0
    t0 = time.time()
    numImgs = len(imgList)
    if maxImgs is not None:
        numImgs = np.minimum(numImgs, maxImgs)
    accuracy = None
    accuracyDiff = None
    for i in range(numImgs):
        I = imgList[i]
        # run EB on img, get max location on attMap
        imgName = os.path.join(imgDir, I['file_name'])
        img = caffe.io.load_image(imgName)
        catLabel = tag2ID[cat['name']]
        if heatmapType == 'center':
            # choose center of image, as (row, col)
            maxSub = (img.shape[0] / float(2), img.shape[1] / float(2))
        else:
            if heatmapType == 'mask':
                assert maskDir is not None, "maskDir is required for heatmapType 'mask'"
                # Use splitext/basename rather than str.strip('.jpg'): strip()
                # removes any of the characters '.', 'j', 'p', 'g' from both
                # ends and would corrupt stems such as 'pig.jpg'.
                imgStem = os.path.splitext(os.path.basename(imgName))[0]
                mask_path = os.path.join(maskDir,
                                         '%s_%d.npy' % (imgStem, catLabel))
                if not os.path.exists(mask_path):
                    print('%d: %s does not exist' % (i, mask_path))
                    # NOTE(review): this ends the whole evaluation, not just
                    # this image; the accuracies so far are returned.
                    break
                attMap = 1 - np.load(mask_path)
            elif heatmapType == 'contrast_excitation_backprop' and use_orig_imp:
                if i < 10:
                    print('here')
                attMap = doExcitationBackprop(caffeNet, img, cat['name'])
            else:
                attMap = compute_heatmap(net=caffeNet,
                                         transformer=transformer,
                                         paths=imgName,
                                         labels=catLabel,
                                         heatmap_type=heatmapType,
                                         topBlobName=topName,
                                         topLayerName=topName,
                                         outputBlobName=bottomName,
                                         outputLayerName=bottomName,
                                         norm_deg=normDeg,
                                         gpu=gpu)

            # reshape to original image
            attMap = transform.resize(attMap, (img.shape[:2]),
                                      order=3,
                                      mode='nearest')

            if naiveMax:
                # naively take argmax, as (row, col)
                maxSub = np.unravel_index(np.argmax(attMap), attMap.shape)
            else:
                # take center of max locations
                maxAtt = np.max(attMap)
                maxInd = np.where(attMap == maxAtt)
                maxSub = (np.mean(maxInd[0]), np.mean(maxInd[1]))

        # determine if it's a difficult image: 1) sum of the annotated areas is
        # less than 1/4 of the image area, 2) at least one distractor category
        allAnnList = cocoAnn.loadAnns(cocoAnn.getAnnIds(imgIds=I['id']))
        bbsArea = np.sum([a['area'] for a in allAnnList])
        imgArea = np.prod(img.shape[:2])
        numCats = len(np.unique([a['category_id'] for a in allAnnList]))
        isDiff = bbsArea < 0.25 * imgArea and numCats > 1

        # load annotations (for target category)
        annList = cocoAnn.loadAnns(
            cocoAnn.getAnnIds(imgIds=I['id'], catIds=cat['id']))

        # hit/miss?
        isHit = 0
        for ann in annList:
            # create a radius-15 circle around max location and see if it
            # intersects with segmentation mask
            if isinstance(ann['segmentation'], list):
                # polygon: flat coordinate list reshaped to one point per row.
                # maxSub is reversed here - presumably polygon points are
                # (x, y) while maxSub is (row, col); confirm against the
                # annotation format.
                for seg in ann['segmentation']:
                    # reshape(-1, 2) avoids `len(seg) / 2`, which is a float
                    # (and an error) under Python 3.
                    polyPts = np.array(seg).reshape((-1, 2))
                    poly = shapely.geometry.Polygon(polyPts)
                    circ = shapely.geometry.Point(maxSub[::-1]).buffer(15)
                    isHit += poly.intersects(circ)
            else:
                # RLE
                if isinstance(ann['segmentation']['counts'], list):
                    rle = mask.frPyObjects([ann['segmentation']], I['height'],
                                           I['width'])
                else:
                    rle = [ann['segmentation']]
                m = mask.decode(rle)
                m = m[:, :, 0]
                ind = np.where(m > 0)
                # Mask pixels are (row, col), the same order as maxSub.
                # list() is needed because zip is lazy on Python 3.
                mp = shapely.geometry.MultiPoint(list(zip(ind[0], ind[1])))
                circ = shapely.geometry.Point(maxSub).buffer(15)
                isHit += circ.intersects(mp)

            if isHit:
                break

        if isHit:
            hit += 1
            hitDiff += 1 if isDiff else 0
        else:
            miss += 1
            missDiff += 1 if isDiff else 0

        # hit + miss >= 1 here; only the difficult subset can still be empty.
        accuracy = (hit + 0.0) / (hit + miss)
        numDiff = hitDiff + missDiff
        accuracyDiff = (hitDiff + 0.0) / numDiff if numDiff > 0 else None

        # Progress report at most once every 10 seconds.
        if time.time() - t0 > 10:
            print('%s ( %s / %s ): Hit = %s Miss = %s  Acc = %s  Diff Hit = %s'
                  '  Diff Miss = %s  Diff Acc = %s' %
                  (cat['name'], i, numImgs, hit, miss, accuracy, hitDiff,
                   missDiff, accuracyDiff))
            t0 = time.time()

    return (accuracy, accuracyDiff)
示例#5
0
def main(argv):
    """Compute heatmaps for a slice of a dataset split and save one .npy per image.

    Args:
        argv: list of command-line arguments (without the program name). The
            positionals are ``dataset``, ``split`` and ``heatmap``; the options
            are ``--results_dir``, ``--gpu``, ``--batch_size``, ``--start`` and
            ``--end``.

    Images with indices in ``[start, end)`` are processed in batches of
    ``batch_size``. Each heatmap is saved to ``<results_dir>/<image index>.npy``.
    A batch is skipped when the file for its last index already exists.

    Exits via ``parser.error`` (SystemExit with status 2) when ``--results_dir``
    is missing or when the dataset or split is not supported.
    """
    parser = argparse.ArgumentParser(description='Save numpy files of heatmaps (use default settings).') # TODO make default settings a boolean flag

    # NOTE: argparse never uses `default` on required positionals; the values
    # are kept only so the declared CLI stays unchanged.
    parser.add_argument('dataset', default='imagenet', type=str, help="choose from ['imagenet', 'voc2007', 'COCO']")
    parser.add_argument('split', default='val', type=str, help="choose from ['train', 'train_heldout', 'val', 'test']")
    parser.add_argument('heatmap', default='saliency', type=str,
        help="choose from ['saliency', 'guided_backprop', 'excitation_backprop', 'contrast_excitation_backprop', 'grad_cam']")
    parser.add_argument('-r', '--results_dir', default=None, type=str, help="directory to save heatmaps")
    parser.add_argument('-g', '--gpu', default=None, type=int, help="zero-indexed gpu to use [i.e. 0-3]")
    parser.add_argument('-b', '--batch_size', default=64, type=int, help="batch size")
    parser.add_argument('-a', '--start', default=0, type=int, help="start index")
    parser.add_argument('-z', '--end', default=None, type=int, help="end index")

    args = parser.parse_args(argv)
    dataset = args.dataset
    split = args.split
    heatmap_type = args.heatmap
    results_dir = args.results_dir
    gpu = args.gpu
    batch_size = args.batch_size
    start = args.start
    end = args.end

    # Fail before loading any model: every output path is built from
    # results_dir, and only two datasets are implemented below.
    if results_dir is None:
        parser.error('--results_dir is required: heatmaps are saved to <results_dir>/<index>.npy')
    if dataset not in ('imagenet', 'voc2007'):
        parser.error('dataset %s is not supported' % dataset)

    if gpu is None:
        caffe.set_mode_cpu()
    else:
        caffe.set_device(gpu)
        caffe.set_mode_gpu()

    if dataset == 'imagenet':
        imdb_by_split = {
            'train_heldout': '/home/ruthfong/packages/caffe/data/ilsvrc12/annotated_train_heldout_imdb.txt',
            'val': '/home/ruthfong/packages/caffe/data/ilsvrc12/val_imdb.txt',
            'animal_parts': '/home/ruthfong/packages/caffe/data/ilsvrc12/animal_parts_require_both_min_10_imdb.txt',
        }
        if split not in imdb_by_split:
            # Previously this only printed a message and then crashed with a
            # NameError on the undefined `paths`.
            parser.error('%s is not supported' % split)
        net = get_net('googlenet')
        top_name = 'loss3/classifier'
        transformer = get_ILSVRC_net_transformer(net)
        (paths, labels) = read_imdb(imdb_by_split[split])
        paths = np.array(paths)
    else:  # dataset == 'voc2007'
        net = get_net('googlenet_voc')
        top_name = 'loss3/classifier-ft'

        voc_dir = '/data/ruthfong/VOCdevkit/VOC2007/'
        transformer = get_VOC_net_transformer(net)
        (paths, labels) = read_imdb(os.path.join(voc_dir, 'caffe/%s.txt' % split))
        paths = np.array([os.path.join(voc_dir, 'JPEGImages', f) for f in paths])
    # Both branches need array labels: they are indexed with an index array below.
    labels = np.asarray(labels)

    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    if end is None:
        end = len(paths)

    # Per-method defaults for the normalisation setting and the output layer.
    # (The original compared the undefined name `heatmap` here.)
    if heatmap_type == 'excitation_backprop':
        norm_deg = -1
        bottom_name = 'pool2/3x3_s2'
    elif heatmap_type == 'contrast_excitation_backprop':
        norm_deg = -2
        bottom_name = 'pool2/3x3_s2'
    elif heatmap_type == 'grad_cam':
        norm_deg = None
        bottom_name = 'inception_4e/output'
    else:
        norm_deg = np.inf
        bottom_name = 'data'

    # An index array (not a list) so it can be sliced per batch and used to
    # index `paths` and `labels` directly.
    img_idx = np.arange(start, end)
    num_imgs = len(img_idx)
    num_batches = int(np.ceil(num_imgs / float(batch_size)))
    for i in range(num_batches):
        start_time = time.time()
        # Slicing clamps at the end, so the last batch may be shorter.
        idx = img_idx[i * batch_size:(i + 1) * batch_size]
        out_file = os.path.join(results_dir, '%d.npy' % idx[-1])
        if os.path.exists(out_file):
            print('%s already exists; skipping batch from %d to %d' % (out_file, idx[0], idx[-1]))
            continue
        heatmaps = compute_heatmap(net, transformer, paths[idx], labels[idx], heatmap_type, top_name, top_name,
                    outputBlobName = bottom_name, outputLayerName = bottom_name, norm_deg = norm_deg, gpu = gpu)
        for j in range(len(idx)):
            out_file = os.path.join(results_dir, '%d.npy' % idx[j])
            np.save(out_file, heatmaps[j])
        print('gpu %d - batch %d/%d complete [%d-%d] (time: %.4f s)' % (gpu if gpu is not None else -1, i, num_batches,
            idx[0], idx[-1], time.time() - start_time))