Example #1
def create_image_dataset_label_index(directory, batch_size=64, ahead=16):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    ds = loadseg.SegmentationData(info.dataset)
    categories = ds.category_names()
    shape = (ds.size(), len(DATASETS), len(ds.label))
    index = np.zeros(shape, dtype=bool)  # boolean index, matching the mmap written below
    pf = loadseg.SegmentationPrefetcher(ds,
                                        categories=categories,
                                        once=True,
                                        batch_size=batch_size,
                                        ahead=ahead,
                                        thread=True)
    batch_count = 0
    for batch in pf.batches():
        if batch_count % 100 == 0:
            print('Processing batch %d ...' % batch_count)
        for rec in batch:
            dataset_index = get_dataset_index(rec['fn'])
            image_index = rec['i']
            for cat in categories:
                if ((type(rec[cat]) is np.ndarray and rec[cat].size > 0)
                        or type(rec[cat]) is list and len(rec[cat]) > 0):
                    index[image_index][dataset_index][np.unique(
                        rec[cat])] = True
        batch_count += 1

    mmap = ed.open_mmap(part='image_dataset_label',
                        mode='w+',
                        dtype=bool,
                        shape=shape)
    mmap[:] = index[:]
    ed.finish_mmap(mmap)
    print('Finished and saved at %s' %
          ed.mmap_filename(part='image_dataset_label'))
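
Every snippet in this collection follows the same access pattern: open an experiment directory, load the dataset named in its info file, and stream annotation records through a SegmentationPrefetcher. A minimal sketch of that pattern, assuming the NetDissect-style expdir and loadseg helper modules are importable:

import expdir
import loadseg

def iterate_records(directory, batch_size=64, ahead=16):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    ds = loadseg.SegmentationData(info.dataset)
    pf = loadseg.SegmentationPrefetcher(ds,
                                        categories=ds.category_names(),
                                        once=True,
                                        batch_size=batch_size,
                                        ahead=ahead,
                                        thread=True)
    for batch in pf.batches():
        for rec in batch:
            # rec['i'] is the image index; rec[cat] holds that category's
            # annotations (label arrays, a list of label ids, or []).
            yield rec['i'], rec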
Example #2
def compute_alpha(directory):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()
    ds = loadseg.SegmentationData(info.dataset)
    L = ds.label_size()
    if not has_image_to_label(directory):
        create_image_to_label(directory)
    image_to_label = load_image_to_label(directory)

    label_names = np.array([ds.label[i]['name'] for i in range(L)])

    alphas = np.zeros((L, ))

    for label_i in range(1, L):
        label_categories = ds.label[label_i]['category'].keys()
        label_idx = np.where(image_to_label[:, label_i])[0]
        train_loader = loadseg.SegmentationPrefetcher(
            ds,
            categories=label_categories,
            split='train',
            indexes=label_idx,
            once=True,
            batch_size=64,
            ahead=4,
            thread=True)
        train_idx = np.array(train_loader.indexes)
        perc_label = []
        for batch in train_loader.batches():
            for rec in batch:
                sw, sh = [rec[k] for k in ['sw', 'sh']]
                for cat in label_categories:
                    if rec[cat] != []:
                        if type(rec[cat]) is np.ndarray:
                            perc_label.append(
                                np.sum(rec[cat] == label_i) / float(sw * sh))
                        else:
                            perc_label.append(1.)
                        break
        assert (len(perc_label) == len(train_idx))

        alphas[label_i] = float(1. - np.mean(perc_label))
        print('%d %s %f' % (label_i, label_names[label_i], alphas[label_i]))
        train_loader.close()

    alphas_mmap = ed.open_mmap(part='train_alphas',
                               mode='w+',
                               dtype='float32',
                               shape=alphas.shape)
    alphas_mmap[:] = alphas[:]
    ed.finish_mmap(alphas_mmap)
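
The class-weighting value computed above reduces to alpha = 1 - mean(perc_label), where each entry of perc_label is the fraction of an image covered by the label (image-level annotations count as 1.0). A small worked example of the same arithmetic:

import numpy as np

# Two pixel-level annotations plus one image-level annotation (counted as 1.0):
perc_label = [0.02, 0.05, 1.0]
alpha = float(1. - np.mean(perc_label))
print(alpha)  # 1 - 0.3567 ≈ 0.6433; rarer labels push alpha toward 1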
Example #3
def create_image_to_label(directory, batch_size=16, ahead=4):
    ed = expdir.ExperimentDirectory(directory)
    info = ed.load_info()

    print(info.dataset)
    if 'broden' in info.dataset:
        ds = loadseg.SegmentationData(info.dataset)
        categories = ds.category_names()
        shape = (ds.size(), len(ds.label))

        pf = loadseg.SegmentationPrefetcher(ds,
                                            categories=categories,
                                            once=True,
                                            batch_size=batch_size,
                                            ahead=ahead,
                                            thread=False)

        image_to_label = np.zeros(shape, dtype='int32')

        batch_count = 0
        for batch in pf.batches():
            if batch_count % 100 == 0:
                print('Processing batch %d ...' % batch_count)
            for rec in batch:
                image_index = rec['i']
                for cat in categories:
                    if ((type(rec[cat]) is np.ndarray and rec[cat].size > 0)
                            or type(rec[cat]) is list and len(rec[cat]) > 0):
                        image_to_label[image_index][np.unique(rec[cat])] = True
            batch_count += 1
    elif 'imagenet' in info.dataset or 'ILSVRC' in info.dataset:
        # find_classes/make_dataset are torchvision-style ImageFolder helpers
        classes, class_to_idx = find_classes(info.dataset)
        imgs = make_dataset(info.dataset, class_to_idx)
        _, labels = zip(*imgs)
        labels = np.array(labels)

        L = 1000
        shape = (len(labels), L)

        image_to_label = np.zeros(shape)

        for i in range(L):
            image_to_label[labels == i, i] = 1
    else:
        assert False, 'unrecognized dataset: %s' % info.dataset

    mmap = ed.open_mmap(part='image_to_label',
                        mode='w+',
                        dtype=bool,
                        shape=shape)
    mmap[:] = image_to_label[:]
    ed.finish_mmap(mmap)
    f = ed.mmap_filename(part='image_to_label')

    print('Finished and saved index_to_label at %s' % f)
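
Once the image_to_label mmap exists, the other snippets (compute_alpha, linear_probe, label_probe) use it to pick out images containing a given label. A minimal sketch of that lookup, reusing the load_image_to_label helper these snippets assume and a hypothetical label index:

import numpy as np

image_to_label = load_image_to_label(directory)  # bool matrix, (num_images, num_labels)
label_i = 5  # hypothetical label index
label_idx = np.where(image_to_label[:, label_i])[0]
print('%d images contain label %d' % (len(label_idx), label_i))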
Example #4
def extract_concept_data(directory, batch_size=64, ahead=16, verbose=True):
    ed = expdir.ExperimentDirectory(directory)
    if ed.has_mmap(part='concept_data'):
        print('%s already has %s, so skipping' %
              (directory, ed.mmap_filename(part='concept_data')))
        return
    info = ed.load_info()
    (sh, sw) = get_seg_size(info.input_dim)
    ds = loadseg.SegmentationData(info.dataset)
    categories = np.array(ds.category_names())
    L = ds.label_size()
    N = ds.size()
    pf = loadseg.SegmentationPrefetcher(ds,
                                        categories=categories,
                                        once=True,
                                        batch_size=batch_size,
                                        ahead=ahead,
                                        thread=True)

    if verbose:
        print('Creating new mmap at %s' % ed.mmap_filename(part='concept_data'))
    data = ed.open_mmap(part='concept_data', mode='w+', shape=(N, L, sh, sw))

    start_time = time.time()
    last_batch_time = start_time
    index = 0
    for batch in pf.batches():
        batch_time = time.time()
        rate = index / (batch_time - start_time + 1e-15)
        batch_rate = batch_size / (batch_time - last_batch_time + 1e-15)
        last_batch_time = batch_time
        if verbose:
            print('extract_concept_data index %d/%d (%.2f)\titems per sec %.2f\t%.2f' % (
                index, N, index / float(N), batch_rate, rate))
        for rec in batch:
            for cat in categories:
                if len(rec[cat]) == 0:
                    continue
                if cat == 'texture' or cat == 'scene':
                    for i in range(len(rec[cat])):
                        data[index][rec[cat][i] - 1, :, :] = 1
                else:
                    for i in range(len(rec[cat])):
                        ys, xs = np.where(rec[cat][i])
                        for j in range(len(xs)):
                            data[index][rec[cat][i][ys[j]][xs[j]] -
                                        1][ys[j]][xs[j]] = 1
            index += 1

    assert index == N, ("Data source should return every item once %d %d." %
                        (index, N))
    if verbose:
        print('Renaming mmap.')
    ed.finish_mmap(data)
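
The finished concept_data mmap holds one binary map per (image, label) pair, shaped (N, L, sh, sw): pixel categories mark individual locations, while image-level categories (texture, scene) fill the whole plane. A sketch of reading one mask back, assuming the same experiment directory and the N, L, sh, sw values used above:

import expdir  # NetDissect helper module (assumed importable)

ed = expdir.ExperimentDirectory(directory)
data = ed.open_mmap(part='concept_data', mode='r', shape=(N, L, sh, sw))
mask = data[image_index, label_i - 1]  # (sh, sw) binary map; label ids are 1-indexed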
Example #5
def BrodenDataGenerator():
    split = "train"
    batch_size = 8
    randomize = True

    # Build the datasource and prefetcher once; rebuilding them inside the
    # loop would throw away the prefetch queue on every yield.
    datasource = loadseg.SegmentationData(TEST_DIR,
            categories=categories)
    prefetcher = loadseg.SegmentationPrefetcher(datasource,
            split=split, categories=['image'] + categories,
            segmentation_shape=None,
            batch_size=batch_size, randomize=randomize, ahead=12)
    while True:
        batch = prefetcher.fetch_batch()
        yield batch, None
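
The generator is shaped for Keras-style fit_generator consumption: it yields (batch, None) pairs indefinitely. A usage sketch:

gen = BrodenDataGenerator()
batch, _ = next(gen)  # one prefetched batch of 8 records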
Example #6
def process_data(fn_t, fn_read, shape, tally_depth, ds, iw, ih, categories,
                 fieldmap, thresh, labelcat, batch_size, ahead, verbose,
                 thread, start, end):
    unit_size = len(thresh)
    blobdata = cached_memmap(fn_read, mode='r', dtype='float32', shape=shape)
    count_t = cached_memmap(fn_t,
                            mode='r+',
                            dtype='int32',
                            shape=(ds.size(), tally_depth, 3))
    count_t[...] = 0
    # The main loop
    if verbose:
        print('Beginning work for evaluating %s' % fn_read)
    pf = loadseg.SegmentationPrefetcher(ds,
                                        categories=categories,
                                        start=start,
                                        end=end,
                                        once=True,
                                        batch_size=batch_size,
                                        ahead=ahead,
                                        thread=False)
    index = start
    start_time = time.time()
    last_batch_time = start_time
    batch_size = 0
    for batch in pf.batches():
        batch_time = time.time()
        rate = (index - start) / (batch_time - start_time + 1e-15)
        batch_rate = batch_size / (batch_time - last_batch_time + 1e-15)
        last_batch_time = batch_time
        if verbose:
            print('labelprobe index %d items per sec %.2f %.2f' % (index, batch_rate, rate))
            sys.stdout.flush()
        for rec in batch:
            sw, sh = [rec[k] for k in ['sw', 'sh']]
            reduction = int(round(iw / float(sw)))
            up = upsample.upsampleL(fieldmap,
                                    blobdata[index],
                                    shape=(sh, sw),
                                    reduction=reduction)
            mask = up > thresh
            accumulate_counts(mask, [rec[cat] for cat in categories],
                              count_t[index], unit_size, labelcat)
            index += 1
        batch_size = len(batch)
    count_t.flush()
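
Since process_data takes explicit start/end indices, a driver can shard the dataset across workers. A minimal sketch of computing contiguous shard boundaries; the actual process_data invocations are only indicated because they depend on the many arguments above:

def shard_ranges(total, num_workers):
    # Split [0, total) into num_workers contiguous (start, end) chunks.
    step = (total + num_workers - 1) // num_workers
    return [(i, min(i + step, total)) for i in range(0, total, step)]

# e.g. one worker per chunk:
# for start, end in shard_ranges(ds.size(), 4):
#     process_data(..., start=start, end=end)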
Example #7
    def __init__(self):
        """
        Setup data layer according to parameters:
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)
        """
        # config
        params = eval(self.param_str)  # layer parameters supplied by Caffe
        self.directory = TEST_DIR  # Really should be from param_str
        self.split = params['split']  # I have not implemented splits yet
        self.mean = numpy.array(params['mean'])
        self.random = params.get('randomize', True)
        self.random_flip = True  #params.get('randomize', True)
        self.seed = params.get('seed', None)
        self.batch_size = params.get('batch_size', 1)
        self.disp = 0
        self.categories = ['object', 'part', 'texture', 'material', 'color']
        self.categories_num_class = [584, 234, 47, 32, 11]
        self.segmentation_shape = params.get('segmentation_shape', None)
        self.splitmap = {'train': 1, 'val': 2}

        # Convert to 2-dimensional shape.
        if self.segmentation_shape and len(numpy.shape(
                self.segmentation_shape)) == 0:
            self.segmentation_shape = (self.segmentation_shape, ) * 2

        # Specific object classes to ignore.
        self.blacklist = {
            #'object': [1,2] # wall, floor, ceiling, sky: in uniseg: 4 become tree!!
        }

        # Thresholds to ignore: these classes and any ones rarer (higher).
        self.outliers = {
            'object': 537,  # brick occurs only 9 times in test_384.
            #'part': 155,      # porch occurs only 9 times in test_384

            # if switching to uniseg, switch 561->544.
            # because there are fewer object classes.
            # part classes remain the same.
        }

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False
            self.random_flip = False

        # Load up metadata for images and labels
        self.datasource = loadseg.SegmentationData(self.directory,
                                                   categories=self.categories)
        self.prefetcher = loadseg.SegmentationPrefetcher(
            self.datasource,
            split=self.split,
            categories=['image'] + self.categories,
            segmentation_shape=self.segmentation_shape,
            batch_size=self.batch_size,
            randomize=self.random)  # ahead=12)

        # Now make a blacklist map for blacklisted types
        self.zeromap = {}
        for k, z in self.blacklist.items():
            self.zeromap[k] = numpy.arange(self.datasource.label_size(k))
            self.zeromap[k][z] = 0
        for k, z in self.outliers.items():
            if k not in self.zeromap:
                self.zeromap[k] = numpy.arange(self.datasource.label_size(k))
            self.zeromap[k][numpy.arange(z, self.datasource.label_size(k))] = 0

        # Build our category map which merges the category map and the zeromap
        self.categorymap = {}
        for cat in self.categories:
            catmap = self.datasource.category_index_map(cat)
            if cat in self.zeromap:
                catmap = self.zeromap[cat][catmap]
            self.categorymap[cat] = catmap
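
The blacklist/outlier handling above amounts to composing a category's index map with an identity remap whose unwanted entries are zeroed. A tiny numpy illustration of the same composition:

import numpy

label_size = 6
zeromap = numpy.arange(label_size)        # identity remap: [0 1 2 3 4 5]
zeromap[[1, 2]] = 0                       # blacklist classes 1 and 2
zeromap[numpy.arange(4, label_size)] = 0  # outlier threshold: zero classes >= 4

catmap = numpy.array([0, 1, 3, 4, 5])     # a category's index map
print(zeromap[catmap])                    # [0 0 3 0 0]: only class 3 survives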
Example #8
def linear_probe(directory, blob, label_i, suffix='', init_suffix='', num_filters=None, batch_size=16, ahead=4, 
        quantile=0.005, bias=False, positive=False, num_epochs=30, lr=1e-4, momentum=0.9, 
        l1_weight_decay=0, l2_weight_decay=0, validation=False, nesterov=False, lower_bound=None,
        min_train=None, max_train=None, max_val=None,
        cuda=False):
    # Make sure we have a directory to work in
    ed = expdir.ExperimentDirectory(directory)
    # Check if linear weights have already been learned
    print(ed.mmap_filename(blob=blob, part='label_i_%d_weights%s' % (label_i, suffix)))
    if ed.has_mmap(blob=blob, part='label_i_%d_weights%s' % (label_i, suffix)):
        print('%s already has %s, so skipping.' % (directory,
            ed.mmap_filename(blob=blob, part='label_i_%d_weights%s' % (label_i, suffix))))
        return
    # Load probe metadata
    info = ed.load_info()
    ih, iw = info.input_dim
    # Load blob metadata
    blob_info = ed.load_info(blob=blob)
    shape = blob_info.shape
    unit_size = shape[1]
    fieldmap = blob_info.fieldmap
    # Load the blob quantile data and grab thresholds
    if quantile == 1:
        thresh = np.zeros((unit_size,1,1))
    else:
        quantdata = ed.open_mmap(blob=blob, part='quant-*', shape=(unit_size, -1))
        threshold = quantdata[:, int(round(quantdata.shape[1] * quantile))]
        thresh = threshold[:, np.newaxis, np.newaxis]
    # Map the blob activation data for reading
    fn_read = ed.mmap_filename(blob=blob)
    # Load the dataset
    ds = loadseg.SegmentationData(info.dataset)
    # Get all the categories the label is a part of
    label_categories = ds.label[label_i]['category'].keys()
    num_categories = len(label_categories)
    # Get label name
    label_name = ds.name(category=None, j=label_i)

    blobdata = cached_memmap(fn_read, mode='r', dtype='float32', shape=shape)
    # Get indices of images containing the given label
    if not has_image_to_label(directory):
        print('image_to_label does not exist in %s; creating it now...' % directory)
        create_image_to_label(directory, batch_size=batch_size, ahead=ahead)
    image_to_label = load_image_to_label(directory)
    label_idx = np.where(image_to_label[:, label_i])[0]
    train_idx = np.array([i for i in label_idx if ds.split(i) == 'train'])
    val_idx = np.array([i for i in label_idx if ds.split(i) == 'val'])
    if min_train is not None and len(train_idx) < min_train:
        print('Number of training examples for label %d (%s) is %d, which is less than the minimum of %d so skipping.'
                % (label_i, label_name, len(train_idx), min_train))
        return
    if max_train is not None and len(train_idx) > max_train:
        train_idx = train_idx[:max_train]
    if max_val is not None and len(val_idx) > max_val:
        val_idx = val_idx[:max_val]

    print('Total number of images containing label %d (%s): %d' % (
        label_i, label_name, len(label_idx)))
    
    try:
        train_loader = loadseg.SegmentationPrefetcher(ds, categories=label_categories,
                                                      indexes=train_idx, once=False,
                                                      batch_size=batch_size,
                                                      ahead=ahead, thread=True)
    except IndexError as err:
        print(err.args)
        return
    
    sw = 0
    sh = 0
    perc_label = []
    train_label_categories = []
    for batch in train_loader.batches():
        for rec in batch:
            # Check that the same segmentation dimensions are used for all
            # examples
            sw_r, sh_r = [rec[k] for k in ['sw', 'sh']]
            if sw == 0 and sh == 0:
                sw = sw_r
                sh = sh_r
            else:
                assert(sw == sw_r and sh == sh_r)
            for cat in label_categories:
                if rec[cat] != []:
                    train_label_categories.append(cat)
                    if type(rec[cat]) is np.ndarray:
                        perc_label.append(np.sum(rec[cat] == label_i) / float(sw * sh))
                    else:
                        perc_label.append(1.)
                    break
    assert(len(perc_label) == len(train_idx))

    # Compute reduction from segmentation dimensions to image dimensions
    reduction = int(round(iw / float(sw)))
    # Calculate class-weighting alpha parameter for segmentation loss
    # (Note: float typecast is necessary)
    alpha = float(1. - np.mean(perc_label))
    if alpha == 0:
        alpha = None
        print('Not using class-weighting because no pixel-level annotations')
    else:
        print('Alpha for label %d (%s): %f' % (label_i, label_name, alpha))

    # Prepare segmentation loss function using class-weight alpha
    criterion = lambda x,y: BCELoss2d(x,y,alpha)
    # Prepare to learn linear weights with a sigmoid activation after
    # the linear layer
    if num_filters is not None:
        if ed.has_mmap(blob=blob, part='label_i_%d_weights%s' % (label_i, init_suffix)):
            init_weights_mmap = ed.open_mmap(blob=blob, part='label_i_%d_weights%s' % (label_i, init_suffix), 
                    mode='r', dtype='float32', shape=unit_size)
        elif ed.has_mmap(blob=blob, part='linear_weights%s' % (init_suffix)):
            all_weights_mmap = ed.open_mmap(blob=blob, part='linear_weights%s' % init_suffix,
                    mode='r', dtype='float32', shape=(ds.label_size(),unit_size))
            init_weights_mmap = all_weights_mmap[label_i]
        else:
            assert False, 'no initial weights found for label %d' % label_i
        sorted_idx = np.argsort(np.abs(init_weights_mmap))[::-1]
        mask_idx = np.zeros(unit_size, dtype=int)
        mask_idx[sorted_idx[:num_filters]] = 1
        layer = CustomLayer(unit_size, upsample=True, up_size=(sh,sw), act=True,
                bias=bias, positive=positive, mask_idx=torch.ByteTensor(mask_idx), cuda=cuda)
    else:
        layer = CustomLayer(unit_size, upsample=True, up_size=(sh,sw), act=True, 
                bias=bias, positive=positive, cuda=cuda)
    if cuda:
        layer.cuda()

    optimizer = Custom_SGD(layer.parameters(), lr, momentum,
            l1_weight_decay=l1_weight_decay, l2_weight_decay=l2_weight_decay,
            nesterov=nesterov, lower_bound=lower_bound)

    if not validation:
        try:
            val_loader = loadseg.SegmentationPrefetcher(ds, categories=label_categories,
                    indexes=val_idx, once=False, batch_size=batch_size,
                    ahead=ahead, thread=True)
        except IndexError as err:
            print(err.args)
            train_loader.close()
            return

        val_label_categories = []
        for batch in val_loader.batches():
            for rec in batch:
                for cat in label_categories:
                    if rec[cat] != []:
                        val_label_categories.append(cat)
                        break
        assert(len(val_label_categories) == len(val_idx))

    for t in range(num_epochs):
        (_, iou) = run_epoch(blobdata, train_idx, train_label_categories, label_i,
                fieldmap, thresh, sh, sw, reduction, train_loader, layer, criterion, 
                optimizer, t+1, train=True, cuda=cuda, iou_threshold=0.5)
        if not validation:
            (_, iou) = run_epoch(blobdata, val_idx, val_label_categories, label_i,
                    fieldmap, thresh, sh, sw, reduction, val_loader, layer, criterion,
                    optimizer, t+1, train=False, cuda=cuda, iou_threshold=0.5)

    # Close segmentation prefetcher (i.e. close pools)
    train_loader.close()
    if not validation:
        val_loader.close()

    # Save weights
    weights = (layer.mask * layer.weight).data.cpu().numpy()
    weights_mmap = ed.open_mmap(blob=blob, part='label_i_%d_weights%s' % (label_i, suffix),
            mode='w+', dtype='float32', shape=weights.shape)
    weights_mmap[:] = weights[:]
    ed.finish_mmap(weights_mmap)
    if bias:
        bias_v = layer.bias.data.cpu().numpy()
        bias_mmap = ed.open_mmap(blob=blob, part='label_i_%d_bias%s' % (label_i, suffix),
                mode='w+', dtype='float32', shape=(1,))
        bias_mmap[:] = bias_v[:]
        ed.finish_mmap(bias_mmap)
    print('%s finished' % ed.mmap_filename(blob=blob, part='label_i_%d_weights%s' % (label_i, suffix)))
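
Both probing functions derive per-unit activation thresholds the same way: take one quantile column from the sorted quant mmap and broadcast it to (K, 1, 1) so it compares against (K, H, W) activations. A small numpy sketch of that thresholding, assuming the quantile table is sorted in descending order per unit:

import numpy as np

K, H, W = 4, 3, 3
quantdata = np.sort(np.random.rand(K, 100))[:, ::-1]  # per-unit quantiles, descending (assumed layout)
quantile = 0.005
thresh = quantdata[:, int(round(quantdata.shape[1] * quantile))][:, np.newaxis, np.newaxis]

activations = np.random.rand(K, H, W)
mask = activations > thresh  # (K, H, W) boolean mask, one map per unit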
Example #9
def probe_linear(directory,
                 blob,
                 suffix='',
                 start=None,
                 end=None,
                 batch_size=16,
                 ahead=4,
                 quantile=0.005,
                 bias=False,
                 positive=False,
                 cuda=False,
                 force=False):
    qcode = ('%f' % quantile).replace('0.', '.').rstrip('0')
    ed = expdir.ExperimentDirectory(directory)
    if (ed.has_mmap(blob=blob, part='linear_ind_ious%s' % suffix)
            and ed.has_mmap(blob=blob, part='linear_set_ious%s' % suffix)):
        print('Linear weights have already been probed.')
        print(ed.mmap_filename(blob=blob,
                               part='linear_set_val_ious%s' % suffix))
        if not force:
            return
        else:
            print('Forcefully continuing...')
    info = ed.load_info()
    seg_size = get_seg_size(info.input_dim)
    blob_info = ed.load_info(blob=blob)
    ds = loadseg.SegmentationData(info.dataset)
    shape = blob_info.shape
    N = shape[0]  # number of total images
    K = shape[1]  # number of units in given blob
    L = ds.label_size()  # number of labels

    if quantile == 1:
        thresh = np.zeros((K, 1, 1))
    else:
        quantdata = ed.open_mmap(blob=blob, part='quant-*', shape=(K, -1))
        threshold = quantdata[:, int(round(quantdata.shape[1] * quantile))]
        thresh = threshold[:, np.newaxis, np.newaxis]

    fn_read = ed.mmap_filename(blob=blob)
    blobdata = cached_memmap(fn_read, mode='r', dtype='float32', shape=shape)
    image_to_label = load_image_to_label(directory)

    if ed.has_mmap(blob=blob, part='linear_ind_ious%s' % suffix,
                   inc=True) and not force:
        assert (ed.has_mmap(blob=blob,
                            part='linear_set_ious%s' % suffix,
                            inc=True))
        ind_ious = ed.open_mmap(blob=blob,
                                part='linear_ind_ious%s' % suffix,
                                mode='r+',
                                inc=True,
                                dtype='float32',
                                shape=(L, N))
        set_ious = ed.open_mmap(blob=blob,
                                part='linear_set_ious%s' % suffix,
                                mode='r+',
                                inc=True,
                                dtype='float32',
                                shape=(L, ))
        set_ious_train = ed.open_mmap(blob=blob,
                                      part='linear_set_train_ious%s' % suffix,
                                      mode='r+',
                                      inc=True,
                                      dtype='float32',
                                      shape=(L, ))
        try:
            set_ious_val = ed.open_mmap(blob=blob,
                                        part='linear_set_val_ious%s' % suffix,
                                        mode='r+',
                                        inc=True,
                                        dtype='float32',
                                        shape=(L, ))
        except Exception:
            set_ious_val = ed.open_mmap(blob=blob,
                                        part='linear_set_val_ious%s' % suffix,
                                        mode='r+',
                                        dtype='float32',
                                        shape=(L, ))

    else:
        ind_ious = ed.open_mmap(blob=blob,
                                part='linear_ind_ious%s' % suffix,
                                mode='w+',
                                dtype='float32',
                                shape=(L, N))
        set_ious = ed.open_mmap(blob=blob,
                                part='linear_set_ious%s' % suffix,
                                mode='w+',
                                dtype='float32',
                                shape=(L, ))
        set_ious_train = ed.open_mmap(blob=blob,
                                      part='linear_set_train_ious%s' % suffix,
                                      mode='w+',
                                      dtype='float32',
                                      shape=(L, ))
        set_ious_val = ed.open_mmap(blob=blob,
                                    part='linear_set_val_ious%s' % suffix,
                                    mode='w+',
                                    dtype='float32',
                                    shape=(L, ))

    if start is None:
        start = 1
    if end is None:
        end = L
    for label_i in range(start, end):
        if ed.has_mmap(blob=blob,
                       part='label_i_%d_weights%s' % (label_i, suffix)):
            try:
                weights = ed.open_mmap(blob=blob,
                                       part='label_i_%d_weights%s' %
                                       (label_i, suffix),
                                       mode='r',
                                       dtype='float32',
                                       shape=(K, ))
            except ValueError:
                # SUPPORTING LEGACY CODE (TODO: Remove)
                weights = ed.open_mmap(blob=blob,
                                       part='label_i_%d_weights%s' %
                                       (label_i, suffix),
                                       mode='r',
                                       dtype=float,
                                       shape=(K, ))
        elif ed.has_mmap(blob=blob, part='linear_weights%s' % suffix):
            all_weights = ed.open_mmap(blob=blob,
                                       part='linear_weights%s' % suffix,
                                       mode='r',
                                       dtype='float32',
                                       shape=(L, K))
            weights = all_weights[label_i]
            if not np.any(weights):
                print(
                    'Label %d does not have associated weights to it, so skipping.'
                    % label_i)
                continue
        else:
            print(
                'Label %d does not have associated weights to it, so skipping.'
                % label_i)
            continue

        if bias:
            if ed.has_mmap(blob=blob,
                           part='label_i_%d_bias%s' % (label_i, suffix)):
                bias_v = ed.open_mmap(blob=blob,
                                      part='label_i_%d_bias%s' %
                                      (label_i, suffix),
                                      mode='r',
                                      dtype='float32',
                                      shape=(1, ))
            else:
                assert (ed.has_mmap(blob=blob, part='linear_bias%s' % suffix))
                all_bias_v = ed.open_mmap(blob=blob,
                                          part='linear_bias%s' % suffix,
                                          mode='r',
                                          dtype='float32',
                                          shape=(L, ))
                bias_v = np.array([all_bias_v[label_i]])

        label_categories = ds.label[label_i]['category'].keys()
        label_name = ds.name(category=None, j=label_i)
        label_idx = np.where(image_to_label[:, label_i])[0]

        loader = loadseg.SegmentationPrefetcher(ds,
                                                categories=label_categories,
                                                indexes=label_idx,
                                                once=True,
                                                batch_size=batch_size,
                                                ahead=ahead,
                                                thread=True)
        loader_idx = loader.indexes
        num_imgs = len(loader.indexes)

        print(
            'Probing with learned weights for label %d (%s) with %d images...'
            % (label_i, label_name, num_imgs))

        model = CustomLayer(K,
                            upsample=True,
                            up_size=seg_size,
                            act=True,
                            bias=bias,
                            positive=positive,
                            cuda=cuda)
        model.weight.data[...] = torch.Tensor(weights)
        if bias:
            model.bias.data[...] = torch.Tensor(bias_v)

        if cuda:
            model.cuda()
        model.eval()

        iou_intersects = np.zeros(num_imgs)
        iou_unions = np.zeros(num_imgs)

        i = 0
        for batch in loader.batches():
            start_t = time.time()
            if (i + 1) * batch_size < num_imgs:
                idx = range(i * batch_size, (i + 1) * batch_size)
            else:
                idx = range(i * batch_size, num_imgs)
            i += 1
            input = torch.Tensor(
                (blobdata[loader_idx[idx]] > thresh).astype(float))
            input_var = (Variable(input.cuda(), volatile=True)
                         if cuda else Variable(input, volatile=True))

            target_ = []
            for rec in batch:
                for cat in label_categories:
                    if rec[cat] != []:
                        if type(rec[cat]) is np.ndarray:
                            target_.append(
                                np.max((rec[cat] == label_i).astype(float),
                                       axis=0))
                        else:
                            target_.append(np.ones(seg_size))
                        break
            target = torch.Tensor(target_)
            target_var = (Variable(target.cuda(), volatile=True)
                          if cuda else Variable(target, volatile=True))
            output_var = model(input_var)

            iou_intersects[idx] = np.squeeze(
                iou_intersect_d(output_var, target_var).data.cpu().numpy())
            iou_unions[idx] = np.squeeze(
                iou_union_d(output_var, target_var).data.cpu().numpy())
            print('Batch: %d/%d\tTime: %f secs\tAvg Ind IOU: %f' %
                  (i, num_imgs / batch_size, time.time() - start_t,
                   np.mean(
                       np.true_divide(iou_intersects[idx],
                                      iou_unions[idx] + 1e-20))))

        loader.close()
        label_ind_ious = np.true_divide(iou_intersects, iou_unions + 1e-20)
        label_set_iou = np.true_divide(np.sum(iou_intersects),
                                       np.sum(iou_unions) + 1e-20)

        ind_ious[label_i, loader_idx] = label_ind_ious
        set_ious[label_i] = label_set_iou
        train_idx = [
            i for i in range(len(loader_idx))
            if ds.split(loader_idx[i]) == 'train'
        ]
        val_idx = [
            i for i in range(len(loader_idx))
            if ds.split(loader_idx[i]) == 'val'
        ]
        set_ious_train[label_i] = np.true_divide(
            np.sum(iou_intersects[train_idx]),
            np.sum(iou_unions[train_idx]) + 1e-20)
        set_ious_val[label_i] = np.true_divide(
            np.sum(iou_intersects[val_idx]),
            np.sum(iou_unions[val_idx]) + 1e-20)

        print(
            'Label %d (%s) Set IOU: %f, Train Set IOU: %f, Val Set IOU: %f, Max Ind IOU: %f'
            % (label_i, label_name, label_set_iou, set_ious_train[label_i],
               set_ious_val[label_i], np.max(label_ind_ious)))

    ed.finish_mmap(ind_ious)
    ed.finish_mmap(set_ious)
    ed.finish_mmap(set_ious_train)
    ed.finish_mmap(set_ious_val)
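
The set IOU reported above pools intersections and unions across all images before dividing, which weights large segmentations more heavily than averaging per-image IOUs would. A small numeric illustration:

import numpy as np

iou_intersects = np.array([10., 0., 90.])
iou_unions = np.array([100., 50., 100.])
ind_ious = np.true_divide(iou_intersects, iou_unions + 1e-20)
set_iou = np.true_divide(np.sum(iou_intersects), np.sum(iou_unions) + 1e-20)
print(ind_ious.mean())  # ≈ 0.333: every image counted equally
print(set_iou)          # 100/250 = 0.4: dominated by the larger masks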
Example #10
def label_probe(directory, blob, quantile=0.005, batch_size=16, ahead=4, start=None,
        end=None, suffix='', cuda=False):
    # Make sure we have a directory to work in
    qcode = ('%f' % quantile).replace('0.','').rstrip('0')
    ed = expdir.ExperimentDirectory(directory)
    # Check if label probe has already been created
    if (ed.has_mmap(blob=blob, part='single_set_ious%s' % suffix) and 
            ed.has_mmap(blob=blob, part='single_ind_ious%s' % suffix)):
        print('label_probe_pytorch.py has already been run.')
        return
    # Load probe metadata
    info = ed.load_info()
    seg_size = get_seg_size(info.input_dim)
    # Load blob metadata
    blob_info = ed.load_info(blob=blob)
    shape = blob_info.shape
    tot_imgs = shape[0]
    unit_size = shape[1]
    # Load the blob quantile data and grab thresholds
    quantdata = ed.open_mmap(blob=blob, part='quant-*', shape=(unit_size, -1))
    threshold = quantdata[:, int(round(quantdata.shape[1] * quantile))]
    thresh = threshold[:, np.newaxis, np.newaxis]
    # Load the dataset
    ds = loadseg.SegmentationData(info.dataset)
    # Map the blob activation data for reading
    blobdata = ed.open_mmap(blob=blob, mode='r', shape=shape)
    # Get image-to-labels mapping
    if not has_image_to_label(directory):
        print('image_to_label does not exist in %s; creating it now...' % directory)
        create_image_to_label(directory, batch_size=batch_size, ahead=ahead)
    image_to_label = load_image_to_label(directory)

    num_labels = ds.label_size()
    upsample = nn.Upsample(size=seg_size, mode='bilinear')

    set_ious_train_mmap = ed.open_mmap(blob=blob, part='single_set_train_ious%s' % suffix, 
            mode='w+', dtype='float32', shape=(num_labels, unit_size))
    set_ious_val_mmap = ed.open_mmap(blob=blob, part='single_set_val_ious%s' % suffix,
            mode='w+', dtype='float32', shape=(num_labels, unit_size))
    set_ious_mmap = ed.open_mmap(blob=blob, part='single_set_ious%s' % suffix, mode='w+',
        dtype='float32', shape=(num_labels, unit_size))
    ind_ious_mmap = ed.open_mmap(blob=blob, part='single_ind_ious%s' % suffix, mode='w+',
        dtype='float32', shape=(num_labels, tot_imgs, unit_size))
    
    if start is None:
        start = 1
    if end is None:
        end = num_labels
    for label_i in range(start, end):
        print('Starting for label %d (%s)' % (label_i, ds.name(category=None,
            j=label_i)))
        label_categories = ds.label[label_i]['category'].keys()
        num_cats = len(label_categories)
        label_idx = np.where(image_to_label[:, label_i])[0]
        loader = loadseg.SegmentationPrefetcher(ds, categories=label_categories, 
                indexes=label_idx, once=False, batch_size=batch_size, 
                ahead=ahead, thread=True)
        loader_idx = loader.indexes
        N = len(loader_idx)
        iou_intersects = np.zeros((N, unit_size))
        iou_unions = np.zeros((N, unit_size)) 

        if num_cats > 1:
            rec_labcat = []
            for batch in loader.batches():
                for rec in batch:
                    for cat in label_categories:
                        if rec[cat] != []:
                            rec_labcat.append(cat)
                            break
        else:
            rec_labcat = [label_categories[0] for i in range(N)]


        i = 0
        for batch in loader.batches():
            start_t = time.time()
            if (i+1)*batch_size < N:
                idx = range(i*batch_size, (i+1)*batch_size)
            else:
                idx = range(i*batch_size, N)
            i += 1
            input = torch.Tensor((blobdata[loader_idx[idx]] > thresh).astype(float))
            input_var = upsample(Variable(input.cuda()) if cuda else
                    Variable(input))
            target = torch.Tensor([np.max((rec[rec_labcat[j]] 
                == label_i).astype(float), axis=0) 
                if type(rec[rec_labcat[j]]) is np.ndarray
                else np.ones(seg_size) for j, rec in enumerate(batch)])
            target_var = Variable(target.unsqueeze(1).expand_as(
                input_var).cuda() if cuda 
                else target.unsqueeze(1).expand_as(input_var))
            iou_intersects[idx] = np.squeeze(iou_intersect_d(input_var, 
                target_var).data.cpu().numpy())
            iou_unions[idx] = np.squeeze(iou_union_d(input_var, 
                target_var).data.cpu().numpy())
            print('Batch %d/%d\tTime %f secs\tAvg Ind IOU %f\t' % (i, N/batch_size, 
                time.time()-start_t, np.mean(np.true_divide(iou_intersects[idx], 
                    iou_unions[idx] + 1e-20))))

        set_ious = np.true_divide(np.sum(iou_intersects, axis=0), 
                np.sum(iou_unions, axis=0) + 1e-20)
        loader.close()
        best_filter = np.argmax(set_ious)
        print('Label %d (%s): best set IOU = %f (filter %d)' % (label_i, 
            ds.name(category=None,j=label_i), set_ious[best_filter], best_filter))
        ind_ious = np.true_divide(iou_intersects, iou_unions + 1e-20)

        set_ious_mmap[label_i] = set_ious
        ind_ious_mmap[label_i, loader_idx] = ind_ious
        train_idx = [i for i in range(len(loader_idx)) if ds.split(loader_idx[i]) == 'train']
        val_idx = [i for i in range(len(loader_idx)) if ds.split(loader_idx[i]) == 'val']
        set_ious_train_mmap[label_i] = np.true_divide(np.sum(iou_intersects[train_idx], axis=0),
                np.sum(iou_unions[train_idx], axis=0) + 1e-20)
        set_ious_val_mmap[label_i] = np.true_divide(np.sum(iou_intersects[val_idx], axis=0),
                np.sum(iou_unions[val_idx], axis=0) + 1e-20)

    ed.finish_mmap(set_ious_train_mmap)
    ed.finish_mmap(set_ious_val_mmap)
    ed.finish_mmap(set_ious_mmap)
    ed.finish_mmap(ind_ious_mmap)
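
After label_probe finishes, the per-unit set IOUs can be read back to rank units for each label, mirroring the best_filter report printed inside the loop. A sketch, assuming the same experiment directory, blob, suffix, and the num_labels/unit_size values used above:

import numpy as np
import expdir  # NetDissect helper module (assumed importable)

ed = expdir.ExperimentDirectory(directory)
set_ious = ed.open_mmap(blob=blob, part='single_set_ious%s' % suffix, mode='r',
                        dtype='float32', shape=(num_labels, unit_size))
best_units = np.argmax(set_ious, axis=1)                    # best unit per label
best_scores = set_ious[np.arange(num_labels), best_units]   # its set IOU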