def createTrials(self):

        dummy_upload = True
        image_bucket_name = 'rosch_pose'

        meta_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cats[0] + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/meta.pkl'
        meta = cPickle.load(open(meta_pth))
        perm = np.random.RandomState(0).permutation(len(meta))
        meta_p = meta[perm]
        pinv = fast.perminverse(perm)

        # decide learning_inds
        learning_inds = np.arange(LEARNING_PERIOD)
        # decide query_inds
        objs = list(set(meta['obj']))
        nObj = int(round(float(NUM_TRAIN) / len(objs))) + 1  # int: used as a slice bound below
        query_inds = np.array([], dtype=int)
        for obj in objs:
            query_inds = np.append(query_inds, perm[meta_p['obj']==obj][-nObj:])
        query_inds = np.sort(query_inds)[:NUM_TRAIN]
        query_inds = np.append(query_inds, query_inds[-REPEATS:])
        query_inds = np.append(learning_inds, query_inds)
        assert len(query_inds) == NUM_TRAIN+REPEATS+LEARNING_PERIOD
        print query_inds

        urls = publish_images(cats[0], query_inds, image_bucket_name, dummy_upload=dummy_upload)

        # construct experiment info
        bmeta = meta[query_inds]
        imgs = urls
        imgData = [{df: bm[df] for df in meta.dtype.names} for bm in bmeta]

        meta_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cats[1] + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/meta.pkl'
        meta = cPickle.load(open(meta_pth))
        perm = np.random.RandomState(0).permutation(len(meta))
        meta_p = meta[perm]
        pinv = fast.perminverse(perm)

        # decide query_inds
        objs = list(set(meta['obj']))
        nObj = int(round(float(NUM_TEST) / len(objs))) + 1  # int: used as a slice bound below
        query_inds_here = np.array([], dtype=int)
        for obj in objs:
            query_inds_here = np.append(query_inds_here, perm[meta_p['obj']==obj][-nObj:])
        query_inds_here = np.sort(query_inds_here)[:NUM_TEST]
        query_inds_here = np.append(query_inds_here, query_inds_here[inds_repeat])  # inds_repeat: module-level indices picking the REPEATS repeated trials
        assert len(query_inds_here) == NUM_TEST+REPEATS
        print query_inds_here

        # publish images if needed
        urls = publish_images(cats[1], query_inds_here, image_bucket_name, dummy_upload=dummy_upload)

        # construct experiment info
        bmeta = meta[query_inds_here]
        imgs = imgs + urls
        imgData = imgData + [{df: bm[df] for df in meta.dtype.names} for bm in bmeta]
        self._trials = {'imgFiles': imgs, 'imgData': imgData}
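
Every snippet on this page leans on the same identity: fast.perminverse(perm) returns the inverse permutation, so indexing with it undoes a seed-0 shuffle. A minimal sketch of that round trip (np.argsort(perm) is an equivalent fallback if yamutils is unavailable):

import numpy as np
import yamutils.fast as fast

data = np.arange(10) * 10          # stand-in for a meta or feature array
perm = np.random.RandomState(0).permutation(len(data))
pinv = fast.perminverse(perm)      # same as np.argsort(perm)

assert (data[perm][pinv] == data).all()            # pinv undoes perm
assert (pinv[perm] == np.arange(len(data))).all()  # composition is the identity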
Example #2
def get_metacol_base(self, ma, perm=None):
    assert isinstance(ma, str), ma
    # pull the column either from the meta tabarray or as a dataset attribute
    if ma in self.dset.meta.dtype.names:
        metacol = self.dset.meta[ma][:]
    else:
        metacol = getattr(self.dset, ma)[:]
    if perm is not None:
        assert perm.shape == metacol.shape
        metacol = metacol[perm]
    if hasattr(self, 'subslice'):
        metacol = metacol[self.subslice]
    try:
        # numeric columns pass through unchanged
        metacol + 1
        labels_unique = None
    except TypeError:
        # non-numeric columns are factorized into integer codes: sort,
        # find run boundaries, repeat each code over its run, then
        # scatter back to the original order via the inverse sort
        labels_unique = n.unique(metacol)
        s = metacol.argsort()
        cat_s = metacol[s]
        ss = n.array([0] + ((cat_s[1:] != cat_s[:-1]).nonzero()[0] + 1).tolist() + [len(cat_s)])
        ssd = ss[1:] - ss[:-1]
        labels = n.repeat(n.arange(len(labels_unique)), ssd)
        metacol = labels[fast.perminverse(s)]
    return metacol, labels_unique
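
The except branch turns a string column into integer codes without a Python loop: sort the values, find the run boundaries, repeat each code over its run, then scatter back to the original order with the inverse of the sort permutation. A small worked example (values are illustrative):

import numpy as np
import yamutils.fast as fast

metacol = np.array(['dog', 'cat', 'dog', 'bird'])
labels_unique = np.unique(metacol)        # ['bird' 'cat' 'dog']
s = metacol.argsort()
cat_s = metacol[s]                        # ['bird' 'cat' 'dog' 'dog']
ss = np.array([0] + ((cat_s[1:] != cat_s[:-1]).nonzero()[0] + 1).tolist() + [len(cat_s)])
labels = np.repeat(np.arange(len(labels_unique)), ss[1:] - ss[:-1])
print labels[fast.perminverse(s)]         # [2 1 2 0]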
Example #3
def publish_images(cat, query_inds, bucket_name, dummy_upload=True):
    im_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cat + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/cache/568ce4d00d2c7901515e71c0f90628db084f9dc6/jpeg/'
    meta_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cat + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/meta.pkl'
    meta = cPickle.load(open(meta_pth))

    if 'filename' not in meta.dtype.names:
        # cache files were written in shuffled order: file pinv[i].jpeg holds meta row i
        perm = np.random.RandomState(0).permutation(len(meta))
        pinv = fast.perminverse(perm)
        filename = [im_pth + str(pinv[i]) + '.jpeg' for i in range(len(meta))]
        meta = meta.addcols(filename, names='filename')
        with open(meta_pth, 'wb') as f:
            cPickle.dump(meta, f)
    
    # publish to s3
    conn = boto.connect_s3()
    b = conn.create_bucket(bucket_name)
    urls = []
    for count, ind in enumerate(query_inds):
        name = cat + '_' + str(meta['id'][ind]) + '.jpeg'
        url = 'https://s3.amazonaws.com/' + bucket_name + '/' + name
        urls.append(url)
        if not dummy_upload:
            if count % 100 == 0:
                print str(count) + ' of ' + str(len(query_inds))
            k = b.new_key(name)
            k.set_contents_from_filename(meta['filename'][ind], policy='public-read')
    return urls
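
With dummy_upload=True the boto calls are skipped entirely, so the function can be exercised offline to preview the URLs it would publish. A hypothetical call ('boat' and the indices are made up; the category's meta.pkl must exist at the hard-coded path):

import numpy as np

urls = publish_images('boat', np.array([0, 5, 9]), 'rosch_pose', dummy_upload=True)
print urls[0]   # https://s3.amazonaws.com/rosch_pose/boat_<id>.jpeg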
Example #4
def preprocess_lables(larray):
  # same sort-based factorization as get_metacol_base above, returned as int64
  larray = larray[:]
  labels_unique = np.unique(larray)
  s = larray.argsort()
  cat_s = larray[s]
  ss = np.array([0] + ((cat_s[1:] != cat_s[:-1]).nonzero()[0] + 1).tolist() + [len(cat_s)])
  ssd = ss[1:] - ss[:-1]
  labels = np.repeat(np.arange(len(labels_unique)), ssd)
  larray = labels[fast.perminverse(s)]
  return larray.astype(np.int64)
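
For reference, the sort/repeat/perminverse dance computes exactly what numpy's return_inverse does; a quick sanity check on a toy array (values are illustrative):

import numpy as np

larray = np.array(['horse', 'boat', 'horse', 'car'])
codes = preprocess_lables(larray)
print codes                                   # [2 0 2 1]
_, inv = np.unique(larray, return_inverse=True)
assert (codes == inv.astype(np.int64)).all()  # identical integer coding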
Example #5
def main():
    print "parsing options..."
    parser = OptionParser()
    parser.add_option("-f", "--feature-dir", dest="feature_dir")
    parser.add_option("-n", "--Nsubsample", type="int", dest="Nsub", default=None)
    parser.add_option("-m", "--metric", dest="metric", default='pn')
    (options, args) = parser.parse_args()
    
    feature_dir = options.feature_dir
    Nsub = options.Nsub
    metric = options.metric
    
    fds = feature_dir.split(',')
    
    features = cPickle.load(open(fds[0]))
    #features = assemble_feature_batches(fds[0])
    perm = np.random.RandomState(0).permutation(features.shape[0])
    pinv = fast.perminverse(perm)
    features = features[pinv] 
    if len(fds) > 1:
        features2 = cPickle.load(open(fds[1]))
        # features2 = assemble_feature_batches(fds[1])
        perm = np.random.RandomState(0).permutation(features2.shape[0])
        pinv = fast.perminverse(perm)
        features = np.append( features, features2[pinv], axis=1 )
    if Nsub is not None:
        # subsample Nsub feature columns with a fresh seed-0 permutation
        perm_f = np.random.RandomState(0).permutation(features.shape[1])
        features = features[:, perm_f[:Nsub]]
    
    RR = {}
    if 'p' in metric:
        print "Compute hvm performance..."
        RR['p'] = compute_perf(features, var_levels=['V6'])
    if 'n' in metric:
        print "Compute hvm neural fit..."
        RR['n'] = compute_nfit(features)
    
    with open(fds[0] + "result_hvm_" + str(Nsub) + ".p", "wb") as f:
        cPickle.dump(RR, f)
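
--feature-dir takes a comma-separated list; when two files are given, each is restored to dataset order and their columns are concatenated. A sketch of the concat step, with the invocation shown as a comment (script name is hypothetical):

#   python compute_hvm.py -f feats_a.p,feats_b.p -n 1000 -m pn
import numpy as np

A = np.random.RandomState(1).rand(8, 3)   # stand-ins for the two unpickled arrays
B = np.random.RandomState(2).rand(8, 5)
combined = np.append(A, B, axis=1)        # row counts must match; columns concatenate
assert combined.shape == (8, 8)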
Example #6
def assemble_feature_batches(feat_dir, N=None, seed=0, batch_range=None, shape_reduce=None, perm=None):
    bns = get_batches(feat_dir)
    if batch_range is not None:
        bns = bns[batch_range[0]: batch_range[1]]
    data = []
    for x in bns:
        ft = unpickle(os.path.join(feat_dir, 'data_batch_%d' % x))['data']
        if shape_reduce is not None:
            nf0, nf1 = ft.shape
            s = int(np.sqrt(nf1 / float(shape_reduce)))
            ft = ft.reshape((nf0, shape_reduce, s, s))
            ft1 = ft.mean(2).mean(2)
            ft2 = ft.sum(1).reshape((nf0, s * s))
            if N is not None:
                nf1 = ft1.shape[1]
                nf2 = ft2.shape[1]
                rng = np.random.RandomState(seed=seed)
                units1 = rng.permutation(nf1)[: N/2]
                units2 = rng.permutation(nf2)[: N - len(units1)]
                ft = np.column_stack([ft1[:, units1], ft2[:, units2]])
            else:
                ft = np.column_stack([ft1, ft2])
        elif N is not None:
            print('subsetting batch %d' % x)
            ft = ft[:, np.random.RandomState(seed=seed).permutation(ft.shape[1])[:N]]
        data.append(ft)
    data = np.row_stack(data)
    if perm == 'random':
        _perm = np.random.RandomState(0).permutation(len(data))
        pinv = fast.perminverse(_perm)
        data = data[pinv]
    elif perm is None:
        pass
    else:
        raise ValueError("unknown perm type: %s" % perm)
    return data
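
A hypothetical call (the directory layout is assumed: feat_dir holding data_batch_1, data_batch_2, ...):

feats = assemble_feature_batches('/path/to/feat_dir',
                                 N=256,             # keep 256 random columns per batch
                                 batch_range=(0, 4),
                                 perm='random')     # un-shuffle rows via the seed-0 pinv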
Example #7
# feature_file and nfeat are set earlier in the script
if '.p' in feature_file:
  features = cPickle.load(open(feature_file))
elif '.h5' in feature_file:
  f_feat = h5py.File(feature_file, 'r')
  bns = ['bn'+str(i) for i in range(23)]
  F = f_feat[bns[0]][:].copy()
  for bn in bns[1:]:
      Fbn = f_feat[bn][:].copy()
      F = np.append(F, Fbn, axis=0)
  if 'hvm' in feature_file:
      F = F[:5760]   # hvm has 5760 stimuli
  features = F
else:
  raise ValueError("Incorrect feature file extension: " + feature_file)

# restore dataset order: the features were saved shuffled by the standard seed-0 permutation
perm = np.random.RandomState(0).permutation(features.shape[0])
pinv = fast.perminverse(perm)
if len(features.shape) > 2:
  print "reshaping..."
  features = features.reshape((features.shape[0], -1))
if (nfeat is not None) and (features.shape[1] > nfeat):
  print "Select " + str(nfeat) + " features..."
  perm = np.random.RandomState(0).permutation(features.shape[1])
  features = features[:,perm[:nfeat]]
# cap at 5760 features (the hvm image count)
if features.shape[1] > 5760:
  print "random select..."
  perm = np.random.RandomState(0).permutation(features.shape[1])
  features = features[:,perm[:5760]]
features = features[pinv]

## hvm tasks
#print "Computing for hvm tasks..."
Example #8
from rosch_analysis import rosch_analysis
import numpy as np
import yamutils.fast as fast
import cPickle

import dldata.stimulus_sets.synthetic.synthetic_datasets as sd

dataset = sd.RoschDataset3_simple74testFewer()

features = cPickle.load(open("/om/user/hyo/caffe/features/catInet_roschSimple74testFewer_fc7.p"))
perm = np.random.RandomState(0).permutation(len(features))
pinv = fast.perminverse(perm)
features = features[pinv]
# start = 792320
# end = 792320 + 23*256

R = rosch_analysis(features, dataset, ntest=5, do_centroids=False, var_levels=["V6"])
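
Since the pickled features were saved in shuffled order, the [pinv] indexing puts row i back in line with dataset.meta[i]. A one-line check before running the analysis (assuming features is a 2-D array over the same stimuli):

assert features.shape[0] == len(dataset.meta)   # rows must align with meta after un-shuffling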