示例#1
0
def get_features_by_name(feature_name):
    """Return (features, meta) for a named HvM feature set.

    Parameters
    ----------
    feature_name : str
        'IT' or 'V4' for neuronal features restricted to the corresponding
        neuron set, or 'NYU' for stored model features (layer fc6).

    Returns
    -------
    (features, meta)
        The feature array and the dataset's meta record array.

    Raises
    ------
    ValueError
        If ``feature_name`` is not recognized.  (Previously an unknown name
        fell through every branch and crashed with an UnboundLocalError at
        the return statement.)
    """
    if feature_name == 'IT':
        import dldata.stimulus_sets.hvm as hvm
        dataset = hvm.HvMWithDiscfade()
        features = dataset.neuronal_features[:, dataset.IT_NEURONS]
        meta = dataset.meta

    elif feature_name == 'V4':
        import dldata.stimulus_sets.hvm as hvm
        dataset = hvm.HvMWithDiscfade()
        features = dataset.neuronal_features[:, dataset.V4_NEURONS]
        meta = dataset.meta

    elif feature_name == 'NYU':
        import dldata.stimulus_sets.hvm as hvm
        dataset = hvm.HvMWithDiscfade()
        # NOTE(review): ObjectId (bson.objectid.ObjectId) must be in scope at
        # module level -- confirm the enclosing file imports it.
        features = dataset.get_features(
            dict(crop=None,
                 dtype=u'float32',
                 mask=None,
                 mode=u'RGB',
                 normalize=False,
                 resize_to=[256, 256]), ObjectId('542927872c39ac23120db840'),
            u'fc6')[:]
        meta = dataset.meta

    else:
        raise ValueError('Unrecognized feature_name: %r' % (feature_name,))

    return features, meta
示例#2
0
def create_xarray(savepath):
    """Package the HvM 10ms temporal DataArray and write it to netCDF.

    Builds an xarray with dims ["neuroid", "presentation", "time_bin"]
    (stimulus set identical to HvM), flattens its multi-indexes in place,
    writes the result to ``savepath`` and returns it.

    Note: relies on the "10ms" branch of dldata.
    """
    from dldata.stimulus_sets import hvm

    hvm_dataset = hvm.HvMWithDiscfade()
    data_array = hvm_dataset.xr_from_hvm_10ms_temporal()
    data_array.reset_index(data_array.indexes.keys(), inplace=True)
    data_array.to_netcdf(savepath)
    return data_array
def get_decoder_model_by_name(decoder_model_name):
    """Return a decoder-model specification dict for the named configuration.

    'StandardModel' is the base spec: a LinearSVC with a C grid search,
    trained on all V6 images outside ImageSet1 and tested on ImageSet1.
    The other names derive from it by modifying 'metric_kwargs'.

    Raises:
        ValueError: if ``decoder_model_name`` is not recognized.
    """
    if decoder_model_name == 'StandardModel':
        # Train on every image id except ImageSet1; test on ImageSet1.
        all_ids = list(hvm.HvMWithDiscfade().meta['_id'])
        imageset1 = [all_ids[i] for i in ImageSet1_inds]
        not_imageset1 = list(set(all_ids) - set(imageset1))
        model = dict(name='StandardModel',
                     train_q={
                         'var': ['V6'],
                         '_id': not_imageset1
                     },
                     test_q=dict(_id=imageset1),
                     labelfunc='category',
                     split_by=None,
                     npc_train=None,
                     npc_test=len(imageset1),
                     npc_validate=0,
                     num_splits=1,
                     metric_screen="classifier",
                     metric_kwargs={
                         'model_type': 'svm.LinearSVC',
                         'model_kwargs': {
                             'GridSearchCV_params': {
                                 'C': [
                                     1e-5, 1e-4, 1e-3, .25e-3, .5e-3, .75e-3,
                                     1e-2, .25e-2, .5e-2, .75e-2, 1e-1, 1, 10
                                 ]
                             },
                             'GridSearchCV_kwargs': {
                                 'n_jobs': 1
                             }
                         }
                     })
        return model

    elif decoder_model_name == 'StandardModelWithMargins':
        # Base model, additionally reporting decision margins.
        model = get_decoder_model_by_name('StandardModel')
        model['metric_kwargs']['margins'] = True
        return model

    elif decoder_model_name == 'LogisticRegressionModel':
        # Base model with a logistic-regression classifier and probabilities.
        model = get_decoder_model_by_name('StandardModel')
        model['metric_kwargs'][
            'model_type'] = 'linear_model.LogisticRegression'
        model['metric_kwargs']['probabilities'] = True
        return model

    elif decoder_model_name == 'SVMModel':
        # Linear-kernel SVC with probability estimates enabled.
        model = get_decoder_model_by_name('StandardModel')
        model['metric_kwargs']['model_type'] = 'svm.SVC'
        model['metric_kwargs']['model_kwargs']['kernel'] = 'linear'
        model['metric_kwargs']['model_kwargs']['probability'] = True
        model['metric_kwargs']['probabilities'] = True
        return model

    else:
        # BUG FIX: was the Python-2-only statement form
        # `raise ValueError, 'Model not recognized'`; the call form below
        # is valid on both Python 2 and Python 3.
        raise ValueError('Model not recognized')
示例#4
0
    def createTrials(self):
        """Build ``self._trials`` for an HvM timing run on one object.

        Publishes the queried images, permutes them with a fixed seed,
        splits the permutation into blocks of BSIZE trials, appends REPEATS
        repeated trials to each block, prepends a LEARNING_PERIOD warm-up
        drawn from neighboring trials, and stores the result as
        ``{'imgFiles': urls, 'imgData': per-trial meta dicts}``.
        """

        dataset = hvm.HvMWithDiscfade()
        preproc = None  # publish images without preprocessing

        dummy_upload = True
        image_bucket_name = 'hvm_timing'
        seed = 0  # fixed seed -> reproducible trial order

        meta = dataset.meta
        #query_inds = np.arange(len(meta))
        #query#_inds = ((np.sqrt(meta['rxy']**2) > 30) &  (np.sqrt(meta['rxz']**2) > 30) & (np.sqrt(meta['ryz']**2) > 30)).nonzero()[0]
        #query_inds = ((meta['var'] == 'V6') &  (meta['category'] == 'Faces')).nonzero()[0]
        #query_inds = ((meta['var'] == 'V6')  & (meta['category'] == 'Faces')).nonzero()[0]
        #query_inds = ((np.sqrt(meta['ryz']**2) > 0) & (np.sqrt((meta['rxy']**2 + meta['rxz']**2)) <  10) &  (meta['category'] == 'Tables')).nonzero()[0]
        #query_inds = ((np.sqrt(meta['ryz']**2) > 0) & (np.sqrt((meta['rxy']**2 + meta['rxz']**2)) <  50) &  (meta['category'] == 'Chairs')).nonzero()[0]
        # Active query: all V6 images of the two-engine airliner object.
        query_inds = ((meta['var'] == 'V6') &
                      (meta['obj'] == '_01_Airliner_2jetEngines')).nonzero()[0]
        #query_inds = ((meta['var'] == 'V6') & (meta['obj'] == 'face0001')).nonzero()[0]
        #aquery_inds = ((meta['var'] == 'V6') &  (meta['obj'] == 'bear')).nonzero()[0]

        urls = dataset.publish_images(query_inds,
                                      preproc,
                                      image_bucket_name,
                                      dummy_upload=dummy_upload)

        rng = np.random.RandomState(seed=seed)
        perm = rng.permutation(len(query_inds))

        nblocks = int(math.ceil(float(len(perm)) / BSIZE))
        print('%d blocks' % nblocks)
        imgs = []
        imgData = []
        # NOTE(review): only the first two blocks are used ([:2]) --
        # presumably intentional for this run; confirm.
        for bn in range(nblocks)[:2]:
            pinds = perm[BSIZE * bn:BSIZE * (bn + 1)]
            # append REPEATS repeated trials, then shuffle within the block
            pinds = np.concatenate([pinds, pinds[:REPEATS]])
            rng.shuffle(pinds)
            if bn == 0:
                # first block: warm-up drawn from the end of the permutation
                learning = perm[-LEARNING_PERIOD:]
            else:
                # later blocks: warm-up from the tail of the previous block
                learning = perm[BSIZE * bn - LEARNING_PERIOD:BSIZE * bn]
            pinds = np.concatenate([learning, pinds])
            assert (bn + 1 == nblocks) or (len(pinds) == BSIZE + REPEATS +
                                           LEARNING_PERIOD), len(pinds)
            bmeta = meta[query_inds[pinds]]
            burls = [urls[_i] for _i in pinds]
            # one plain dict per trial carrying every meta field
            bmeta = [{df: bm[df] for df in meta.dtype.names} for bm in bmeta]
            imgs.extend(burls)
            imgData.extend(bmeta)
        self._trials = {'imgFiles': imgs, 'imgData': imgData}
    def createTrials(self):
        """Build ``self._trials`` for HvM images that carry axis bounding boxes.

        Like the other timing ``createTrials``, but selects images whose
        extended meta has ``axis_bb_top > 0`` and augments each trial's
        metadata with the area bounding-box corner fields, dropping any
        field whose name contains 'semantic'.
        """

        dataset = hvm.HvMWithDiscfade()
        preproc = None  # publish images without preprocessing

        dummy_upload = True
        image_bucket_name = 'hvm_timing'
        seed = 0  # fixed seed -> reproducible trial order

        meta = dataset.meta
        extended_meta = dataset.extended_meta
        query_inds = (extended_meta['axis_bb_top'] > 0).nonzero()[0]

        urls = dataset.publish_images(query_inds, preproc,
                                      image_bucket_name, dummy_upload=dummy_upload)

        rng = np.random.RandomState(seed=seed)
        perm = rng.permutation(len(query_inds))

        # extra per-image fields to ship with each trial's metadata
        additional = ('area_bb_0_x',
                      'area_bb_0_y',
                      'area_bb_1_x',
                      'area_bb_1_y',
                      'area_bb_2_x',
                      'area_bb_2_y',
                      'area_bb_3_x',
                      'area_bb_3_y')

        nblocks = int(math.ceil(float(len(perm))/BSIZE))
        print('%d blocks' % nblocks)
        imgs = []
        imgData = []
        for bn in range(nblocks)[:]:
            pinds = perm[BSIZE * bn: BSIZE * (bn + 1)]
            # append REPEATS repeated trials, then shuffle within the block
            pinds = np.concatenate([pinds, pinds[: REPEATS]])
            rng.shuffle(pinds)
            if bn == 0:
                # first block: warm-up drawn from the end of the permutation
                learning = perm[-LEARNING_PERIOD: ]
            else:
                # later blocks: warm-up from the tail of the previous block
                learning = perm[BSIZE * bn - LEARNING_PERIOD: BSIZE*bn]
            pinds = np.concatenate([learning, pinds])
            assert (bn + 1 == nblocks) or (len(pinds) == BSIZE + REPEATS + LEARNING_PERIOD), len(pinds)
            bmeta = extended_meta[query_inds[pinds]]
            burls = [urls[_i] for _i in pinds]
            names = meta.dtype.names + additional
            # drop semantic-annotation fields from the shipped metadata
            names = [ n for n in names if not 'semantic' in n ]
            bmeta = [{df: bm[df] for df in names} for bm in bmeta]
            imgs.extend(burls)
            imgData.extend(bmeta)
        self._trials = {'imgFiles': imgs, 'imgData': imgData}
示例#6
0
def get_human_data_densely_sampled():
    """Load and reformat densely-sampled human responses on 128 V6 images.

    Pulls raw trials from the 'hvm_dense_smp_v6_2rpw' collection, tags each
    trial with its per-worker, per-image repetition index, keeps only the
    first two repetitions of V6 trials, and reshapes correctness into a
    matrix of shape (128 images, 2 reps, n_workers) with images in the
    canonical ImageSet1_inds order.

    Returns:
        human_reps: (128, 2 * n_workers) correctness, reps stacked on axis 1.
        human_individuals: the (128, 2, n_workers) per-worker matrix.
        raw_data_with_rep: the raw trials with the added 'rep' column.
    """
    dataset = hvm.HvMWithDiscfade()
    raw_data = confusion_matrices.get_data('hvm_dense_smp_v6_2rpw',
                                           field='category')
    # Add rep number to raw data, then clean
    # which_rep[worker][filename] counts how often this worker has seen
    # this image so far.
    which_rep = {}
    for worker in np.unique(raw_data['WorkerId']):
        which_rep[worker] = {}
        for filename in np.unique(raw_data['filename']):
            which_rep[worker][filename] = 0
    rep = np.zeros(raw_data['filename'].shape[0])
    for i, trial in enumerate(raw_data):
        filename = trial['filename']
        worker = trial['WorkerId']
        rep[i] = which_rep[worker][filename]
        which_rep[worker][filename] += 1
    raw_data_with_rep = raw_data.addcols([rep], names=['rep'])

    # Get rid of everything but first two reps, get rid of learning reps (Images of V3 and V0)
    data = raw_data_with_rep[raw_data_with_rep['rep'] < 2]
    data = data[data['var'] == 'V6']

    # Reformat to matrix
    human_matrix = []  # images, reps, worker
    canonical_order = dataset.meta['_id'][ImageSet1_inds]
    workers = np.unique(data['WorkerId'])
    n_workers = len(workers)
    for worker in workers:
        worker_data = data[data['WorkerId'] == worker]
        rep0 = worker_data[worker_data['rep'] == 0]
        rep1 = worker_data[worker_data['rep'] == 1]
        c0 = []
        c1 = []
        # collect correctness per image in canonical order for both reps
        for Imid in canonical_order:
            c0.append(rep0[rep0['_id'] == Imid]['correct'])
            c1.append(rep1[rep1['_id'] == Imid]['correct'])
        X = np.column_stack([np.array(c0), np.array(c1)])
        X = np.expand_dims(X, 2)
        human_matrix.append(X)
        # every worker must have seen the full canonical image set
        assert set(np.unique(worker_data['_id'])) == set(canonical_order)
    human_matrix = np.concatenate(human_matrix, 2)
    assert human_matrix.shape == (128, 2, n_workers)
    human_individuals = deepcopy(human_matrix)
    # stack the two repetitions side by side along axis 1
    human_reps = np.concatenate((human_matrix[:, 0, :], human_matrix[:, 1, :]),
                                1)
    return human_reps, human_individuals, raw_data_with_rep
示例#7
0
    def createTrials(self):
        """Build ``self._trials`` for the V6 position experiment.

        Publishes every V6 image, then for each block of 50 images builds a
        shuffled sequence in which each image appears exactly twice, and
        stores the urls and per-trial metadata under 'imgFiles'/'imgData'.
        """

        dataset = hvm.HvMWithDiscfade()
        preproc = None

        dummy_upload = True
        image_bucket_name = 'hvm_images_for_position'
        seed = 0

        meta = dataset.meta
        query_inds = (meta['var'] == 'V6').nonzero()[0]

        urls = dataset.publish_images(query_inds,
                                      preproc,
                                      image_bucket_name,
                                      dummy_upload=dummy_upload)

        rng = np.random.RandomState(seed=seed)
        perm = rng.permutation(len(query_inds))

        bsize = 50
        nblocks = int(math.ceil(float(len(perm)) / bsize))
        print('%d blocks' % nblocks)

        all_urls = []
        all_meta = []
        fields = meta.dtype.names
        for block_idx in range(nblocks):
            block = perm[bsize * block_idx:bsize * (block_idx + 1)]
            # each image appears twice within the block, in shuffled order
            doubled = np.concatenate([block, block.copy()])
            shuffle_order = rng.permutation(len(doubled))
            doubled = doubled[shuffle_order]
            records = meta[query_inds[doubled]]
            all_urls.extend([urls[j] for j in doubled])
            all_meta.extend([{f: rec[f] for f in fields} for rec in records])
        self._trials = {'imgFiles': all_urls, 'imgData': all_meta}
import thing4a
import dldata.stimulus_sets.hvm as hvm
from collections import OrderedDict

# Preprocessing spec for dldata image retrieval: 256x256 float32 RGB,
# no normalization, crop or mask.
preproc = OrderedDict([(u'normalize', False), (u'dtype', u'float32'),
                       (u'resize_to', [256, 256, 3]), (u'mode', u'RGB'),
                       (u'crop', None), (u'mask', None)])

dataset = hvm.HvMWithDiscfade()
imgs = dataset.get_images(preproc=preproc)
# Caffe VGG model definition and weights (local paths)
pf = '/home/ubuntu/new/caffe/vgg_avg2.prototxt'
sf = '/home/ubuntu/new/caffe/vgg_normalised.caffemodel'

#im0 = 255*imgs[0].transpose((2, 0, 1)).reshape((1, 3, 256, 256))
#im1 = 255*imgs[-1].transpose((2, 0, 1)).reshape((1, 3, 256, 256))
meta = dataset.meta
inds = (meta['category'] == 'Cars').nonzero()[0]
# Crop each Cars image to 227x227 (14:-15 on both spatial axes), scale to
# 0-255 and reshape to NCHW layout (1, 3, 227, 227) for Caffe.
ims = [
    255 * imgs[i][14:-15][:, 14:-15].transpose((2, 0, 1)).reshape(
        (1, 3, 227, 227)) for i in inds
]
import numpy as np
from PIL import Image
# Load an external image, resize to 256x256, convert to NCHW, then
# center-crop 16 pixels from each spatial border (-> 224x224).
im = np.asarray(
    Image.open('/home/ubuntu/imgres.jpg').resize(
        (256, 256), resample=Image.ANTIALIAS)).transpose(2, 0, 1).reshape(
            (1, 3, 256, 256))[:, :, 16:-16][:, :, :, 16:-16]

im1 = np.asarray(
    Image.open('/home/ubuntu/test.png').convert('RGB').resize(
        (256, 256), resample=Image.ANTIALIAS)).transpose(2, 0, 1).reshape(
示例#9
0
def get_exp(category, sandbox=True, dummy_upload=True):
    """Build the subordinate (within-category) identification experiment.

    Publishes all HvM images of ``category``, creates match-to-sample
    trials over the category's objects, injects quality-estimation (QE)
    repeats and learning-period trials into every HIT, and shuffles the
    test alternatives on a per-HIT basis.

    Returns (exp, html_data).  Note: this function uses Python 2 syntax
    (print statements, xrange).
    """

    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    inds = (meta['category'] == category).nonzero()[0]
    meta = meta[inds]
    objs = np.unique(meta['obj'])
    combs = [objs]  # one n-way comparison among all of the category's objects
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(inds,
                                  preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)

    base_url = 'https://s3.amazonaws.com/hvm_timing/'
    # one canonical response image per object (first image of each obj)
    obj_resp_ids = [meta[meta['obj'] == o]['id'][0] for o in objs]
    response_images = [{
        'urls': [base_url + obj_id + '.png' for obj_id in obj_resp_ids],
        'meta': [{
            'obj': obj,
            'category': category
        } for obj in objs],
        'labels':
        objs
    }]

    mult = 2  # number of times each image is presented
    html_data = {
        'response_images': response_images,
        'combs': combs,
        'num_trials': 90 * 8 * mult,
        'meta_field': 'obj',
        'meta': tb.tab_rowstack([meta] * mult),
        'urls': urls * mult,
        'shuffle_test': False,
    }

    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }, {
        'old': 'OBJTYPE',
        'new': category
    }]

    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32 + 16
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='hvm_subordinate.html',
        htmldst='hvm_subordinate_' + category + '_n%05d.html',
        tmpdir='tmp_subordinate_%s' % category,
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.35,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm subordinate identification",  # noqa
        collection_name='hvm_subordinate_identification_%s' % category,
        max_assignments=1,
        bucket_name='hvm_subordinate_identification_test',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=[
            'objnames.js', '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules,
        log_prefix='subordinate_' + category + '_')

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 8 * mult, n_total_trials

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data

    ind_repeats = repeat_inds[category] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {
        e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
        for e in exp._trials
    }

    # learning trials: one trial per practice image, in first-seen order
    ind_learn = practice_inds[category]
    goodids = [meta[i]['id'] for i in ind_learn]

    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample'][
                    'id'] in goodids and exp._trials['imgData'][_ind][
                        'Sample']['id'] not in got:
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(
        trials_lrn['imgData'])

    # evenly spaced insertion offsets for the QE repeats within a HIT
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1, -ACTUAL_TRIALS_PER_HIT /
                        float(len(ind_repeats))).round().astype('int')

    # NOTE(review): Python-2 integer division assumed here -- confirm.
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # walk the HITs back-to-front so earlier insertions don't shift offsets
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT, -1,
                                -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    # prepend the learning trials at the start of every HIT
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    #shuffle test on a per-hit basis
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(8)
        for i in range(
                trials_per_hit * j,
                min(trials_per_hit * (j + 1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
            {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * 5, (n_hits_floor,
                                                        n_applied_hits)
    assert len(exp._trials['imgFiles']) == mult * (720 + 5 * (32 + 16)), len(
        exp._trials['imgFiles'])
    """
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    offsets2 = np.arange(8 * 4)[::-1] + offsets

    ibie = zip(range(0, 720 + 4 * 32, trials_per_hit), range(trials_per_hit, 720 + 4 * 32 + trials_per_hit, trials_per_hit))
    assert all([set([tuple(e) for e in
        np.array(exp._trials['labels'][ib:ie])[offsets2]]) == s_ref_labels
        for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'
    """

    return exp, html_data
示例#10
0
def get_exp(sandbox=True, debug=False, dummy_upload=True):
    """Build the HvM basic-category 2-way match-to-sample experiment.

    Creates one 2-way comparison per unordered pair of the 8 categories,
    publishes all HvM images, injects (partially answer-flipped)
    quality-estimation repeats into every HIT, and sanity-checks the
    resulting trial structure.

    Returns (exp, html_data); returns early, before QE insertion, when
    ``debug`` is True.  Note: Python 2 syntax (print statements, xrange,
    integer division).
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    response_images = []
    categories = np.unique(meta['category'])
    # every unordered pair of categories gets its own 2-way comparison
    cat_combs = [e for e in itertools.combinations(categories, 2)]
    response_images.extend([{
        'urls': [get_url_labeled_resp_img(c1),
                 get_url_labeled_resp_img(c2)],
        'meta': [{
            'category': category
        } for category in [c1, c2]],
        'labels': [c1, c2]
    } for c1, c2 in cat_combs])
    combs = cat_combs

    urls = dataset.publish_images(range(len(dataset.meta)),
                                  None,
                                  'hvm_timing',
                                  dummy_upload=dummy_upload)

    # tutorial HTML lives next to this module
    with open(path.join(path.dirname(__file__), 'tutorial_html_basic'),
              'r') as tutorial_html_file:
        tutorial_html = tutorial_html_file.read()
    label_func = lambda x: hvm.OBJECT_NAMES[x['obj']]
    html_data = {
        'combs': combs,
        'response_images': response_images,
        'num_trials': 125 * 2,
        'meta_field': 'category',
        'meta': meta,
        'urls': urls,
        'shuffle_test': True,
        'meta_query': lambda x: x['var'] == 'V6',
        'label_func': label_func
    }
    # plural category name -> singular display name
    cat_dict = {
        'Animals': 'Animal',
        'Boats': 'Boat',
        'Cars': 'Car',
        'Chairs': 'Chair',
        'Faces': 'Face',
        'Fruits': 'Fruit',
        'Planes': 'Plane',
        'Tables': 'Table'
    }

    # placeholder -> value substitutions applied to the HTML template
    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(10)
    }, {
        'old': 'OBJTYPE',
        'new': 'Object Recognition'
    }, {
        'old': 'TUTORIAL_HTML',
        'new': tutorial_html
    }, {
        'old': 'CATDICT',
        'new': json.dumps(cat_dict)
    }, {
        'old': 'METAFIELD',
        'new': "'category'"
    }]

    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='web/general_two_way.html',
        htmldst='hvm_basic_2way_n%05d.html',
        sandbox=sandbox,
        title=
        'Object recognition --- report what you see. Up to 50 cent performance based bonus',
        reward=0.25,
        duration=1600,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment='hvm_basic_2ways',
        collection_name='hvm_basic_2ways',
        max_assignments=1,
        bucket_name='hvm_2ways',
        trials_per_hit=ACTUAL_TRIALS_PER_HIT + 24,  # 140 + 6x4 repeats
        html_data=html_data,
        tmpdir='tmp',
        frame_height_pix=1200,
        othersrc=[
            '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules)

    # -- create trials
    exp.createTrials(sampling='with-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == (8 * 7 / 2) * 250
    if debug:
        return exp, html_data

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data
    ind_repeats = [0, 4, 47, 9, 17, 18] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {
        e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
        for e in exp._trials
    }

    # -- flip answer choices of some repeated images
    n_qe = len(trials_qe['labels'])
    # if True, flip
    # NOTE(review): Python-2 integer division assumed (n_qe / 2).
    flips = [True] * (n_qe / 2) + [False] * (n_qe - n_qe / 2)
    assert len(flips) == n_qe
    rng.shuffle(flips)
    assert len(trials_qe.keys()) == 4

    for i, flip in enumerate(flips):
        if not flip:
            continue
        trials_qe['imgFiles'][i][1].reverse()
        trials_qe['labels'][i].reverse()
        trials_qe['imgData'][i]['Test'].reverse()

    # -- actual application
    # evenly spaced insertion offsets for the QE repeats within a HIT
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1, -ACTUAL_TRIALS_PER_HIT /
                        float(len(ind_repeats))).round().astype('int')
    # NOTE(review): tautological assert -- likely meant to compare against
    # len(ind_repeats); confirm the intent.
    assert len(offsets) == len(offsets)

    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # walk the HITs back-to-front so earlier insertions don't shift offsets
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT, -1,
                                -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
                # exp._trials[k].insert(i_trial_begin + offset, 'test')
        n_applied_hits += 1

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
        {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert 50 == n_applied_hits, n_applied_hits
    assert len(exp._trials['imgFiles']) == 50 * 164
    s_ref_labels = [tuple(e) for e in trials_qe['labels']]
    offsets2 = np.arange(24)[::-1] + offsets
    ibie = zip(range(0, 50 * 164, 164), range(164, 50 * 164, 164))
    assert all([[(e1, e2)
                 for e1, e2 in np.array(exp._trials['labels'][ib:ie])[offsets2]
                 ] == s_ref_labels for ib, ie in ibie])

    # -- drop unneeded, potentially abusable stuffs
    #del exp._trials['imgData']
    print '** Finished creating trials.'

    return exp, html_data
def get_exp(sandbox=True, dummy_upload=True):
    """Build the dense-sampling experiment on 128 fixed V6 images.

    Loads precomputed image indices from 'inds.npy' / 'practice_inds.npy',
    validates them (64 objects, 2 images each, repeated n_repeats times,
    no overlap with the practice set), and creates a
    SimpleMatchToSampleExperiment whose ``mult`` HITs each show the 128
    images twice plus 16 practice trials.

    Returns (exp, html_data).  Note: Python 2 print statements inside the
    validation helper.
    """

    dataset = hvm.HvMWithDiscfade()
    # meta = dataset.meta ###
    meta_H = dataset.meta  ###
    #inds = np.arange(len(meta))

    n_repeats = 2  # each of the 128 images is shown twice per worker
    #get inds and practice_inds from file
    inds = list(np.load('inds.npy'))
    practice_inds = list(np.load('practice_inds.npy'))
    assert len(inds) == 128
    inds = inds * n_repeats

    def test_inds(inds, n_repeats, practice_inds):
        # sanity checks on the loaded index sets
        assert len(inds) == 128 * n_repeats
        #Test that there are 4 per object, and 64 objects
        object_count = {}
        for i in inds:
            print 'Counting object %s' % (meta_H['obj'][i])
            object_count[meta_H['obj'][i]] = object_count.get(
                meta_H['obj'][i], 0) + 1
        print 'Number of unique objects'
        print object_count.keys()
        print len(object_count.keys())
        assert len(object_count.keys()) == 64
        for obj in object_count.keys():
            assert object_count[obj] == 2 * n_repeats
        assert len(np.unique(inds)) * n_repeats == len(inds)
        # practice images must not overlap the sampled images
        assert len(set(inds) & set(practice_inds)) == 0

    test_inds(inds, n_repeats, practice_inds)

    meta = meta_H[inds]  ###
    #n_hits_from_data = len(meta) / ACTUAL_TRIALS_PER_HIT
    n_hits_from_data = len(meta)  ###
    categories = np.unique(meta['category'])  # dataset.categories ###
    combs = [categories]

    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(range(len(meta_H)),
                                  preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)

    # canonical category response images, one per category
    base_url = 'https://canonical_images.s3.amazonaws.com/'
    response_images = [{
        'urls': [base_url + cat + '.png' for cat in categories],
        'meta': [{
            'category': 'Animals'
        }, {
            'category': 'Boats'
        }, {
            'category': 'Cars'
        }, {
            'category': 'Chairs'
        }, {
            'category': 'Faces'
        }, {
            'category': 'Fruits'
        }, {
            'category': 'Planes'
        }, {
            'category': 'Tables'
        }],
        'labels':
        categories
    }]

    mult = 15  ### 2
    #ind_repeats = repeat_inds * REPEATS_PER_QE_IMG ###
    #rng = np.random.RandomState(0) ###
    #rng.shuffle(ind_repeats) ###
    ind_learn = practice_inds

    html_data = {
        'response_images': response_images,
        'combs': combs,
        # 'num_trials': 90 * 64 * mult,
        'meta_field': 'category',
        'meta': meta_H,
        'idx_smp': inds,
        #'idx_rep': ind_repeats, ###
        'idx_lrn': ind_learn,
        'urls': urls,
        'n_hits': mult,
        'shuffle_test': False,
    }

    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }]

    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 16
    exp = SimpleMatchToSampleExperiment(
        htmlsrc='hvm_dense_smp_v6_2rpw.html',
        htmldst='hvm_dense_smp_v6_2rpw_n%05d.html',
        tmpdir='tmp_dense_smp_v6_2rpw',
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.15,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm dense sampling of 128 V6 images, 2reps per worker",  # noqa
        collection_name='hvm_dense_smp_v6_2rpw',
        max_assignments=1,
        bucket_name='hvm_dense_smp_v6_2rpw',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=['dltk.js', 'dltkexpr.js', 'dltkrsvp.js'],
        additionalrules=additionalrules,
        log_prefix='hvm_dense_smp_v6_2rpw__')
    # -- create trials
    exp.createTrials(verbose=1)
    # verify that the sampled image ids match the saved first-experiment ids
    all_ids = [m['Sample']['_id'] for m in exp._trials['imgData']]
    ids = set([str(_) for _ in np.unique(all_ids)])
    ids_test = set([str(_) for _ in np.load('first_experiment_ids.npy')])
    assert len(ids) == len(ids_test)
    assert ids == ids_test
    #exp.createTrials(sampling='with-replacement', verbose=1) ###
    n_total_trials = len(exp._trials['imgFiles'])
    #assert n_total_trials == mult * (len(meta) + 32 + 16) ###
    assert n_total_trials == mult * (len(meta) + 16)  ###

    return exp, html_data
示例#12
0
from hvm_analysis import hvm_analysis
import dldata.stimulus_sets.hvm as nd
import dldata.metrics.utils as utils
from hvm_analysis import post_process_neural_regression_msplit
import yamutils.fast as fast

# Command-line driver: argv[1] = feature file path,
# argv[2] (optional) = cap on the number of features to use.
feature_file = sys.argv[1]
nfeat = None
if len(sys.argv) > 2:
    nfeat = int(sys.argv[2])

print "Start computing for " + feature_file

# Get the dataset
hvm_dataset = nd.HvMWithDiscfade()

# Get features
# Fixed-seed permutation over the 5760 HvM images.
perm = np.random.RandomState(0).permutation(5760)
if '.p' in feature_file:
  # pickled feature array
  features = cPickle.load(open(feature_file))
elif '.h5' in feature_file:
  # HDF5 features stored in batch datasets 'bn0'..'bn22'; concatenate
  # them along axis 0
  f_feat = h5py.File(feature_file, 'r')
  bns = ['bn'+str(i) for i in range(23)]
  F = f_feat[bns[0]][:].copy()
  for bn in bns[1:]:
      Fbn = f_feat[bn][:].copy()
      F = np.append(F, Fbn, axis=0)
  if 'hvm' in feature_file:
      # keep only the 5760 HvM rows
      F = F[:5760]  
  features = F
示例#13
0
def get_exp(sandbox=True, dummy_upload=True):
    """Build the 'hvm_basic_categorization' MTurk experiment.

    Publishes every HvM image, assembles an 8-way basic-level
    categorization match-to-sample task (90 trials x 64 objects),
    inserts repeated quality-estimation (QE) trials into each HIT,
    and sanity-checks the final trial counts.

    Parameters:
        sandbox (bool): target the MTurk sandbox instead of production.
        dummy_upload (bool): forwarded to dataset.publish_images;
            presumably skips the actual S3 upload -- TODO confirm.

    Returns:
        (exp, html_data): the configured experiment object and the
        template-data dict interpolated into its HTML source.

    NOTE(review): Python 2 code (print statements, xrange, integer
    division). Relies on module-level names hvm, np, copy,
    MatchToSampleFromDLDataExperiment, LEARNING_PERIOD,
    ACTUAL_TRIALS_PER_HIT and REPEATS_PER_QE_IMG defined elsewhere
    in the file.
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    categories = dataset.categories
    combs = [categories]

    # Publish all images (no preprocessing) and collect their URLs.
    inds = np.arange(len(meta))
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(inds, preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)

    # One canonical response icon per category, served from S3.
    base_url = 'https://canonical_images.s3.amazonaws.com/'
    response_images = [{
        'urls': [base_url + cat + '.png' for cat in categories],
        'meta': [{'category': 'Animals'},
                 {'category': 'Boats'},
                 {'category': 'Cars'},
                 {'category': 'Chairs'},
                 {'category': 'Faces'},
                 {'category': 'Fruits'},
                 {'category': 'Planes'},
                 {'category': 'Tables'}],
        'labels': categories}]

    # Data passed into the experiment's HTML template.
    html_data = {
            'response_images': response_images,
            'combs': combs,
            'num_trials': 90 * 64,
            'meta_field': 'category',
            'meta': meta,
            'urls': urls,
            'shuffle_test': True,
    }

    # Template substitution rule for the practice-period length.
    additionalrules = [{'old': 'LEARNINGPERIODNUMBER',
                    'new':  str(LEARNING_PERIOD)}]
    # Each HIT carries 32 extra QE trials on top of the real ones.
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32
    exp = MatchToSampleFromDLDataExperiment(
            htmlsrc='hvm_basic_categorization.html',
            htmldst='hvm_basic_categorization_n%05d.html',
            sandbox=sandbox,
            title='Object recognition --- report what you see',
            reward=0.35,
            duration=1500,
            keywords=['neuroscience', 'psychology', 'experiment', 'object recognition'],  # noqa
            description="***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
            comment="hvm basic categorization",  # noqa
            collection_name= 'hvm_basic_categorization',
            max_assignments=1,
            bucket_name='hvm_basic_categorization',
            trials_per_hit=trials_per_hit,  # 150 + 8x4 repeats
            html_data=html_data,
            frame_height_pix=1200,
            othersrc = ['../../lib/dltk.js'],
            additionalrules=additionalrules

            )

    # -- create trials
    exp.createTrials(sampling='with-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 64, n_total_trials

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data

    # 8 fixed image indices, each repeated REPEATS_PER_QE_IMG times,
    # then shuffled with a fixed seed for reproducibility.
    ind_repeats = [3440, 3282, 3321, 3802, 5000, 3202, 4041, 4200] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    # Deep-copy the chosen trials for every per-trial field dict key.
    trials_qe = {e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
            for e in exp._trials}
    # Descending insertion offsets that spread the QE trials evenly
    # across one HIT's ACTUAL_TRIALS_PER_HIT real trials.
    offsets = np.arange(
                ACTUAL_TRIALS_PER_HIT - 3, -1,
                -ACTUAL_TRIALS_PER_HIT / float(len(ind_repeats))
            ).round().astype('int')

    # Python 2 integer division: number of full HITs worth of trials.
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # Walk HITs from last to first so earlier insertion indices stay
    # valid as the lists grow.
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
            -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
            {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    # Hard-coded expectations for 90*64 = 5760 real trials under the
    # current ACTUAL_TRIALS_PER_HIT / QE settings.
    assert 38 == n_applied_hits, n_applied_hits
    assert len(exp._trials['imgFiles']) == 6976, len(exp._trials['imgFiles'])
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    # Absolute positions of the 8x4 inserted QE trials within one
    # padded HIT -- presumably mirrors the insertion arithmetic above;
    # verify if the QE layout changes.
    offsets2 = np.arange(8 * 4)[::-1] + offsets

    # Verify every full HIT contains exactly the reference QE labels.
    ibie = zip(range(0, 6976, trials_per_hit), range(trials_per_hit, 6976 + trials_per_hit, trials_per_hit))
    assert all([set([tuple(e) for e in
        np.array(exp._trials['labels'][ib:ie])[offsets2]]) == s_ref_labels
        for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'

    return exp, html_data
# ---- 示例 (Example) #14 | score: 0 ----
def get_exp(sandbox=True, dummy_upload=True):
    """Build the 'hvm_dense_smp_v6_s100' MTurk experiment.

    Dense behavioral sampling of a fixed subset of HvM V6 images: each
    worker sees every sampled image repeats_per_image times plus a set
    of practice images, and workers_per_image workers are recruited
    via the 'n_hits' field.

    Parameters:
        sandbox (bool): target the MTurk sandbox instead of production.
        dummy_upload (bool): forwarded to dataset.publish_images;
            presumably skips the actual S3 upload -- TODO confirm.

    Returns:
        (exp, html_data): the configured experiment object and the
        template-data dict interpolated into its HTML source.

    NOTE(review): Python 2 code. Relies on module-level names hvm, np,
    BehavioralBenchmark, SimpleMatchToSampleExperiment and mult defined
    elsewhere in the file. The '###' markers are the original author's
    change annotations and are kept as-is.
    """
    LEARNING_PERIOD = 10

    # Practice trials: 8 low-variation and 8 high-variation images.
    practice_inds_low_var = range(0, 80, 10)
    practice_inds_high_var = range(3200, 3200 + 90 * 8, 90)
    practice_inds = practice_inds_low_var + practice_inds_high_var

    repeats_per_image = 2  ### 2
    workers_per_image = 60

    dataset = hvm.HvMWithDiscfade()
    # meta = dataset.meta ###
    meta_H = dataset.meta  ###
    #inds = np.arange(len(meta))

    # Sampled image subset, taken from the image-level benchmark; it
    # must be disjoint from the practice images.
    inds = BehavioralBenchmark.experiments.image_level_benchmark.INDS
    assert (len(set(practice_inds).intersection(set(inds))) == 0)
    meta = meta_H[inds]  ###
    #n_hits_from_data = len(meta) / ACTUAL_TRIALS_PER_HIT
    categories = np.unique(meta['category'])  # dataset.categories ###
    combs = [categories]

    # Publish the FULL image set (practice images live outside `inds`).
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(range(len(meta_H)),
                                  preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)

    # One canonical response icon per category, served from S3.
    base_url = 'https://canonical_images.s3.amazonaws.com/'
    response_images = [{
        'urls': [base_url + cat + '.png' for cat in categories],
        'meta': [{
            'category': 'Animals'
        }, {
            'category': 'Boats'
        }, {
            'category': 'Cars'
        }, {
            'category': 'Chairs'
        }, {
            'category': 'Faces'
        }, {
            'category': 'Fruits'
        }, {
            'category': 'Planes'
        }, {
            'category': 'Tables'
        }],
        'labels':
        categories
    }]

    # NOTE(review): assumes `inds` is a plain Python list, so `*` is
    # sequence repetition; if INDS were a numpy array this would be an
    # elementwise multiply -- verify INDS's type.
    inds_shown_per_hit = inds * repeats_per_image
    rng = np.random.RandomState(0)
    ind_learn = practice_inds
    ###############################################################
    # Data passed into the experiment's HTML template.
    html_data = {
        'response_images': response_images,
        'combs': combs,
        # 'num_trials': 90 * 64 * mult,
        'meta_field': 'category',
        'meta': meta_H,
        'idx_smp': inds,
        'idx_lrn': ind_learn,
        'urls': urls,
        'n_hits': workers_per_image,
        'shuffle_test': False,
    }

    # Template substitution rule for the practice-period length.
    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }]

    # Each HIT = all repeated sample trials plus the practice trials.
    trials_per_hit = len(inds_shown_per_hit) + len(practice_inds)
    exp = SimpleMatchToSampleExperiment(
        htmlsrc='hvm_dense_smp_v6_s100.html',
        htmldst='hvm_dense_smp_v6_s100_n%05d.html',
        tmpdir='tmp_dense_smp_v6_s100',
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.10,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm dense sampling of 100 V6 images",  # noqa
        collection_name='hvm_dense_smp_v6_s100',
        max_assignments=1,
        bucket_name='hvm_dense_smp_v6_s100',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=[
            '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules,
        log_prefix='hvm_dense_smp_v6_s100__')

    # -- create trials
    exp.createTrials(verbose=1)
    #exp.createTrials(sampling='with-replacement', verbose=1) ###
    n_total_trials = len(exp._trials['imgFiles'])
    # NOTE(review): `mult` is not defined in this function -- presumably
    # a module-level constant; confirm. A sibling variant of this
    # function asserts mult * (len(meta) + 16) instead -- verify which
    # count matches this experiment's configuration.
    assert n_total_trials == mult * (len(meta) + 32 + 16)

    return exp, html_data