def get_features_by_name(feature_name):
    """Return (features, meta) for a named HvM feature set.

    Parameters
    ----------
    feature_name : str
        'IT'  -> neuronal features restricted to IT sites
        'V4'  -> neuronal features restricted to V4 sites
        'NYU' -> cached model fc6 features for a stored feature ObjectId

    Returns
    -------
    (features, meta) : the feature array and the HvM meta table.

    Raises
    ------
    ValueError
        If *feature_name* is not one of the supported names (the original
        code fell through and crashed with a NameError at the return).
    """
    # TODO: Add support for NYU and other features
    if feature_name not in ('IT', 'V4', 'NYU'):
        raise ValueError('Unknown feature_name: %r' % (feature_name,))
    # Import lazily so the heavy dldata dependency is only paid when used;
    # all branches share one dataset instance (previously built per-branch).
    import dldata.stimulus_sets.hvm as hvm
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    if feature_name == 'IT':
        features = dataset.neuronal_features[:, dataset.IT_NEURONS]
    elif feature_name == 'V4':
        features = dataset.neuronal_features[:, dataset.V4_NEURONS]
    else:  # 'NYU'
        # fc6 features of a stored model, keyed by its stored ObjectId.
        features = dataset.get_features(
            dict(crop=None,
                 dtype=u'float32',
                 mask=None,
                 mode=u'RGB',
                 normalize=False,
                 resize_to=[256, 256]),
            ObjectId('542927872c39ac23120db840'), u'fc6')[:]
    return features, meta
def create_xarray(savepath):
    """Package the HvM 10ms temporal assembly as an xarray and save it.

    Builds a DataArray with dims ["neuroid", "presentation", "time_bin"]
    (stimulus set identical to HvM), flattens its multi-indexes, writes
    it to *savepath* as netCDF, and returns it.

    Note: relies on the "10ms" branch of dldata.
    """
    from dldata.stimulus_sets import hvm
    hvm_dataset = hvm.HvMWithDiscfade()
    packaged = hvm_dataset.xr_from_hvm_10ms_temporal()
    packaged.reset_index(packaged.indexes.keys(), inplace=True)
    packaged.to_netcdf(savepath)
    return packaged
def get_decoder_model_by_name(decoder_model_name):
    """Return a decoder-model specification dict for a named configuration.

    Supported names: 'StandardModel' (LinearSVC with a C grid search over
    the HvM V6 images outside ImageSet1), 'StandardModelWithMargins',
    'LogisticRegressionModel', and 'SVMModel' -- the latter three derive
    from 'StandardModel' and tweak its metric_kwargs.

    Raises
    ------
    ValueError
        If *decoder_model_name* is not recognized.

    Fixes: the original used the Python-2-only ``raise ValueError, msg``
    statement, and its ``else`` was attached only to the last ``if`` of an
    if/return chain; rewritten as a single elif chain.
    """
    if decoder_model_name == 'StandardModel':
        all_ids = list(hvm.HvMWithDiscfade().meta['_id'])
        imageset1 = [all_ids[i] for i in ImageSet1_inds]
        not_imageset1 = list(set(all_ids) - set(imageset1))
        model = dict(
            name='StandardModel',
            # train on everything at variation V6 except ImageSet1
            train_q={'var': ['V6'], '_id': not_imageset1},
            test_q=dict(_id=imageset1),
            labelfunc='category',
            split_by=None,
            npc_train=None,
            npc_test=len(imageset1),
            npc_validate=0,
            num_splits=1,
            metric_screen="classifier",
            metric_kwargs={
                'model_type': 'svm.LinearSVC',
                'model_kwargs': {
                    'GridSearchCV_params': {
                        'C': [1e-5, 1e-4, 1e-3, .25e-3, .5e-3, .75e-3,
                              1e-2, .25e-2, .5e-2, .75e-2, 1e-1, 1, 10]
                    },
                    'GridSearchCV_kwargs': {'n_jobs': 1}
                }
            })
        return model
    elif decoder_model_name == 'StandardModelWithMargins':
        model = get_decoder_model_by_name('StandardModel')
        model['metric_kwargs']['margins'] = True
        return model
    elif decoder_model_name == 'LogisticRegressionModel':
        model = get_decoder_model_by_name('StandardModel')
        model['metric_kwargs']['model_type'] = \
            'linear_model.LogisticRegression'
        model['metric_kwargs']['probabilities'] = True
        return model
    elif decoder_model_name == 'SVMModel':
        # linear-kernel SVC with probability estimates enabled
        model = get_decoder_model_by_name('StandardModel')
        model['metric_kwargs']['model_type'] = 'svm.SVC'
        model['metric_kwargs']['model_kwargs']['kernel'] = 'linear'
        model['metric_kwargs']['model_kwargs']['probability'] = True
        model['metric_kwargs']['probabilities'] = True
        return model
    else:
        raise ValueError('Model not recognized')
def createTrials(self):
    """Populate self._trials (imgFiles/imgData) for an HvM timing run.

    Selects one object ('_01_Airliner_2jetEngines') at full variation
    ('V6'), publishes those images, and lays out seeded, blocked,
    shuffled trials with repeat and learning-period trials mixed in.
    Relies on module-level BSIZE, REPEATS and LEARNING_PERIOD.
    """
    dataset = hvm.HvMWithDiscfade()
    preproc = None  # publish the raw images, no preprocessing
    dummy_upload = True
    image_bucket_name = 'hvm_timing'
    seed = 0  # fixed seed -> reproducible trial ordering
    meta = dataset.meta
    # Earlier image-selection queries, kept for reference:
    #query_inds = np.arange(len(meta))
    #query#_inds = ((np.sqrt(meta['rxy']**2) > 30) & (np.sqrt(meta['rxz']**2) > 30) & (np.sqrt(meta['ryz']**2) > 30)).nonzero()[0]
    #query_inds = ((meta['var'] == 'V6') & (meta['category'] == 'Faces')).nonzero()[0]
    #query_inds = ((meta['var'] == 'V6') & (meta['category'] == 'Faces')).nonzero()[0]
    #query_inds = ((np.sqrt(meta['ryz']**2) > 0) & (np.sqrt((meta['rxy']**2 + meta['rxz']**2)) < 10) & (meta['category'] == 'Tables')).nonzero()[0]
    #query_inds = ((np.sqrt(meta['ryz']**2) > 0) & (np.sqrt((meta['rxy']**2 + meta['rxz']**2)) < 50) & (meta['category'] == 'Chairs')).nonzero()[0]
    query_inds = ((meta['var'] == 'V6') &
                  (meta['obj'] == '_01_Airliner_2jetEngines')).nonzero()[0]
    #query_inds = ((meta['var'] == 'V6') & (meta['obj'] == 'face0001')).nonzero()[0]
    #aquery_inds = ((meta['var'] == 'V6') & (meta['obj'] == 'bear')).nonzero()[0]
    urls = dataset.publish_images(query_inds, preproc, image_bucket_name,
                                  dummy_upload=dummy_upload)
    rng = np.random.RandomState(seed=seed)
    perm = rng.permutation(len(query_inds))
    nblocks = int(math.ceil(float(len(perm)) / BSIZE))
    print('%d blocks' % nblocks)
    imgs = []
    imgData = []
    # NOTE: only the first two blocks are used ([:2]).
    for bn in range(nblocks)[:2]:
        pinds = perm[BSIZE * bn:BSIZE * (bn + 1)]
        # duplicate the block's first REPEATS trials for quality estimation
        pinds = np.concatenate([pinds, pinds[:REPEATS]])
        rng.shuffle(pinds)
        if bn == 0:
            # first block: learning (warm-up) trials from the permutation tail
            learning = perm[-LEARNING_PERIOD:]
        else:
            # later blocks: learning trials from the end of the previous block
            learning = perm[BSIZE * bn - LEARNING_PERIOD:BSIZE * bn]
        pinds = np.concatenate([learning, pinds])
        assert (bn + 1 == nblocks) or (len(pinds) ==
                                       BSIZE + REPEATS + LEARNING_PERIOD), len(pinds)
        bmeta = meta[query_inds[pinds]]
        burls = [urls[_i] for _i in pinds]
        # flatten each structured-record row into a plain dict
        bmeta = [{df: bm[df] for df in meta.dtype.names} for bm in bmeta]
        imgs.extend(burls)
        imgData.extend(bmeta)
    self._trials = {'imgFiles': imgs, 'imgData': imgData}
def createTrials(self):
    """Populate self._trials for the HvM bounding-box timing experiment.

    Uses images whose extended-meta 'axis_bb_top' is positive, and
    augments each trial's metadata with the area-bounding-box corner
    fields while dropping any 'semantic' columns.
    """
    dataset = hvm.HvMWithDiscfade()
    preproc = None  # publish the raw images, no preprocessing
    dummy_upload = True
    image_bucket_name = 'hvm_timing'
    seed = 0  # fixed seed -> reproducible trial ordering
    meta = dataset.meta
    extended_meta = dataset.extended_meta
    query_inds = (extended_meta['axis_bb_top'] > 0).nonzero()[0]
    urls = dataset.publish_images(query_inds, preproc, image_bucket_name,
                                  dummy_upload=dummy_upload)
    rng = np.random.RandomState(seed=seed)
    perm = rng.permutation(len(query_inds))
    # extra bounding-box corner fields copied into each trial's metadata
    additional = ('area_bb_0_x', 'area_bb_0_y', 'area_bb_1_x', 'area_bb_1_y',
                  'area_bb_2_x', 'area_bb_2_y', 'area_bb_3_x', 'area_bb_3_y')
    nblocks = int(math.ceil(float(len(perm))/BSIZE))
    print('%d blocks' % nblocks)
    imgs = []
    imgData = []
    for bn in range(nblocks)[:]:
        pinds = perm[BSIZE * bn: BSIZE * (bn + 1)]
        # duplicate the block's first REPEATS trials for quality estimation
        pinds = np.concatenate([pinds, pinds[: REPEATS]])
        rng.shuffle(pinds)
        if bn == 0:
            # first block: learning (warm-up) trials from the permutation tail
            learning = perm[-LEARNING_PERIOD: ]
        else:
            # later blocks: learning trials from the end of the previous block
            learning = perm[BSIZE * bn - LEARNING_PERIOD: BSIZE*bn]
        pinds = np.concatenate([learning, pinds])
        assert (bn + 1 == nblocks) or (len(pinds) ==
                                       BSIZE + REPEATS + LEARNING_PERIOD), len(pinds)
        bmeta = extended_meta[query_inds[pinds]]
        burls = [urls[_i] for _i in pinds]
        names = meta.dtype.names + additional
        # exclude 'semantic' columns from what gets shipped to workers
        names = [ n for n in names if not 'semantic' in n ]
        bmeta = [{df: bm[df] for df in names} for bm in bmeta]
        imgs.extend(burls)
        imgData.extend(bmeta)
    self._trials = {'imgFiles': imgs, 'imgData': imgData}
def get_human_data_densely_sampled():
    """Load and reshape human behavioral data for the densely sampled V6 set.

    Returns
    -------
    human_reps : array, shape (128, 2 * n_workers)
        Per-image correctness with the two reps stacked side by side.
    human_individuals : array, shape (128, 2, n_workers)
        Per-image correctness kept separate by rep and worker.
    raw_data_with_rep :
        The raw trial table with a 'rep' column appended.
    """
    dataset = hvm.HvMWithDiscfade()
    raw_data = confusion_matrices.get_data('hvm_dense_smp_v6_2rpw',
                                           field='category')
    # Add rep number to raw data, then clean.
    # which_rep[worker][filename] counts how many times this worker has
    # already seen this image.
    which_rep = {}
    for worker in np.unique(raw_data['WorkerId']):
        which_rep[worker] = {}
        for filename in np.unique(raw_data['filename']):
            which_rep[worker][filename] = 0
    rep = np.zeros(raw_data['filename'].shape[0])
    for i, trial in enumerate(raw_data):
        filename = trial['filename']
        worker = trial['WorkerId']
        rep[i] = which_rep[worker][filename]
        which_rep[worker][filename] += 1
    raw_data_with_rep = raw_data.addcols([rep], names=['rep'])
    # Get rid of everything but first two reps, get rid of learning reps
    # (Images of V3 and V0)
    data = raw_data_with_rep[raw_data_with_rep['rep'] < 2]
    data = data[data['var'] == 'V6']
    # Reformat to matrix
    human_matrix = []  # images, reps, worker
    # canonical image ordering shared with the first experiment
    canonical_order = dataset.meta['_id'][ImageSet1_inds]
    workers = np.unique(data['WorkerId'])
    n_workers = len(workers)
    for worker in workers:
        worker_data = data[data['WorkerId'] == worker]
        rep0 = worker_data[worker_data['rep'] == 0]
        rep1 = worker_data[worker_data['rep'] == 1]
        c0 = []
        c1 = []
        # collect correctness per image in canonical order, per rep
        for Imid in canonical_order:
            c0.append(rep0[rep0['_id'] == Imid]['correct'])
            c1.append(rep1[rep1['_id'] == Imid]['correct'])
        X = np.column_stack([np.array(c0), np.array(c1)])
        X = np.expand_dims(X, 2)
        human_matrix.append(X)
        # every worker must have seen exactly the canonical image set
        assert set(np.unique(worker_data['_id'])) == set(canonical_order)
    human_matrix = np.concatenate(human_matrix, 2)
    assert human_matrix.shape == (128, 2, n_workers)
    human_individuals = deepcopy(human_matrix)
    # stack the two reps side by side along the worker axis
    human_reps = np.concatenate((human_matrix[:, 0, :],
                                 human_matrix[:, 1, :]), 1)
    return human_reps, human_individuals, raw_data_with_rep
def createTrials(self):
    """Assemble self._trials for the HvM position experiment.

    Publishes every 'V6'-variation image, then builds seeded random
    blocks of 50 images; each block is doubled (every image appears
    twice) and shuffled before its URLs and flattened metadata are
    appended to the trial lists.
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    query_inds = (meta['var'] == 'V6').nonzero()[0]
    urls = dataset.publish_images(query_inds, None,
                                  'hvm_images_for_position',
                                  dummy_upload=True)
    rng = np.random.RandomState(seed=0)  # fixed seed -> reproducible order
    perm = rng.permutation(len(query_inds))
    block_size = 50
    n_blocks = int(math.ceil(float(len(perm)) / block_size))
    print('%d blocks' % n_blocks)
    trial_urls = []
    trial_meta = []
    for block_idx in range(n_blocks):
        block = perm[block_size * block_idx:block_size * (block_idx + 1)]
        # show each image in the block twice, in shuffled order
        doubled = np.concatenate([block, block.copy()])
        shuffle_order = rng.permutation(len(doubled))
        doubled = doubled[shuffle_order]
        block_rows = meta[query_inds[doubled]]
        trial_urls.extend(urls[_i] for _i in doubled)
        # flatten each structured-record row into a plain dict
        trial_meta.extend({name: row[name] for name in meta.dtype.names}
                          for row in block_rows)
    self._trials = {'imgFiles': trial_urls, 'imgData': trial_meta}
import thing4a import dldata.stimulus_sets.hvm as hvm from collections import OrderedDict preproc = OrderedDict([(u'normalize', False), (u'dtype', u'float32'), (u'resize_to', [256, 256, 3]), (u'mode', u'RGB'), (u'crop', None), (u'mask', None)]) dataset = hvm.HvMWithDiscfade() imgs = dataset.get_images(preproc=preproc) pf = '/home/ubuntu/new/caffe/vgg_avg2.prototxt' sf = '/home/ubuntu/new/caffe/vgg_normalised.caffemodel' #im0 = 255*imgs[0].transpose((2, 0, 1)).reshape((1, 3, 256, 256)) #im1 = 255*imgs[-1].transpose((2, 0, 1)).reshape((1, 3, 256, 256)) meta = dataset.meta inds = (meta['category'] == 'Cars').nonzero()[0] ims = [ 255 * imgs[i][14:-15][:, 14:-15].transpose((2, 0, 1)).reshape( (1, 3, 227, 227)) for i in inds ] import numpy as np from PIL import Image im = np.asarray( Image.open('/home/ubuntu/imgres.jpg').resize( (256, 256), resample=Image.ANTIALIAS)).transpose(2, 0, 1).reshape( (1, 3, 256, 256))[:, :, 16:-16][:, :, :, 16:-16] im1 = np.asarray( Image.open('/home/ubuntu/test.png').convert('RGB').resize( (256, 256), resample=Image.ANTIALIAS)).transpose(2, 0, 1).reshape(
def get_exp(category, sandbox=True, dummy_upload=True):
    """Build the subordinate (within-category) identification experiment.

    Constructs an 8-way match-to-sample task over the objects of one HvM
    *category*, inserts repeated quality-estimation trials and per-HIT
    learning trials, then shuffles the response alternatives per HIT.

    Returns (exp, html_data).
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    inds = (meta['category'] == category).nonzero()[0]
    meta = meta[inds]  # restrict meta to this category
    objs = np.unique(meta['obj'])
    combs = [objs]  # a single 8-way comparison over all objects
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(inds, preproc, image_bucket_name,
                                  dummy_upload=dummy_upload)
    base_url = 'https://s3.amazonaws.com/hvm_timing/'
    # response image for each object: its first exemplar's image id
    obj_resp_ids = [meta[meta['obj'] == o]['id'][0] for o in objs]
    response_images = [{
        'urls': [base_url + obj_id + '.png' for obj_id in obj_resp_ids],
        'meta': [{
            'obj': obj,
            'category': category
        } for obj in objs],
        'labels': objs
    }]
    mult = 2  # number of passes over the image set
    html_data = {
        'response_images': response_images,
        'combs': combs,
        'num_trials': 90 * 8 * mult,
        'meta_field': 'obj',
        'meta': tb.tab_rowstack([meta] * mult),
        'urls': urls * mult,
        'shuffle_test': False,
    }
    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }, {
        'old': 'OBJTYPE',
        'new': category
    }]
    # per-HIT total: real trials + 32 repeats + 16 training
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32 + 16
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='hvm_subordinate.html',
        htmldst='hvm_subordinate_' + category + '_n%05d.html',
        tmpdir='tmp_subordinate_%s' % category,
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.35,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm subordinate identification",  # noqa
        collection_name='hvm_subordinate_identification_%s' % category,
        max_assignments=1,
        bucket_name='hvm_subordinate_identification_test',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=[
            'objnames.js', '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules,
        log_prefix='subordinate_' + category + '_')

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 8 * mult, n_total_trials

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data
    ind_repeats = repeat_inds[category] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {
        e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
        for e in exp._trials
    }
    # collect one learning trial per practice image (deduplicated by id)
    ind_learn = practice_inds[category]
    goodids = [meta[i]['id'] for i in ind_learn]
    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample'][
                    'id'] in goodids and exp._trials['imgData'][_ind][
                        'Sample']['id'] not in got:
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(
        trials_lrn['imgData'])
    # spread the QE repeats evenly (back to front) through each HIT
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1,
                        -ACTUAL_TRIALS_PER_HIT /
                        float(len(ind_repeats))).round().astype('int')
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT  # py2 int division
    n_applied_hits = 0
    # walk HITs back to front so earlier insertions don't shift offsets
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
                                -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1
    # prepend the learning trials to every HIT
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    #shuffle test on a per-hit basis (same permutation within a HIT)
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(8)
        for i in range(
                trials_per_hit * j,
                min(trials_per_hit * (j + 1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
        {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * 5, (n_hits_floor,
                                                        n_applied_hits)
    assert len(exp._trials['imgFiles']) == mult * (720 + 5 * (32 + 16)), len(
        exp._trials['imgFiles'])
    # disabled label-consistency check, kept for reference:
    """
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    offsets2 = np.arange(8 * 4)[::-1] + offsets
    ibie = zip(range(0, 720 + 4 * 32, trials_per_hit),
               range(trials_per_hit, 720 + 4 * 32 + trials_per_hit,
                     trials_per_hit))
    assert all([set([tuple(e) for e in
                     np.array(exp._trials['labels'][ib:ie])[offsets2]]) ==
                s_ref_labels for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'
    """
    return exp, html_data
def get_exp(sandbox=True, debug=False, dummy_upload=True):
    """Build the basic-level 2-way categorization experiment over HvM.

    One 2-way comparison for every pair of the 8 categories; repeat
    trials are inserted for quality estimation, with answer choices
    flipped for half of the repeats.

    Returns (exp, html_data); returns early right after trial creation
    when *debug* is True.
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    response_images = []
    categories = np.unique(meta['category'])
    # all (8 choose 2) = 28 category pairings
    cat_combs = [e for e in itertools.combinations(categories, 2)]
    response_images.extend([{
        'urls': [get_url_labeled_resp_img(c1),
                 get_url_labeled_resp_img(c2)],
        'meta': [{
            'category': category
        } for category in [c1, c2]],
        'labels': [c1, c2]
    } for c1, c2 in cat_combs])
    combs = cat_combs
    urls = dataset.publish_images(range(len(dataset.meta)), None,
                                  'hvm_timing', dummy_upload=dummy_upload)
    with open(path.join(path.dirname(__file__), 'tutorial_html_basic'),
              'r') as tutorial_html_file:
        tutorial_html = tutorial_html_file.read()
    label_func = lambda x: hvm.OBJECT_NAMES[x['obj']]
    html_data = {
        'combs': combs,
        'response_images': response_images,
        'num_trials': 125 * 2,
        'meta_field': 'category',
        'meta': meta,
        'urls': urls,
        'shuffle_test': True,
        # only full-variation images enter the trial pool
        'meta_query': lambda x: x['var'] == 'V6',
        'label_func': label_func
    }
    # singular display names substituted into the HTML for each category
    cat_dict = {
        'Animals': 'Animal',
        'Boats': 'Boat',
        'Cars': 'Car',
        'Chairs': 'Chair',
        'Faces': 'Face',
        'Fruits': 'Fruit',
        'Planes': 'Plane',
        'Tables': 'Table'
    }
    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(10)
    }, {
        'old': 'OBJTYPE',
        'new': 'Object Recognition'
    }, {
        'old': 'TUTORIAL_HTML',
        'new': tutorial_html
    }, {
        'old': 'CATDICT',
        'new': json.dumps(cat_dict)
    }, {
        'old': 'METAFIELD',
        'new': "'category'"
    }]
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='web/general_two_way.html',
        htmldst='hvm_basic_2way_n%05d.html',
        sandbox=sandbox,
        title=
        'Object recognition --- report what you see. Up to 50 cent performance based bonus',
        reward=0.25,
        duration=1600,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment='hvm_basic_2ways',
        collection_name='hvm_basic_2ways',
        max_assignments=1,
        bucket_name='hvm_2ways',
        trials_per_hit=ACTUAL_TRIALS_PER_HIT + 24,  # 140 + 6x4 repeats
        html_data=html_data,
        tmpdir='tmp',
        frame_height_pix=1200,
        othersrc=[
            '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules)

    # -- create trials
    exp.createTrials(sampling='with-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == (8 * 7 / 2) * 250
    if debug:
        return exp, html_data

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data
    ind_repeats = [0, 4, 47, 9, 17, 18] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {
        e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
        for e in exp._trials
    }

    # -- flip answer choices of some repeated images
    n_qe = len(trials_qe['labels'])
    # if True, flip (half of the repeats get reversed alternatives)
    flips = [True] * (n_qe / 2) + [False] * (n_qe - n_qe / 2)
    assert len(flips) == n_qe
    rng.shuffle(flips)
    assert len(trials_qe.keys()) == 4
    for i, flip in enumerate(flips):
        if not flip:
            continue
        trials_qe['imgFiles'][i][1].reverse()
        trials_qe['labels'][i].reverse()
        trials_qe['imgData'][i]['Test'].reverse()

    # -- actual application: spread repeats evenly through each HIT
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1,
                        -ACTUAL_TRIALS_PER_HIT /
                        float(len(ind_repeats))).round().astype('int')
    assert len(offsets) == len(offsets)
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT  # py2 int division
    n_applied_hits = 0
    # walk HITs back to front so earlier insertions don't shift offsets
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
                                -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
                # exp._trials[k].insert(i_trial_begin + offset, 'test')
        n_applied_hits += 1

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
        {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert 50 == n_applied_hits, n_applied_hits
    assert len(exp._trials['imgFiles']) == 50 * 164
    s_ref_labels = [tuple(e) for e in trials_qe['labels']]
    offsets2 = np.arange(24)[::-1] + offsets
    ibie = zip(range(0, 50 * 164, 164), range(164, 50 * 164, 164))
    assert all([[(e1, e2)
                 for e1, e2 in np.array(exp._trials['labels'][ib:ie])[offsets2]
                 ] == s_ref_labels for ib, ie in ibie])

    # -- drop unneeded, potentially abusable stuffs
    #del exp._trials['imgData']
    print '** Finished creating trials.'
    return exp, html_data
def get_exp(sandbox=True, dummy_upload=True):
    """Build the densely-sampled V6 experiment (2 reps per worker).

    Image and practice indices are loaded from 'inds.npy' /
    'practice_inds.npy'; each of the 128 images appears n_repeats times
    per worker. Returns (exp, html_data).
    """
    dataset = hvm.HvMWithDiscfade()
    # meta = dataset.meta ###
    meta_H = dataset.meta  ###
    #inds = np.arange(len(meta))
    n_repeats = 2
    #get inds and practice_inds from file
    inds = list(np.load('inds.npy'))
    practice_inds = list(np.load('practice_inds.npy'))
    assert len(inds) == 128
    inds = inds * n_repeats

    def test_inds(inds, n_repeats, practice_inds):
        # Sanity checks: 128 * n_repeats trials; 64 objects each with
        # 2 images shown n_repeats times; practice set disjoint from inds.
        assert len(inds) == 128 * n_repeats
        #Test that there are 4 per object, and 64 objects
        object_count = {}
        for i in inds:
            print 'Counting object %s' % (meta_H['obj'][i])
            object_count[meta_H['obj'][i]] = object_count.get(
                meta_H['obj'][i], 0) + 1
        print 'Number of unique objects'
        print object_count.keys()
        print len(object_count.keys())
        assert len(object_count.keys()) == 64
        for obj in object_count.keys():
            assert object_count[obj] == 2 * n_repeats
        assert len(np.unique(inds)) * n_repeats == len(inds)
        assert len(set(inds) & set(practice_inds)) == 0

    test_inds(inds, n_repeats, practice_inds)
    meta = meta_H[inds]  ###
    #n_hits_from_data = len(meta) / ACTUAL_TRIALS_PER_HIT
    n_hits_from_data = len(meta)  ###
    categories = np.unique(meta['category'])  # dataset.categories ###
    combs = [categories]
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(range(len(meta_H)), preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)
    base_url = 'https://canonical_images.s3.amazonaws.com/'
    # one canonical response image per category
    response_images = [{
        'urls': [base_url + cat + '.png' for cat in categories],
        'meta': [{
            'category': 'Animals'
        }, {
            'category': 'Boats'
        }, {
            'category': 'Cars'
        }, {
            'category': 'Chairs'
        }, {
            'category': 'Faces'
        }, {
            'category': 'Fruits'
        }, {
            'category': 'Planes'
        }, {
            'category': 'Tables'
        }],
        'labels': categories
    }]
    mult = 15  ### 2
    #ind_repeats = repeat_inds * REPEATS_PER_QE_IMG ###
    #rng = np.random.RandomState(0) ###
    #rng.shuffle(ind_repeats) ###
    ind_learn = practice_inds
    html_data = {
        'response_images': response_images,
        'combs': combs,
        # 'num_trials': 90 * 64 * mult,
        'meta_field': 'category',
        'meta': meta_H,
        'idx_smp': inds,
        #'idx_rep': ind_repeats, ###
        'idx_lrn': ind_learn,
        'urls': urls,
        'n_hits': mult,
        'shuffle_test': False,
    }
    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }]
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 16
    exp = SimpleMatchToSampleExperiment(
        htmlsrc='hvm_dense_smp_v6_2rpw.html',
        htmldst='hvm_dense_smp_v6_2rpw_n%05d.html',
        tmpdir='tmp_dense_smp_v6_2rpw',
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.15,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm dense sampling of 128 V6 images, 2reps per worker",  # noqa
        collection_name='hvm_dense_smp_v6_2rpw',
        max_assignments=1,
        bucket_name='hvm_dense_smp_v6_2rpw',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=['dltk.js', 'dltkexpr.js', 'dltkrsvp.js'],
        additionalrules=additionalrules,
        log_prefix='hvm_dense_smp_v6_2rpw__')

    # -- create trials
    exp.createTrials(verbose=1)
    # verify exactly the ids from the first experiment are used
    all_ids = [m['Sample']['_id'] for m in exp._trials['imgData']]
    ids = set([str(_) for _ in np.unique(all_ids)])
    ids_test = set([str(_) for _ in np.load('first_experiment_ids.npy')])
    assert len(ids) == len(ids_test)
    assert ids == ids_test
    #exp.createTrials(sampling='with-replacement', verbose=1) ###
    n_total_trials = len(exp._trials['imgFiles'])
    #assert n_total_trials == mult * (len(meta) + 32 + 16) ###
    assert n_total_trials == mult * (len(meta) + 16)  ###
    return exp, html_data
# Script chunk: load precomputed features for a feature file given on the
# command line, ahead of running the hvm analysis.
# NOTE(review): relies on sys/np/cPickle/h5py being imported elsewhere in
# the file (not visible in this chunk) -- confirm before running standalone.
from hvm_analysis import hvm_analysis
import dldata.stimulus_sets.hvm as nd
import dldata.metrics.utils as utils
from hvm_analysis import post_process_neural_regression_msplit
import yamutils.fast as fast

feature_file = sys.argv[1]
nfeat = None  # optional cap on number of features, from argv[2]
if len(sys.argv) > 2:
    nfeat = int(sys.argv[2])
print "Start computing for " + feature_file

# Get the dataset
hvm_dataset = nd.HvMWithDiscfade()

# Get features
# fixed permutation over the 5760 HvM images (seeded for reproducibility)
perm = np.random.RandomState(0).permutation(5760)
if '.p' in feature_file:
    # pickled feature array
    features = cPickle.load(open(feature_file))
elif '.h5' in feature_file:
    # HDF5 file with features split across datasets 'bn0'..'bn22';
    # concatenate them along axis 0 into one array
    f_feat = h5py.File(feature_file, 'r')
    bns = ['bn'+str(i) for i in range(23)]
    F = f_feat[bns[0]][:].copy()
    for bn in bns[1:]:
        Fbn = f_feat[bn][:].copy()
        F = np.append(F, Fbn, axis=0)
    if 'hvm' in feature_file:
        F = F[:5760]  # keep only the HvM rows
    features = F
def get_exp(sandbox=True, dummy_upload=True):
    """Build the basic 8-way categorization experiment over all HvM images.

    Every image gets a trial; 8 quality-estimation images are repeated 4
    times within each HIT. Returns (exp, html_data).
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    categories = dataset.categories
    combs = [categories]  # a single 8-way comparison
    inds = np.arange(len(meta))
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(inds, preproc, image_bucket_name,
                                  dummy_upload=dummy_upload)
    base_url = 'https://canonical_images.s3.amazonaws.com/'
    # one canonical response image per category
    response_images = [{
        'urls': [base_url + cat + '.png' for cat in categories],
        'meta': [{'category': 'Animals'},
                 {'category': 'Boats'},
                 {'category': 'Cars'},
                 {'category': 'Chairs'},
                 {'category': 'Faces'},
                 {'category': 'Fruits'},
                 {'category': 'Planes'},
                 {'category': 'Tables'}],
        'labels': categories}]
    html_data = {
        'response_images': response_images,
        'combs': combs,
        'num_trials': 90 * 64,
        'meta_field': 'category',
        'meta': meta,
        'urls': urls,
        'shuffle_test': True,
    }
    additionalrules = [{'old': 'LEARNINGPERIODNUMBER',
                        'new': str(LEARNING_PERIOD)}]
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='hvm_basic_categorization.html',
        htmldst='hvm_basic_categorization_n%05d.html',
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.35,
        duration=1500,
        keywords=['neuroscience', 'psychology', 'experiment',
                  'object recognition'],  # noqa
        description="***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm basic categorization",  # noqa
        collection_name='hvm_basic_categorization',
        max_assignments=1,
        bucket_name='hvm_basic_categorization',
        trials_per_hit=trials_per_hit,  # 150 + 8x4 repeats
        html_data=html_data,
        frame_height_pix=1200,
        othersrc = ['../../lib/dltk.js'],
        additionalrules=additionalrules
    )

    # -- create trials
    exp.createTrials(sampling='with-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 64, n_total_trials

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data
    ind_repeats = [3440, 3282, 3321, 3802, 5000, 3202, 4041,
                   4200] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
                 for e in exp._trials}
    # spread the repeats evenly (back to front) through each HIT
    offsets = np.arange(
        ACTUAL_TRIALS_PER_HIT - 3, -1,
        -ACTUAL_TRIALS_PER_HIT / float(len(ind_repeats))
    ).round().astype('int')
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT  # py2 int division
    n_applied_hits = 0
    # walk HITs back to front so earlier insertions don't shift offsets
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
                                -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
        {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert 38 == n_applied_hits, n_applied_hits
    assert len(exp._trials['imgFiles']) == 6976, len(exp._trials['imgFiles'])
    # every full HIT must contain the same set of repeated labels
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    offsets2 = np.arange(8 * 4)[::-1] + offsets
    ibie = zip(range(0, 6976, trials_per_hit),
               range(trials_per_hit, 6976 + trials_per_hit, trials_per_hit))
    assert all([set([tuple(e) for e in
                     np.array(exp._trials['labels'][ib:ie])[offsets2]]) ==
                s_ref_labels for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'
    return exp, html_data
def get_exp(sandbox=True, dummy_upload=True):
    """Build the densely-sampled V6 experiment (100 images, 60 workers each).

    Uses the image indices from
    BehavioralBenchmark.experiments.image_level_benchmark.INDS; each image
    is shown repeats_per_image times per HIT plus a learning period.
    Returns (exp, html_data).
    """
    LEARNING_PERIOD = 10
    # practice images: a few low-variation plus one high-variation image
    # per 90-image object stride
    practice_inds_low_var = range(0, 80, 10)
    practice_inds_high_var = range(3200, 3200 + 90 * 8, 90)
    practice_inds = practice_inds_low_var + practice_inds_high_var
    repeats_per_image = 2  ### 2
    workers_per_image = 60
    dataset = hvm.HvMWithDiscfade()
    # meta = dataset.meta ###
    meta_H = dataset.meta  ###
    #inds = np.arange(len(meta))
    inds = BehavioralBenchmark.experiments.image_level_benchmark.INDS
    # practice images must not overlap the experiment images
    assert (len(set(practice_inds).intersection(set(inds))) == 0)
    meta = meta_H[inds]  ###
    #n_hits_from_data = len(meta) / ACTUAL_TRIALS_PER_HIT
    categories = np.unique(meta['category'])  # dataset.categories ###
    combs = [categories]
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(range(len(meta_H)), preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)
    base_url = 'https://canonical_images.s3.amazonaws.com/'
    # one canonical response image per category
    response_images = [{
        'urls': [base_url + cat + '.png' for cat in categories],
        'meta': [{
            'category': 'Animals'
        }, {
            'category': 'Boats'
        }, {
            'category': 'Cars'
        }, {
            'category': 'Chairs'
        }, {
            'category': 'Faces'
        }, {
            'category': 'Fruits'
        }, {
            'category': 'Planes'
        }, {
            'category': 'Tables'
        }],
        'labels': categories
    }]
    inds_shown_per_hit = inds * repeats_per_image
    rng = np.random.RandomState(0)
    ind_learn = practice_inds
    ###############################################################
    html_data = {
        'response_images': response_images,
        'combs': combs,
        # 'num_trials': 90 * 64 * mult,
        'meta_field': 'category',
        'meta': meta_H,
        'idx_smp': inds,
        'idx_lrn': ind_learn,
        'urls': urls,
        'n_hits': workers_per_image,
        'shuffle_test': False,
    }
    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }]
    trials_per_hit = len(inds_shown_per_hit) + len(practice_inds)
    exp = SimpleMatchToSampleExperiment(
        htmlsrc='hvm_dense_smp_v6_s100.html',
        htmldst='hvm_dense_smp_v6_s100_n%05d.html',
        tmpdir='tmp_dense_smp_v6_s100',
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.10,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm dense sampling of 100 V6 images",  # noqa
        collection_name='hvm_dense_smp_v6_s100',
        max_assignments=1,
        bucket_name='hvm_dense_smp_v6_s100',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=[
            '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules,
        log_prefix='hvm_dense_smp_v6_s100__')

    # -- create trials
    exp.createTrials(verbose=1)
    #exp.createTrials(sampling='with-replacement', verbose=1) ###
    n_total_trials = len(exp._trials['imgFiles'])
    # NOTE(review): `mult` is not defined in this function (it appears to
    # be copied from a sibling get_exp that defines it) -- this assert
    # will raise NameError when reached; confirm the intended multiplier.
    assert n_total_trials == mult * (len(meta) + 32 + 16)
    return exp, html_data