def package_Movshon_datasets(name):
    """Split the Movshon assembly `name` into public/private subsets and package each.

    Unique images are split 30%/70% (public/private), stratified over
    ``texture_type`` so both subsets cover the texture conditions. The split is
    seeded, so repeated runs produce the same partition.
    """
    assembly = load_assembly(name)
    assembly.load()
    base_assembly = assembly
    # unique image ids, kept in order of first occurrence
    _, first_occurrence = np.unique(base_assembly['image_id'].values, return_index=True)
    first_occurrence = np.sort(first_occurrence)
    image_ids = base_assembly['image_id'].values[first_occurrence]
    texture_types = base_assembly['texture_type'].values[first_occurrence]
    # deterministic 30/70 stratified split over unique images
    splitter = StratifiedShuffleSplit(n_splits=1, train_size=.3, test_size=None,
                                      random_state=RandomState(seed=12))
    public_indices, private_indices = next(splitter.split(np.zeros(len(image_ids)), texture_types))
    for access, indices in (('public', public_indices), ('private', private_indices)):
        selected_ids = set(image_ids[indices])
        presentation_filter = [image_id in selected_ids for image_id in base_assembly['image_id'].values]
        assembly = base_assembly[{'presentation': presentation_filter}]
        adapt_stimulus_set(assembly, access)
        package_stimulus_set(assembly.attrs['stimulus_set'],
                             stimulus_set_name=assembly.attrs['stimulus_set_name'])
        del assembly.attrs['stimulus_set']
        package_data_assembly(assembly, f"{name}.{access}",
                              stimulus_set_name=assembly.attrs['stimulus_set_name'])
    # not really sure if this is necessary
    return assembly
def main(access):
    """Repackage an existing FreemanZiemba2013 stimulus set and assembly with apertured stimuli.

    `access` selects which split to convert ('public', 'private', or 'both' for
    the full, un-split set). Naming follows the existing BrainIO convention:
    stimulus-set names join the access suffix with '-', assembly names with '.'.
    """
    local_data_path = fetch._local_data_path
    name_root = 'movshon.FreemanZiemba2013'

    def with_access(base, separator):
        # 'both' denotes the full set, which carries no access suffix
        return base if access == "both" else base + separator + access

    stimulus_set_name_existing = with_access(name_root, "-")
    stimulus_set_name_new = with_access(name_root + ".aperture", "-")
    data_assembly_name_existing = with_access(name_root, ".")
    data_assembly_name_new = with_access(name_root + ".aperture", ".")
    # scratch directory for the converted stimuli
    temp_dir = os.path.join(local_data_path, "temp_" + data_assembly_name_new.replace(".", "_"))

    stimulus_set_existing = get_stimulus_set(stimulus_set_name_existing)
    stimulus_set_new = convert_stimuli(stimulus_set_existing, stimulus_set_name_new, temp_dir)
    mapping = stimulus_set_new.id_mapping
    _logger.debug(f"Packaging stimuli: {stimulus_set_new.name}")
    package_stimulus_set(stimulus_set_new, stimulus_set_name=stimulus_set_new.name,
                         bucket_name="brainio-contrib")

    data_assembly_existing = get_assembly(data_assembly_name_existing)
    proto_data_assembly_new = convert_assembly(data_assembly_existing, data_assembly_name_new,
                                               stimulus_set_new, mapping)
    _logger.debug(f"Packaging assembly: {data_assembly_name_new}")
    package_data_assembly(proto_data_assembly_new, data_assembly_name_new, stimulus_set_name_new,
                          bucket_name="brainio-contrib")
def main():
    """Package the Kar2018 grayscale-COCO stimuli and neural responses."""
    identifier = 'dicarlo.Kar2018cocogray'
    data_dir = Path(__file__).parent / 'coco'
    stimuli = collect_stimuli(data_dir / 'stimuli')
    stimuli.name = identifier
    assembly = load_responses(data_dir / 'cocoGray_neural.h5', stimuli)
    assembly.name = identifier
    print("Packaging stimuli")
    package_stimulus_set(stimuli, stimulus_set_name=stimuli.name, bucket_name="brainio-dicarlo")
    print("Packaging assembly")
    package_data_assembly(assembly, data_assembly_name=assembly.name, stimulus_set_name=stimuli.name,
                          bucket_name="brainio-dicarlo")
def main():
    """Package the Sanghavi2020 neural responses recorded on the dicarlo.hvm stimuli.

    Raises:
        FileNotFoundError: if the network data directory is not available.
    """
    data_dir = Path(__file__).parents[6] / 'data2' / 'active' / 'users' / 'sachis' / 'database'
    # fail loudly even under `python -O`, where `assert` statements are stripped
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"data directory not found: {data_dir}")
    stimuli = brainio_collection.get_stimulus_set('dicarlo.hvm')
    assembly = load_responses(data_dir, stimuli)
    assembly.name = 'dicarlo.Sanghavi2020'
    print(assembly)
    print('Packaging assembly')
    package_data_assembly(assembly, data_assembly_name=assembly.name, stimulus_set_name=stimuli.name,
                          bucket_name="brainio-dicarlo")
def package_dicarlo_datasets(name):
    """Split the assembly `name` by stimulus variation and package each part.

    Variations 0 and 3 form the public subset; variation 6 forms the private
    (held-out) subset. For each subset, the adapted stimulus set is packaged
    first, then the data assembly referencing it.
    """
    base_assembly = load_assembly(name)
    base_assembly.load()
    base_assembly = _filter_erroneous_neuroids(base_assembly)
    variation_split = {'public': [0, 3], 'private': [6]}
    for variation_name, target_variation in variation_split.items():
        keep = [variation in target_variation for variation in base_assembly['variation'].values]
        assembly = base_assembly[{'presentation': keep}]
        assert hasattr(assembly, 'variation')
        adapt_stimulus_set(assembly, name_suffix=variation_name)
        package_stimulus_set(assembly.attrs['stimulus_set'],
                             stimulus_set_name=assembly.attrs['stimulus_set_name'],
                             bucket_name="brainio-dicarlo")
        del assembly.attrs['stimulus_set']
        package_data_assembly(assembly, f'{name}.{variation_name}', assembly.attrs['stimulus_set_name'],
                              bucket_name='brainio-dicarlo')
    return assembly
def main():
    """Package the Rajalingham2020 orthographic-IT stimuli and neural responses.

    Raises:
        FileNotFoundError: if the Rajalingham2020 data directory is not available.
    """
    data_dir = Path(__file__).parents[3] / 'Rajalingham2020'
    # fail loudly even under `python -O`, where `assert` statements are stripped
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"data directory not found: {data_dir}")
    stimuli = collect_stimuli(data_dir)
    stimuli.name = 'dicarlo.Rajalingham2020orthographic_IT'
    assembly = load_responses(data_dir, stimuli)
    assembly.name = 'dicarlo.Rajalingham2020orthographic_IT'
    print('Packaging stimuli')
    package_stimulus_set(stimuli, stimulus_set_name=stimuli.name, bucket_name="brainio-dicarlo")
    print('Packaging assembly')
    package_data_assembly(assembly, data_assembly_name=assembly.name, stimulus_set_name=stimuli.name,
                          bucket_name="brainio-dicarlo")
def main():
    """Package the Zhang2018 'Waldo' visual-search stimuli and behavioral data."""
    search_root = Path(__file__).parent / 'search_datasets'
    dataset_path = search_root / 'waldo'
    # stimuli
    stimuli = collect_stimuli(dataset_path)
    stimuli.name = 'klab.Zhang2018.search_waldo'
    # assembly over subjects 1-15
    assembly = collect_data(dataset_path, [*range(1, 16)])
    # NOTE(review): the assembly name lacks the '.' before 'search_waldo' that
    # the stimulus-set name has — presumably matching existing BrainIO naming,
    # but worth confirming
    assembly.name = 'klab.Zhang2018search_waldo'
    # package
    print("\nPackaging Stimuli ----------")
    package_stimulus_set(stimuli, stimulus_set_name=stimuli.name)
    print("\nPackaging Assembly ----------")
    package_data_assembly(assembly, data_assembly_name=assembly.name, stimulus_set_name=stimuli.name)
def package(assembly, stimuli):
    """Upload the stimulus set first, then the data assembly that references it."""
    steps = (
        ("stimuli", lambda: package_stimulus_set(stimuli, stimulus_set_name=stimuli.name)),
        ("assembly", lambda: package_data_assembly(assembly, data_assembly_name=assembly.name,
                                                   stimulus_set_name=stimuli.name)),
    )
    for label, upload in steps:
        print(f"Packaging {label}")
        upload()
from brainio_contrib.packaging import package_data_assembly


def create_xarray(savepath):
    """Build the HvM 10ms-resolution temporal assembly and cache it to `savepath`.

    Packages the DataArray (stimulus set the same as HvM). Returns an xarray
    with dimensions ["neuroid", "presentation", "time_bin"].
    Note: uses the author's "10ms" branch of dldata.
    """
    from dldata.stimulus_sets import hvm
    dataset = hvm.HvMWithDiscfade()
    assembly = dataset.xr_from_hvm_10ms_temporal()
    # flatten the MultiIndexes so the assembly can be serialized to netCDF
    assembly.reset_index(assembly.indexes.keys(), inplace=True)
    assembly.to_netcdf(savepath)
    return assembly


if __name__ == '__main__':
    # NOTE(review): logging, sys, os, xr and NeuroidAssembly are not imported in
    # this view — presumably imported elsewhere in the file; verify
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    # silence chatty third-party loggers
    for logger in [
        'peewee', 's3transfer', 'botocore', 'boto3', 'urllib3', 'PIL'
    ]:
        logging.getLogger(logger).setLevel(logging.INFO)
    assembly_path = os.path.join(os.path.dirname(__file__), 'darren_xr.nc')
    create_xarray(
        assembly_path)  # Note: this function was run separately by @anayebi
    assembly = xr.open_dataarray(assembly_path)
    assembly = NeuroidAssembly(assembly)
    package_data_assembly(assembly, data_assembly_name='dicarlo.Majaj2015.temporal-10ms',
                          bucket_name='brainio-dicarlo', stimulus_set_name='dicarlo.hvm')