def package_data_assembly(proto_data_assembly, assembly_identifier, stimulus_set_identifier,
                          assembly_class="NeuronRecordingAssembly", bucket_name="brainio-contrib"):
    """
    Package a set of experimental data together with its metadata for the BrainIO system.

    Verifies the assembly, writes it to a local netCDF file, uploads that file to S3,
    and registers the result in the BrainIO lookup.

    :param proto_data_assembly: An xarray DataArray containing experimental measurements
        and all related metadata.
        * A neural DataArray must have exactly the dimensions
          * presentation
          * neuroid
          * time_bin
          A behavioral DataArray should also have a presentation dimension, but can be
          flexible about its other dimensions.
        * The presentation dimension must have an image_id coordinate and should have
          coordinates for presentation-level metadata such as repetition and image_id.
          It should not carry image-specific metadata; those are drawn from the
          StimulusSet based on image_id.
        * The neuroid dimension must have a neuroid_id coordinate and should have
          coordinates for as much neural metadata as possible (e.g. region, subregion,
          animal, row in array, column in array, etc.).
        * The time_bin dimension should have coordinates time_bin_start and time_bin_end.
    :param assembly_identifier: A dot-separated string starting with a lab identifier.
        * For published data:
          <lab identifier>.<first author, e.g. 'Rajalingham' or 'MajajHong' for shared
          first-authorship><YYYY year of publication>
        * For requests:
          <lab identifier>.<b for behavioral|n for neuroidal>.<m for monkey|h for human>
          .<proposer, e.g. 'Margalit'>.<pull request number>
    :param stimulus_set_identifier: The unique name of an existing StimulusSet in the
        BrainIO system.
    :param assembly_class: The name of a DataAssembly subclass.
    :param bucket_name: 'brainio-dicarlo' for DiCarlo Lab assemblies,
        'brainio-contrib' for external assemblies.
    """
    _logger.debug(f"Packaging {assembly_identifier}")

    # Validate before doing any remote work so bad input fails fast and cheap.
    verify_assembly(proto_data_assembly, assembly_class=assembly_class)
    assert hasattr(brainio_base.assemblies, assembly_class)
    assert stimulus_set_identifier in list_stimulus_sets(), \
        f"StimulusSet {stimulus_set_identifier} not found in packaged stimulus sets"

    # Derive storage names: dots are not usable in the store identifier, so they
    # become underscores; the netCDF file sits next to this module and the S3 key
    # mirrors the file name.
    store_identifier = "assy_" + assembly_identifier.replace(".", "_")
    file_name = store_identifier + ".nc"
    netcdf_path = Path(__file__).parent / file_name
    s3_key = file_name

    # Write locally, upload, then record the upload in the lookup so the assembly
    # can be retrieved by its identifier.
    sha1 = write_netcdf(proto_data_assembly, netcdf_path)
    upload_to_s3(netcdf_path, bucket_name, s3_key)
    lookup.append(object_identifier=assembly_identifier,
                  stimulus_set_identifier=stimulus_set_identifier,
                  lookup_type=TYPE_ASSEMBLY,
                  bucket_name=bucket_name,
                  sha1=sha1,
                  s3_key=s3_key,
                  cls=assembly_class)
    _logger.debug(f"assembly {assembly_identifier} packaged")
def main():
    """Entry point: inspect available BrainIO collections and gather local stimuli."""
    data_dir = Path(__file__).parents[6] / 'data2' / 'active' / 'users' / 'sachis'
    assert os.path.isdir(data_dir)

    import brainio_collection
    # Print what is already packaged, for reference while preparing the new data.
    print(brainio_collection.list_stimulus_sets())
    print(brainio_collection.list_assemblies())

    # NOTE(review): this fetched set is printed for inspection and then the name
    # `stimuli` is rebound below from the local data directory — confirm the
    # fetch is intentionally inspection-only.
    stimuli = brainio_collection.get_stimulus_set('dicarlo.objectome.public').sort_values(by='image_id')
    print(stimuli)
    # print(stimuli.columns)

    stimuli = collect_stimuli(data_dir)

    # Packaging steps, currently disabled:
    # stimuli.name = 'dicarlo.Rust2012'
    # assembly = load_responses(data_dir, stimuli)
    # assembly.name = 'dicarlo.SanghaviKar2020'
    # print('Packaging stimuli')
    # package_stimulus_set(stimuli, stimulus_set_name=stimuli.name, bucket_name='brainio-dicarlo')
    # print('Packaging assembly')
    # package_data_assembly(assembly, data_assembly_name=assembly.name, stimulus_set_name=stimuli.name,
    #                       bucket_name='brainio-dicarlo')
    return
def test_list_stimulus_set(stimulus_set):
    """Every known stimulus set identifier appears in the collection listing."""
    known_identifiers = brainio_collection.list_stimulus_sets()
    assert stimulus_set in known_identifiers