def collect_stimuli(data_dir):
    """Build the StimulusSet described by ``data_IT_base616.pkl`` in ``data_dir``.

    The ``meta`` table of the pickled data is extended with per-image file
    columns derived from each row's ``id``: images are looked up at
    ``<data_dir>/stimuli/<id>.png`` and the ``sha1`` attribute of ``kf(path)``
    is used both as ``image_id`` and as ``image_file_sha1``.

    :param data_dir: directory holding ``data_IT_base616.pkl`` and a ``stimuli`` sub-directory
    :return: a ``StimulusSet`` whose ``image_paths`` maps each ``image_id`` to its local file path
    :raises AssertionError: if two rows end up with the same ``image_id``
    """
    # NOTE(review): unpickling runs arbitrary code -- only point this at trusted data files.
    # The context manager closes the file handle, which the bare open() call used to leak.
    with open(os.path.join(data_dir, 'data_IT_base616.pkl'), 'rb') as pickle_file:
        IT_base616 = pickle.load(pickle_file)
    stimuli = IT_base616['meta']

    # Derive the per-image values column-wise instead of writing cell by cell
    image_file_names = [f'{stimulus_id}.png' for stimulus_id in stimuli['id']]
    image_file_paths = [os.path.join(data_dir, 'stimuli', file_name) for file_name in image_file_names]
    image_sha1s = [kf(file_path).sha1 for file_path in image_file_paths]

    # Add columns
    stimuli['image_id'] = image_sha1s
    stimuli['image_file_name'] = image_file_names
    stimuli['image_current_local_file_path'] = image_file_paths
    stimuli['image_path_within_store'] = image_file_names
    stimuli['image_file_sha1'] = image_sha1s

    stimuli = stimuli.drop(columns='id')  # Drop ID column since the information is retained in other columns
    stimuli['grp5_bigram_freq'] = stimuli['grp5_bigram_freq'].astype(str)  # IntervalIndex not supported by netCDF4
    bool_columns = stimuli.select_dtypes(include=['bool']).columns
    stimuli = stimuli.astype({column_name: 'int32' for column_name in bool_columns})  # Bool not supported by netCDF4
    assert len(np.unique(stimuli['image_id'])) == len(stimuli)
    stimuli = StimulusSet(stimuli)
    # Pair the columns positionally so this works for any index, not only 0..n-1
    stimuli.image_paths = dict(zip(stimuli['image_id'], stimuli['image_current_local_file_path']))
    return stimuli
Пример #2
0
def write_netcdf(assembly, target_netcdf_file):
    """Write ``assembly`` to ``target_netcdf_file`` and return the file's sha1.

    All indexes of the assembly are reset before writing (presumably because
    indexed/MultiIndex coordinates cannot be serialized to netCDF -- confirm).
    The reset is applied to a new object, so the caller's assembly is left
    untouched.

    :param assembly: object exposing ``indexes``, ``reset_index`` and ``to_netcdf``
    :param target_netcdf_file: path of the netCDF file to write
    :return: the ``sha1`` attribute of ``kf(target_netcdf_file)``
    """
    _logger.debug(f"Writing assembly to {target_netcdf_file}")
    # One non-mutating call replaces the per-index `inplace=True` loop: the `inplace`
    # keyword is gone from current xarray, and this avoids mutating while iterating.
    index_names = list(assembly.indexes.keys())
    if index_names:
        assembly = assembly.reset_index(index_names)
    assembly.to_netcdf(target_netcdf_file)
    netcdf_kf = kf(target_netcdf_file)
    return netcdf_kf.sha1
Пример #3
0
def convert_stimuli(stimulus_set_existing, stimulus_set_name_new,
                    image_dir_new):
    """Create a copy of a stimulus set whose images went through ``ApplyCosineAperture``.

    Every image of ``stimulus_set_existing`` is converted into ``image_dir_new``
    and re-identified by the ``sha1`` attribute of ``kf(converted_path)``. The
    previous ids are kept in the ``image_id_without_aperture`` column.

    :param stimulus_set_existing: stimulus set providing ``image_id`` values and ``get_image``
    :param stimulus_set_name_new: name assigned to the returned stimulus set
    :param image_dir_new: directory for the converted images (created if missing)
    :return: a new ``StimulusSet`` with updated ``image_id``/``image_file_sha1`` columns,
        ``image_paths`` (new id -> converted path) and ``id_mapping`` (old id -> new id)
    """
    Path(image_dir_new).mkdir(parents=True, exist_ok=True)
    aperture = ApplyCosineAperture(target_dir=image_dir_new)

    new_id_to_path = {}
    old_to_new_id = {}
    progress = tqdm(stimulus_set_existing['image_id'],
                    total=len(stimulus_set_existing),
                    desc='apply cosine aperture')
    for old_id in progress:
        source_path = stimulus_set_existing.get_image(old_id)
        new_path = aperture.convert_image(image_path=source_path)
        new_id = kf(new_path).sha1
        old_to_new_id[old_id] = new_id
        new_id_to_path[new_id] = new_path
        _logger.debug(f"{old_id} -> {new_id}:  {new_path}")

    # Work on a deep copy so the existing stimulus set is not modified
    result = StimulusSet(stimulus_set_existing.copy(deep=True))
    previous_ids = result["image_id"]
    result["image_id_without_aperture"] = previous_ids
    result["image_id"] = previous_ids.map(old_to_new_id)
    result["image_file_sha1"] = result["image_id"]

    result.image_paths = new_id_to_path
    result.name = stimulus_set_name_new
    result.id_mapping = old_to_new_id
    return result
Пример #4
0
def create_image_zip(stimuli, target_zip_path):
    """Zip all stimulus images into ``target_zip_path`` and return the archive's sha1.

    Each row's ``image_current_local_file_path`` is added to the archive under
    the name given by its ``image_path_within_store``.

    :param stimuli: table-like object supporting ``itertuples()`` with both columns above
    :param target_zip_path: path of the zip file to create; missing parent directories are created
    :return: the ``sha1`` attribute of ``kf(target_zip_path)``
    """
    target_dir = os.path.dirname(target_zip_path)
    # A bare file name has an empty dirname, and os.makedirs('') raises FileNotFoundError
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with zipfile.ZipFile(target_zip_path, 'w') as target_zip:
        for image in stimuli.itertuples():
            target_zip.write(image.image_current_local_file_path,
                             arcname=image.image_path_within_store)
    zip_kf = kf(target_zip_path)
    return zip_kf.sha1
Пример #5
0
def test_package_stimulus_set(transaction):
    """Package a prototype stimulus set, fetch it back and compare every image.

    ``transaction`` is not used in the body (presumably a test fixture that
    wraps the run -- confirm against the fixture definition).
    """
    proto_stimuli = prep_proto_stim()
    stimulus_set_name = "dicarlo.test." + now()
    packaged_model = package_stimulus_set(proto_stimuli,
                                          stimulus_set_name=stimulus_set_name,
                                          bucket_name="brainio-temp")
    assert packaged_model
    assert packaged_model.name == stimulus_set_name

    fetched_stimuli = brainio_collection.get_stimulus_set(stimulus_set_name)
    assert len(proto_stimuli) == len(fetched_stimuli)

    # Every image must come back under the same file name and with the same content hash
    for row in proto_stimuli.itertuples():
        original_path = proto_stimuli.get_image(row.image_id)
        fetched_path = fetched_stimuli.get_image(row.image_id)
        assert os.path.basename(original_path) == os.path.basename(fetched_path)
        original_kf = kf(original_path)
        fetched_kf = kf(fetched_path)
        assert original_kf.sha1 == fetched_kf.sha1
Пример #6
0
def create_image_zip(proto_stimulus_set, target_zip_path):
    """Zip the images of a StimulusSet into ``target_zip_path`` and return the archive's sha1.

    Each image is read from ``proto_stimulus_set.get_image(image_id)`` and stored
    under its ``image_path_within_store`` when the row has that attribute,
    otherwise under its ``image_file_name``.

    :param proto_stimulus_set: the ``StimulusSet`` whose images are archived
    :param target_zip_path: path of the zip file to create; missing parent directories are created
    :return: the ``sha1`` attribute of ``kf(target_zip_path)``
    :raises AssertionError: if ``proto_stimulus_set`` is not a ``StimulusSet``
    """
    _logger.debug(f"Zipping stimulus set to {target_zip_path}")
    assert isinstance(proto_stimulus_set, StimulusSet), f"Expected StimulusSet object, got {proto_stimulus_set}"
    target_dir = os.path.dirname(target_zip_path)
    # A bare file name has an empty dirname, and os.makedirs('') raises FileNotFoundError
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with zipfile.ZipFile(target_zip_path, 'w') as target_zip:
        for image in proto_stimulus_set.itertuples():
            arcname = image.image_path_within_store if hasattr(image, 'image_path_within_store') \
                else image.image_file_name
            target_zip.write(proto_stimulus_set.get_image(image.image_id), arcname=arcname)
    zip_kf = kf(target_zip_path)
    return zip_kf.sha1
Пример #7
0
def add_assembly_lookup(assembly_name, stim_set_model, bucket_name,
                        target_netcdf_file, assembly_store_unique_name):
    """Register lookup records for a netCDF assembly via ``get_or_create``.

    Three records are fetched or created: the assembly itself (always with
    assembly class ``"BehavioralAssembly"``), its S3 netCDF store, and the map
    linking the two with ``assembly_name`` as the role. Nothing is returned.

    :param assembly_name: name of the assembly; also used as the role of the store map
    :param stim_set_model: stimulus set record the assembly refers to
    :param bucket_name: S3 bucket name used to build the store URL
    :param target_netcdf_file: local netCDF file whose sha1 is recorded on the store
    :param assembly_store_unique_name: unique store name; also the file stem in the store URL
    """
    netcdf_kf = kf(target_netcdf_file)

    assembly_model, _assembly_created = AssemblyModel.get_or_create(
        name=assembly_name,
        assembly_class="BehavioralAssembly",
        stimulus_set=stim_set_model,
    )

    store_model, _store_created = AssemblyStoreModel.get_or_create(
        assembly_type="netCDF",
        location_type="S3",
        location=f"https://{bucket_name}.s3.amazonaws.com/{assembly_store_unique_name}.nc",
        unique_name=assembly_store_unique_name,
        sha1=netcdf_kf.sha1,
    )

    _store_map, _map_created = AssemblyStoreMap.get_or_create(
        assembly_model=assembly_model,
        assembly_store_model=store_model,
        role=assembly_name,
    )
Пример #8
0
def collect_stimuli(stimuli_directory):
    """Collect metadata for the ``im<number>.png`` images in ``stimuli_directory``.

    Labels are read from the ``lb`` entry of ``cocogray_labels.mat`` (opened with
    h5py) and attached to each image by its number. The ``sha1`` attribute of
    ``kf(path)`` is used both as ``image_id`` and as ``image_file_sha1``.

    :param stimuli_directory: directory holding the ``.png`` images and ``cocogray_labels.mat``
    :return: a ``pandas.DataFrame`` with one row per image
    :raises ValueError: if a ``.png`` file name does not start with ``im<number>.png``
    :raises AssertionError: if there are not exactly 1600 images or the image ids are not unique
    """
    labels_path = os.path.join(stimuli_directory, 'cocogray_labels.mat')
    # The context manager closes the HDF5 file once the labels are decoded
    with h5py.File(labels_path, 'r') as meta:
        # `Dataset.value` was removed in h5py 3.0; `[()]` reads the whole dataset instead
        label_refs = meta['lb'][()]
        # Each entry of the first row is dereferenced and decoded character by character
        labels = [''.join(chr(c) for c in meta[label_refs[0, i]]) for i in range(label_refs[0].size)]

    image_name_pattern = re.compile(r'im([0-9]+)\.png')
    stimuli = []
    for image_file_path in tqdm(glob(os.path.join(stimuli_directory, '*.png'))):
        image_file_name = os.path.basename(image_file_path)
        name_match = image_name_pattern.match(image_file_name)
        if name_match is None:
            # Fail with the offending file name instead of an AttributeError on None
            raise ValueError(f"Unexpected image file name {image_file_name!r} in {stimuli_directory}")
        image_number = int(name_match.group(1))
        im_kf = kf(image_file_path)
        stimuli.append({
            'image_id': im_kf.sha1,
            'image_file_name': image_file_name,
            'image_current_local_file_path': image_file_path,
            'image_file_sha1': im_kf.sha1,
            'image_number': image_number,
            'image_path_within_store': image_file_name,
            'label': labels[image_number],
        })
    stimuli = pd.DataFrame(stimuli)
    assert len(stimuli) == 1600
    assert len(np.unique(stimuli['image_id'])) == len(stimuli)
    return stimuli