def _load_assembly(self):
    """Load the Natural Stories assembly with subjects exposed as neuroids.

    Subjects are renamed to the ``neuroid`` dimension so that metrics which
    mostly deal with neurons can be applied directly. Two subjects are
    dropped and a punctuation-free ``word_core`` is added to both the
    stimulus set and the assembly.

    :return: the filtered NeuroidAssembly with a ``word_core`` coordinate
    """
    # A2VG5S4UL5UGRS only has 6 reading times above threshold (i.e. not nan),
    # which is not enough for 5-fold cross-validation where correlation
    # cannot be computed on a single data point.
    # A1I02VZ07MZB7F has too few values for stratified cross-validation
    # (n_splits=5 cannot be greater than the number of members in each class).
    excluded_subjects = ['A2VG5S4UL5UGRS', 'A1I02VZ07MZB7F']

    def strip_punctuation(words):
        # treat e.g. "\This" and "This" as the same word (to split over)
        return [re.sub(r'[^\w\s]', '', word) for word in words]

    assembly = load_naturalStories()
    assembly = assembly.rename({'subjects': 'neuroid'})
    assembly['neuroid_id'] = 'neuroid', assembly['subject_id']
    assembly = NeuroidAssembly(assembly)

    keep_subject = [subject not in excluded_subjects
                    for subject in assembly['subject_id'].values]
    assembly = assembly[{'neuroid': keep_subject}]

    assembly.stimulus_set['word_core'] = strip_punctuation(
        assembly.stimulus_set['word'].values)
    assembly['word_core'] = 'presentation', strip_punctuation(
        assembly['word'].values)
    return assembly
def _package_prediction(self, predicted_values, source):
    """Wrap raw predicted values into a NeuroidAssembly.

    Coordinates of `source` that do not lie on the neuroid dimension are
    carried over; the neuroid coordinates are taken from
    ``self._target_neuroid_values``.

    :param predicted_values: the values to package
    :param source: assembly whose dims and non-neuroid coords are re-used
    :return: NeuroidAssembly holding `predicted_values`
    """
    neuroid_dim = self._neuroid_dim
    target_values = self._target_neuroid_values

    # carry over everything that is not attached to the neuroid dimension
    packaged_coords = {}
    for name, coord_dims, coord_values in walk_coords(source):
        if array_is_element(coord_dims, neuroid_dim):
            continue
        packaged_coords[name] = (coord_dims, coord_values)

    # If there is only one neuroid coordinate, it would get discarded and the
    # dimension would be used as coordinate. To avoid this, build the assembly
    # on that single coordinate first and stack onto the neuroid dimension
    # afterwards.
    single_level = None
    assembly_dims = source.dims
    if len(target_values) == 1:
        # extract single key: https://stackoverflow.com/a/20145927/2225200
        (single_level, _), = target_values.items()
        assembly_dims = [single_level if dim == neuroid_dim else dim
                         for dim in assembly_dims]

    coord_dim = single_level or neuroid_dim
    for name, values in target_values.items():
        # this might overwrite values which is okay
        packaged_coords[name] = coord_dim, values

    prediction = NeuroidAssembly(predicted_values, coords=packaged_coords,
                                 dims=assembly_dims)
    if single_level:
        prediction = prediction.stack(**{neuroid_dim: [single_level]})
    return prediction
def _load_assembly(self):
    """Load the Natural Stories assembly averaged over all subjects.

    The subject mean is exposed as a single neuroid (``neuroid_id`` 0,
    ``subject_id`` 'all') to make it easier for our metrics.

    :return: NeuroidAssembly with one neuroid and the stimulus set attached
    """
    raw = load_naturalStories()
    # captured up front and re-attached at the end
    # (presumably the reduction below does not keep attrs; confirm)
    stimulus_set = raw.stimulus_set

    averaged = raw.mean('subjects').expand_dims('neuroid')
    averaged['neuroid_id'] = 'neuroid', [0]
    averaged['subject_id'] = 'neuroid', ['all']

    packaged = NeuroidAssembly(averaged)
    packaged.attrs['stimulus_set'] = stimulus_set
    return packaged
def test_alignment(self):
    """Correlating an assembly with its sorted copy must score 1 everywhere."""
    num_presentations, num_neuroids = 500, 10
    # coordinates are deliberately in reverse order
    reversed_images = list(range(num_presentations))[::-1]
    reversed_neuroids = list(range(num_neuroids))[::-1]
    jumbled_prediction = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation', reversed_images),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid', reversed_neuroids),
            'neuroid_meta': ('neuroid', [0] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])
    sorted_prediction = jumbled_prediction.sortby(['image_id', 'neuroid_id'])

    pearson = XarrayCorrelation(scipy.stats.pearsonr)
    score = pearson(jumbled_prediction, sorted_prediction)
    assert all(score == approx(1))
def test_neuroid_single_coord(self):
    """Regression predictions keep a neuroid dim stacked from a single coord."""
    num_presentations, num_neuroids = 500, 10
    unstacked = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation',
                         list(range(num_presentations))[::-1]),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid_id', list(range(num_neuroids))[::-1]),
        },
        dims=['presentation', 'neuroid_id'])
    # the neuroid dimension consists of the single level `neuroid_id`
    jumbled_source = unstacked.stack(neuroid=['neuroid_id'])
    target = jumbled_source.sortby(['image_id', 'neuroid_id'])

    regression = XarrayRegression(LinearRegression())
    regression.fit(jumbled_source, target)
    prediction = regression.predict(jumbled_source)

    assert set(prediction.dims) == {'presentation', 'neuroid'}
    assert len(prediction['neuroid_id']) == 10
def test_alignment(self):
    """RSA of a small assembly yields the expected, symmetric matrix.

    Presentations 0/1 and 2/3 have identical responses, and the two pairs
    are anti-correlated with each other.
    """
    assembly = NeuroidAssembly(
        [[1, 2], [1, 2], [4, 3], [4, 3]],
        coords={
            'image_id': ('presentation', list(range(4))),
            'image_meta': ('presentation', list(range(4))),
            'neuroid_id': ('neuroid', list(range(2))),
            'neuroid_meta': ('neuroid', list(range(2)))
        },
        dims=['presentation', 'neuroid'])
    matrix = RSA()(assembly)
    assert np.all(np.diag(matrix) == approx(1., abs=.001))
    # Compare against the transpose: `triu_indices` and `tril_indices`
    # enumerate their cells in different orders, so indexing with both does
    # not pair up mirrored cells and would not check symmetry.
    np.testing.assert_allclose(
        matrix.values, matrix.values.T, rtol=0, atol=1e-5,
        err_msg="upper and lower triangular need to be equal")
    expected = DataAssembly(
        [[1., 1., -1., -1.],
         [1., 1., -1., -1.],
         [-1., -1., 1., 1.],
         [-1., -1., 1., 1.]],
        coords={
            'image_id': ('presentation', list(range(4))),
            'image_meta': ('presentation', list(range(4)))
        },
        dims=['presentation', 'presentation'])
    # does not take ordering into account
    np.testing.assert_array_almost_equal(matrix.values, expected.values)
def test_no_expand_raw_level(self):
    """A raw level named like the divider must stay a level, not a dim."""
    values = np.random.rand(3, 100)
    num_divisions, num_neuroids = values.shape
    assembly = NeuroidAssembly(
        values,
        coords={
            'neuroid': list(range(num_neuroids)),
            'division_coord': list(range(num_divisions)),
        },
        dims=['division_coord', 'neuroid'])
    transformation = CartesianProduct(dividers=['division_coord'])

    class RawMetricPlaceholder(Metric):
        # returns a score whose `raw` attribute already carries a
        # `division_coord` level on its `dim` dimension
        def __call__(self, assembly, *args, **kwargs):
            score = Score([assembly.values[0]], dims=['dim'])
            raw_coords = {
                'dim_id': ('dim', [assembly.values[1]]),
                'division_coord': ('dim', [assembly.values[2]]),
            }
            score.attrs['raw'] = Score(score.copy(), coords=raw_coords)
            return score

    metric = RawMetricPlaceholder()
    result = transformation(assembly, apply=metric)

    assert result.dims == ("division_coord", "dim")
    assert hasattr(result, 'raw')
    assert result.raw.dims == ("dim", )
    assert 'division_coord' not in result.raw.dims  # no dimension
    assert hasattr(result.raw, 'division_coord')  # but a level
    assert result.raw["dim"].variable.level_names == [
        "dim_id", "division_coord"
    ]
def look_at(self, stimuli: Union[StimulusSet, List[str]], number_of_trials=1):
    """Record the stimuli that are presented and stop.

    A single stimulus is treated as a configuration stimulus (e.g. Kar2019
    or Marques2020): a placeholder assembly is returned for it so that the
    caller can proceed to the real stimuli. Anything else is stored on
    ``self.stimuli`` and ``StopIteration`` is raised.

    :param stimuli: the stimuli to look at; the single-stimulus branch
        iterates its columns, so it needs a DataFrame-like object
    :param number_of_trials: unused here
    :return: placeholder NeuroidAssembly with dims
        (presentation, neuroid, time_bin) for a single stimulus
    :raises StopIteration: after storing more or fewer than one stimulus
    """
    if len(stimuli) == 1:
        # configuration stimuli: return to get to the real stimuli
        neuroid_coords = {
            'neuroid_id': ('neuroid', [123]),
            'neuroid_num': ('neuroid', [123]),
        }
        # `items()` instead of `iteritems()`: the latter was removed in
        # pandas 2.0, while `items()` yields the same (column, values) pairs
        # on old and new versions.
        presentation_coords = {
            column: ('presentation', values)
            for column, values in stimuli.items()
        }
        time_bin_coords = {
            'time_bin_start': ('time_bin',
                               [start for start, end in self._time_bins]),
            'time_bin_end': ('time_bin',
                             [end for start, end in self._time_bins]),
        }
        return NeuroidAssembly(
            [[np.arange(len(self._time_bins))]],
            coords={
                **neuroid_coords,
                **presentation_coords,
                **time_bin_coords,
            },
            dims=['presentation', 'neuroid', 'time_bin'])
    self.stimuli = stimuli
    raise StopIteration()
def from_paths(self, *args, **kwargs):
    """Retrieve activations and re-arrange them along a `time_step` dimension.

    Layer names are expected to end in ``-t<timestep>``. The timestep is
    split off the layer name, each (region, timestep) slice becomes its own
    assembly with a leading `time_step` dimension, and the slices are merged
    into one assembly whose `neuroid_id` no longer contains the timestep.

    :return: merged assembly with `time_step` dimension and rebuilt
        `neuroid_id`
    """
    raw_activations = super(TemporalExtractor, self).from_paths(*args, **kwargs)
    layer_pattern = r'(([^-]*)\..*|logits|avgpool)-t([0-9]+)'

    # introduce time dimension: first group the layers by region
    layers_by_region = defaultdict(list)
    for layer in set(raw_activations['layer'].values):
        match = re.match(layer_pattern, layer)
        stripped_layer = match.group(1)
        dotted_prefix = match.group(2)
        timestep = match.group(3)
        # layers without a dotted prefix act as their own region
        region = dotted_prefix if dotted_prefix else stripped_layer
        layers_by_region[region].append((layer, stripped_layer, timestep))

    # select the activations for every (region, timestep) combination
    per_time_activations = {}
    for region, time_layers in layers_by_region.items():
        for full_layer, stripped_layer, timestep in time_layers:
            selected = raw_activations.sel(layer=full_layer)
            num_neuroids = len(selected['neuroid'])
            selected['layer'] = 'neuroid', [stripped_layer] * num_neuroids
            per_time_activations[(region, timestep)] = selected

    # re-package every selection with a leading time_step dimension
    for (region, timestep), selected in per_time_activations.items():
        selected['region'] = 'neuroid', [region] * len(selected['neuroid'])
        # neuroid_id is left out: otherwise, neuroid dim will be as large as
        # before with nans
        coords = {coord: (dims, values)
                  for coord, dims, values in walk_coords(selected)
                  if coord != 'neuroid_id'}
        coords['time_step'] = [int(timestep)]
        per_time_activations[(region, timestep)] = NeuroidAssembly(
            [selected.values], coords=coords,
            dims=['time_step'] + list(selected.dims))

    merged = merge_data_arrays(list(per_time_activations.values()))

    # rebuild neuroid_id without timestep
    id_parts = [merged[coord].values
                for coord in ['model', 'region', 'neuroid_num']]
    neuroid_id = [".".join([f"{value}" for value in values])
                  for values in zip(*id_parts)]
    merged['neuroid_id'] = 'neuroid', neuroid_id
    return merged
def test_misaligned(self):
    """Cross-validation over differently ordered source/target runs 10 splits."""
    num_presentations, num_neuroids = 500, 10
    jumbled_source = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation',
                         list(range(num_presentations))[::-1]),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid', list(range(num_neuroids))[::-1]),
            'neuroid_meta': ('neuroid', [0] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])
    target = jumbled_source.sortby(['image_id', 'neuroid_id'])

    cross_validation = CrossValidation(splits=10, stratification_coord=None)
    metric = self.MetricPlaceholder()
    score = cross_validation(jumbled_source, target, apply=metric)

    # the metric must have been called once per split on all four assemblies
    assert len(metric.train_source_assemblies) == len(metric.test_source_assemblies) == \
        len(metric.train_target_assemblies) == len(metric.test_target_assemblies) == 10
    assert len(score.attrs['raw']) == 10
def test_fitpredict_alignment(self):
    """Fitting on jumbled data and predicting it reproduces the target values."""
    num_presentations, num_neuroids = 500, 10
    jumbled_source = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation',
                         list(range(num_presentations))[::-1]),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid', list(range(num_neuroids))[::-1]),
            'neuroid_meta': ('neuroid', [0] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])
    sort_order = ['image_id', 'neuroid_id']
    target = jumbled_source.sortby(sort_order)

    regression = XarrayRegression(LinearRegression())
    regression.fit(jumbled_source, target)
    prediction = regression.predict(jumbled_source)

    # do not test for alignment of metadata - it is only important that the
    # data is well-aligned with the metadata.
    sorted_prediction = prediction.sortby(sort_order).values
    sorted_target = target.sortby(sort_order).values
    np.testing.assert_array_almost_equal(sorted_prediction, sorted_target)
def look_at(self, *args, **kwargs):
    """Return a fixed 2-presentation x 3-neuroid assembly; arguments are ignored."""
    responses = [[1, 2, 3], [1, 2, 3]]
    coords = {
        'image_id': ('presentation', ['image1', 'image2']),
        'object_name': ('presentation', ['number', 'number']),
        'neuroid_id': ('neuroid', [1, 2, 3]),
        'region': ('neuroid', ['IT'] * 3),
    }
    return NeuroidAssembly(responses, coords=coords,
                           dims=['presentation', 'neuroid'])
def test_small(self):
    """Cross-validated CKA of an assembly with itself is approximately 1."""
    num_presentations, num_neuroids = 30, 25
    num_values = num_presentations * num_neuroids
    # increasing ramp with standard-normal noise on top
    values = np.arange(num_values) + np.random.standard_normal(num_values)
    assembly = NeuroidAssembly(
        values.reshape((num_presentations, num_neuroids)),
        coords={
            'image_id': ('presentation', np.arange(num_presentations)),
            'object_name': ('presentation', ['a', 'b', 'c'] * 10),
            'neuroid_id': ('neuroid', np.arange(num_neuroids)),
            'region': ('neuroid', [None] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])

    metric = CKACrossValidated()
    score = metric(assembly1=assembly, assembly2=assembly)
    assert score.sel(aggregation='center') == approx(1)
def test_equal30(self):
    """CKA of a 30x25 assembly with itself is approximately 1."""
    num_presentations, num_neuroids = 30, 25
    num_values = num_presentations * num_neuroids
    # increasing ramp with standard-normal noise on top
    values = np.arange(num_values) + np.random.standard_normal(num_values)
    assembly = NeuroidAssembly(
        values.reshape((num_presentations, num_neuroids)),
        coords={
            'image_id': ('presentation', np.arange(num_presentations)),
            'object_name': ('presentation', ['a', 'b', 'c'] * 10),
            'neuroid_id': ('neuroid', np.arange(num_neuroids)),
            'region': ('neuroid', [None] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])

    cka = CKAMetric()
    score = cka(assembly, assembly)
    assert score == approx(1.)
def get_assembly():
    """Build a small test assembly with an attached stimulus set.

    40 presentations (20 images, each in repetition 1 and 2) of 5 'IT'
    neuroids in a single 70-170 time bin. The time bin is squeezed away and
    the result is returned as presentation x neuroid.

    :return: NeuroidAssembly with `stimulus_set` and `stimulus_set_name` attrs
    """
    image_names = [f'images/{i}.png' for i in range(1, 21)]

    # increasing ramp with standard-normal noise on top
    values = np.arange(40 * 5) + np.random.standard_normal(40 * 5)
    assembly = NeuroidAssembly(
        values.reshape((5, 40, 1)),
        coords={
            'image_id': ('presentation', image_names * 2),
            'object_name': ('presentation', ['a'] * 40),
            'repetition': ('presentation', ([1] * 20 + [2] * 20)),
            'neuroid_id': ('neuroid', np.arange(5)),
            'region': ('neuroid', ['IT'] * 5),
            'time_bin_start': ('time_bin', [70]),
            'time_bin_end': ('time_bin', [170]),
        },
        dims=['neuroid', 'presentation', 'time_bin'])

    # first ten images are labeled 'a', the remaining ten 'b'
    labels = ['a'] * 10 + ['b'] * 10
    stimulus_set = StimulusSet([
        {'image_id': image_name, 'object_name': 'a', 'image_label': label}
        for image_name, label in zip(image_names, labels)
    ])
    # image paths are resolved relative to this file's directory
    base_directory = os.path.dirname(__file__)
    stimulus_set.image_paths = {
        image_name: os.path.join(base_directory, image_name)
        for image_name in image_names
    }
    stimulus_set.identifier = 'test'

    assembly.attrs['stimulus_set'] = stimulus_set
    assembly.attrs['stimulus_set_name'] = stimulus_set.identifier
    assembly = assembly.squeeze("time_bin")
    return assembly.transpose('presentation', 'neuroid')
def test_one_division_similarity_dim_last(self):
    """Dividing over `division_coord` hands each slice to the metric once.

    The assembly has `division_coord` as its first and `neuroid` as its last
    dimension; every `division_coord` selection must show up among the
    assemblies recorded by the placeholder metric.
    """
    assembly = np.random.rand(3, 100)
    assembly = NeuroidAssembly(
        assembly,
        coords={
            'neuroid': list(range(assembly.shape[1])),
            'division_coord': list(range(assembly.shape[0]))
        },
        dims=['division_coord', 'neuroid'])
    transformation = CartesianProduct(dividers=['division_coord'])
    placeholder = self.MetricPlaceholder()
    transformation(assembly, apply=placeholder)
    assert len(assembly['division_coord']) == len(placeholder.assemblies)
    targets = [
        assembly.sel(division_coord=i)
        for i in assembly['division_coord'].values
    ]
    for target in targets:
        # Compare the actual values. Testing `[actual == target]` only checks
        # a non-empty list, which is always truthy and never fails.
        # Squeezing tolerates a retained singleton division dimension
        # (assumes the divided assemblies differ from the targets by at most
        # singleton dims - TODO confirm against CartesianProduct).
        match = any(
            np.array_equal(np.squeeze(np.asarray(actual)), np.asarray(target))
            for actual in placeholder.assemblies)
        assert match, f"expected divided assembly not found: {target}"
def test_neuroid_single_coord(self):
    """Correlation keeps a neuroid dim that was stacked from a single coord."""
    num_presentations, num_neuroids = 500, 10
    unstacked = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation', list(range(num_presentations))),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid_id', list(range(num_neuroids))),
        },
        dims=['presentation', 'neuroid_id'])
    # the neuroid dimension consists of the single level `neuroid_id`
    prediction = unstacked.stack(neuroid=['neuroid_id'])

    # constant placeholder in place of a real correlation function
    constant_correlation = XarrayCorrelation(lambda a, b: (1, 0))
    score = constant_correlation(prediction, prediction)

    np.testing.assert_array_equal(score.dims, ['neuroid'])
    assert len(score['neuroid']) == 10
def test_dummy_data(self):
    """Data that is identical across repetitions has an internal consistency of 1."""
    num_images, num_repetitions, num_neuroids = 10, 5, 10
    image_names = list(alphabet)[:num_images]
    # every neuroid responds with the image index, in every repetition
    responses = np.tile(np.arange(10)[:, np.newaxis], [5, 10])
    data = NeuroidAssembly(
        responses,
        coords={
            'image_id': ('presentation',
                         np.tile(image_names, num_repetitions)),
            'image_meta': ('presentation',
                           np.tile(image_names, num_repetitions)),
            'repetition': ('presentation',
                           np.repeat(np.arange(num_repetitions), num_images)),
            'neuroid_id': ('neuroid', np.arange(num_neuroids)),
            'neuroid_meta': ('neuroid', np.arange(num_neuroids)),
        },
        dims=['presentation', 'neuroid'])

    ceiler = InternalConsistency()
    ceiling = ceiler(data)
    assert ceiling.sel(aggregation='center') == 1
def test_correlation(self):
    """The value returned by the wrapped correlation function becomes the score."""
    num_presentations, num_neuroids = 500, 10
    prediction = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation', list(range(num_presentations))),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid', list(range(num_neuroids))),
            'neuroid_meta': ('neuroid', [0] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])

    # constant placeholder in place of a real correlation function
    constant_correlation = XarrayCorrelation(lambda a, b: (1, 0))
    score = constant_correlation(prediction, prediction)
    assert all(score == approx(1))
def test_small(self):
    """Cross-regressing an assembly onto itself correlates at approximately 1."""
    num_presentations, num_neuroids = 30, 25
    num_values = num_presentations * num_neuroids
    # increasing ramp with standard-normal noise on top
    values = np.arange(num_values) + np.random.standard_normal(num_values)
    assembly = NeuroidAssembly(
        values.reshape((num_presentations, num_neuroids)),
        coords={
            'image_id': ('presentation', np.arange(num_presentations)),
            'object_name': ('presentation', ['a', 'b', 'c'] * 10),
            'neuroid_id': ('neuroid', np.arange(num_neuroids)),
            'region': ('neuroid', ['some_region'] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])

    metric = CrossRegressedCorrelation(regression=pls_regression(),
                                       correlation=pearsonr_correlation())
    score = metric(source=assembly, target=assembly)
    assert score.sel(aggregation='center') == approx(1, abs=.00001)
def test_2d_equal20(self):
    """An RDM compared with itself has a similarity of approximately 1."""
    num_presentations = 20
    # not mirrored across diagonal, but fine for unit test
    rdm_values = np.random.rand(num_presentations, num_presentations)
    np.fill_diagonal(rdm_values, 0)
    rdm = NeuroidAssembly(
        rdm_values,
        coords={
            'image_id': ('presentation', list(range(num_presentations))),
            'object_name': ('presentation', ['A', 'B'] * 10),
        },
        dims=['presentation', 'presentation'])

    rdm_similarity = RDMSimilarity()
    score = rdm_similarity(rdm, rdm)
    assert score == approx(1.)
def test_dimensions(self):
    """The correlation score has a single `neuroid` dimension of length 10."""
    num_presentations, num_neuroids = 500, 10
    prediction = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation', list(range(num_presentations))),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid', list(range(num_neuroids))),
            'neuroid_meta': ('neuroid', [0] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])

    # constant placeholder in place of a real correlation function
    constant_correlation = XarrayCorrelation(lambda a, b: (1, 0))
    score = constant_correlation(prediction, prediction)

    np.testing.assert_array_equal(score.dims, ['neuroid'])
    np.testing.assert_array_equal(score.shape, [10])
def test_small(self, regression_ctr):
    """Predictions keep the image and neuroid ids of the assembly they were fit on.

    :param regression_ctr: zero-argument callable that builds the regression
    """
    num_presentations, num_neuroids = 30, 25
    num_values = num_presentations * num_neuroids
    # increasing ramp with standard-normal noise on top
    values = np.arange(num_values) + np.random.standard_normal(num_values)
    assembly = NeuroidAssembly(
        values.reshape((num_presentations, num_neuroids)),
        coords={
            'image_id': ('presentation', np.arange(num_presentations)),
            'object_name': ('presentation', ['a', 'b', 'c'] * 10),
            'neuroid_id': ('neuroid', np.arange(num_neuroids)),
            'region': ('neuroid', [None] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])

    regression = regression_ctr()
    regression.fit(source=assembly, target=assembly)
    prediction = regression.predict(source=assembly)

    assert all(prediction['image_id'] == assembly['image_id'])
    assert all(prediction['neuroid_id'] == assembly['neuroid_id'])
def _test_no_division_apply_manually(self, num_values):
    """Drive `CartesianProduct.pipe` by hand on an assembly without dividers.

    The generator is expected to yield the undivided assembly once, accept a
    result via `send`, and then yield the combined similarity.

    :param num_values: number of neuroids in the random test assembly
    """
    values = np.random.rand(num_values)
    assembly = NeuroidAssembly(
        values,
        coords={'neuroid': list(range(len(values)))},
        dims=['neuroid'])
    transformation = CartesianProduct()
    generator = transformation.pipe(assembly)

    for divided_assembly in generator:  # should run only once
        np.testing.assert_array_equal(assembly.values, divided_assembly[0])
        placeholder_result = DataAssembly([0], coords={'split': [0]},
                                          dims=['split'])
        done = generator.send(placeholder_result)
        assert done
        break

    similarity = next(generator)
    np.testing.assert_array_equal(similarity.shape, [1])
    np.testing.assert_array_equal(similarity.dims, ['split'])
    assert similarity[0] == 0
def test_presentation_neuroid(self):
    """Single-assembly cross-validation calls the metric once per split."""
    num_presentations, num_neuroids = 500, 10
    assembly = NeuroidAssembly(
        np.random.rand(num_presentations, num_neuroids),
        coords={
            'image_id': ('presentation', list(range(num_presentations))),
            'image_meta': ('presentation', [0] * num_presentations),
            'neuroid_id': ('neuroid', list(range(num_neuroids))),
            'neuroid_meta': ('neuroid', [0] * num_neuroids),
        },
        dims=['presentation', 'neuroid'])

    cross_validation = CrossValidationSingle(splits=10,
                                             stratification_coord=None)
    metric = self.MetricPlaceholder()
    score = cross_validation(assembly, apply=metric)

    assert len(metric.train_assemblies) == len(
        metric.test_assemblies) == 10
    assert len(score.attrs['raw']['split']) == 10
def _package_layer(self, layer_activations, layer, stimuli_paths):
    """Package one layer's activations into a stimulus_path x neuroid assembly.

    The activations are flattened to a single neuroid dimension; the flatten
    indices are kept as per-neuroid coordinates, padded with nan where a
    coordinate name has no index column.

    :param layer_activations: activations, first axis matching `stimuli_paths`
    :param layer: name of the layer, stored on every neuroid
    :param stimuli_paths: paths of the stimuli, one per activation row
    :return: NeuroidAssembly with a `neuroid_id` of the form
        ``<model>.<layer>.<neuroid_num>``
    """
    assert layer_activations.shape[0] == len(stimuli_paths)
    # collapse for single neuroid dim
    activations, flatten_indices = flatten(layer_activations,
                                           return_index=True)
    num_index_columns = flatten_indices.shape[1]
    assert num_index_columns in [1, 2, 3]
    # see comment in _package for an explanation why we cannot simply have
    # 'channel' for the FC layer
    if num_index_columns == 1:  # FC
        flatten_coord_names = ['channel', 'channel_x', 'channel_y']
    elif num_index_columns == 2:  # Transformer
        flatten_coord_names = ['channel', 'embedding']
    elif num_index_columns == 3:  # 2DConv
        flatten_coord_names = ['channel', 'channel_x', 'channel_y']

    flatten_coords = {}
    for position, coord_name in enumerate(flatten_coord_names):
        # names beyond the available index columns are filled with nan
        flatten_coords[coord_name] = [
            sample_index[position] if position < num_index_columns else np.nan
            for sample_index in flatten_indices
        ]

    num_neuroids = activations.shape[1]
    coords = {
        'stimulus_path': stimuli_paths,
        'neuroid_num': ('neuroid', list(range(num_neuroids))),
        'model': ('neuroid', [self.identifier] * num_neuroids),
        'layer': ('neuroid', [layer] * num_neuroids),
    }
    for coord_name, coord_values in flatten_coords.items():
        coords[coord_name] = ('neuroid', coord_values)
    layer_assembly = NeuroidAssembly(activations, coords=coords,
                                     dims=['stimulus_path', 'neuroid'])

    id_parts = [layer_assembly[coord].values
                for coord in ['model', 'layer', 'neuroid_num']]
    neuroid_id = [".".join([f"{value}" for value in values])
                  for values in zip(*id_parts)]
    layer_assembly['neuroid_id'] = 'neuroid', neuroid_id
    return layer_assembly
def test_small(self):
    """Temporal regression predictions keep image, neuroid and time_bin coords."""
    shape = (30, 25, 5)  # presentation x neuroid x time_bin
    num_values = 30 * 25 * 5
    # increasing ramp with standard-normal noise on top
    values = (np.arange(num_values)
              + np.random.standard_normal(num_values)).reshape(shape)
    assembly = NeuroidAssembly(
        values,
        coords={
            'image_id': ('presentation', np.arange(30)),
            'object_name': ('presentation', ['a', 'b', 'c'] * 10),
            'neuroid_id': ('neuroid', np.arange(25)),
            'region': ('neuroid', ['some_region'] * 25),
            'time_bin_start': ('time_bin', list(range(5))),
            'time_bin_end': ('time_bin', list(range(1, 6))),
        },
        dims=['presentation', 'neuroid', 'time_bin'])

    regression = TemporalRegressionAcrossTime(pls_regression())
    regression.fit(source=assembly, target=assembly)
    prediction = regression.predict(source=assembly)

    assert all(prediction['image_id'] == assembly['image_id'])
    assert all(prediction['neuroid_id'] == assembly['neuroid_id'])
    assert all(prediction['time_bin'] == assembly['time_bin'])
def test_across_images(self):
    """Temporal correlation across images scores 1 per neuroid for identical data."""
    shape = (30, 25, 5)  # presentation x neuroid x time_bin
    num_values = 30 * 25 * 5
    # increasing ramp with standard-normal noise on top
    values = (np.arange(num_values)
              + np.random.standard_normal(num_values)).reshape(shape)
    assembly = NeuroidAssembly(
        values,
        coords={
            'image_id': ('presentation', np.arange(30)),
            'object_name': ('presentation', ['a', 'b', 'c'] * 10),
            'neuroid_id': ('neuroid', np.arange(25)),
            'region': ('neuroid', ['some_region'] * 25),
            'time_bin_start': ('time_bin', list(range(5))),
            'time_bin_end': ('time_bin', list(range(1, 6))),
        },
        dims=['presentation', 'neuroid', 'time_bin'])

    temporal_correlation = TemporalCorrelationAcrossImages(
        pearsonr_correlation())
    score = temporal_correlation(assembly, assembly)

    np.testing.assert_array_equal(score.dims, ['neuroid'])
    np.testing.assert_array_equal(score['neuroid_id'].values,
                                  list(range(25)))
    np.testing.assert_array_almost_equal(score.values, [1.] * 25)
    # the raw scores additionally keep the time_bin dimension
    assert set(score.raw.dims) == {'neuroid', 'time_bin'}
def _package_layer(self, layer_activations, layer, sentences):
    """Package one layer's activations into a presentation x neuroid assembly.

    A list of activations is treated as per-word activations of a single
    sentence; anything else as per-sentence activations that are flattened
    to a single neuroid dimension.

    :param layer_activations: list with one per-word array, or an array with
        one row per sentence
    :param layer: name of the layer, stored on every neuroid
    :param sentences: the sentences the activations were retrieved for
    :return: NeuroidAssembly with a `neuroid_id` of the form
        ``<model>.<layer>.<neuroid_num>``
    """
    if not isinstance(layer_activations, list):
        # activations are retrieved per-sentence
        assert layer_activations.shape[0] == len(sentences)
        # collapse for single neuroid dim
        activations = flatten(layer_activations)
        presentation_coords = {
            'stimulus_sentence': ('presentation', sentences),
            'sentence_num': ('presentation', list(range(len(sentences)))),
        }
    else:
        # activations are retrieved per-word
        assert len(layer_activations) == 1 == layer_activations[0].shape[
            0] == len(sentences)
        activations = layer_activations[0][0]
        assert len(activations.shape) == 2
        words = sentences[0].split(' ')
        presentation_coords = {
            # the sentence is repeated once per word
            'stimulus_sentence': ('presentation',
                                  np.repeat(sentences, len(words))),
            'word': ('presentation', words),
        }

    num_neuroids = activations.shape[1]
    neuroid_coords = {
        'neuroid_num': ('neuroid', list(range(num_neuroids))),
        'model': ('neuroid', [self.identifier] * num_neuroids),
        'layer': ('neuroid', [layer] * num_neuroids),
    }
    layer_assembly = NeuroidAssembly(
        activations,
        coords={**presentation_coords, **neuroid_coords},
        dims=['presentation', 'neuroid'])

    id_parts = [layer_assembly[coord].values
                for coord in ['model', 'layer', 'neuroid_num']]
    neuroid_id = [".".join([f"{value}" for value in values])
                  for values in zip(*id_parts)]
    layer_assembly['neuroid_id'] = 'neuroid', neuroid_id
    return layer_assembly
def test(self):
    """Split-half consistency of data with itself is approximately 1 per neuroid."""
    num_images, num_neuroids = 10, 10
    image_names = list(alphabet)[:num_images]
    # every neuroid responds with the image index
    responses = np.tile(np.arange(10)[:, np.newaxis], [5, 10])
    data = NeuroidAssembly(
        responses,
        coords={
            'image_id': ('presentation', np.tile(image_names, 5)),
            'image_meta': ('presentation', np.tile(image_names, 5)),
            # NOTE(review): repetition is tiled here, so each image_id only
            # ever pairs with one repetition value - confirm this is intended
            'repetition': ('presentation', np.tile(np.arange(5), 10)),
            'neuroid_id': ('neuroid', np.arange(num_neuroids)),
            'neuroid_meta': ('neuroid', np.arange(num_neuroids)),
        },
        dims=['presentation', 'neuroid'])

    ceiler = SplitHalfConsistency()
    ceiling = ceiler(data, data)

    expected = DataAssembly(
        [approx(1)] * 10,
        coords={
            'neuroid_id': ('neuroid', np.arange(num_neuroids)),
            'neuroid_meta': ('neuroid', np.arange(num_neuroids)),
        },
        dims=['neuroid'])
    assert all(ceiling == expected)