def test_av_load_all_layers_one_identifier(self) -> None:
    """Save three layers' activations under two identifiers and verify that
    per-identifier loads return exactly the tensors saved under that identifier."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # one activation tensor per layer, for each of the two identifiers
        avs_0 = [torch.randn(36, 16), torch.randn(16, 16), torch.randn(4, 16)]
        avs_1 = [torch.randn(36, 16), torch.randn(16, 16), torch.randn(4, 16)]
        idf1, idf2 = "idf1", "idf2"
        layer_names = ["layer1.0.conv1", "layer1.0.conv2", "layer1.1.conv1"]

        AV.save(tmpdir, "dummy", idf1, layer_names, avs_0, "0")
        loader_all = DataLoader(cast(Dataset, AV.load(tmpdir, "dummy")))
        self.assertEqual(len(loader_all), 3)

        AV.save(tmpdir, "dummy", idf2, layer_names, avs_1, "0")
        loader_all = DataLoader(cast(Dataset, AV.load(tmpdir, "dummy")))
        self.assertEqual(len(loader_all), 6)

        # each identifier must round-trip its own three tensors, in layer order
        for idf, expected in ((idf1, avs_0), (idf2, avs_1)):
            loader = DataLoader(
                cast(Dataset, AV.load(tmpdir, "dummy", identifier=idf))
            )
            self.assertEqual(len(loader), 3)
            for i, av in enumerate(loader):
                assertTensorAlmostEqual(self, av, expected[i].unsqueeze(0))
def test_exists_without_version(self) -> None:
    """AV.exists flips from False to True once a save is made for the layer.

    Fix: the pre-save check passed the layer name in the identifier slot
    (``AV.exists(tmpdir, "dummy", "layer1.0.conv1")``); corrected to the
    (identifier, layer) argument order used by the post-save check, so the
    pre-/post- assertions probe the same entry. Both checks are still False
    before the save (nothing exists under the model directory yet).
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        av_0 = torch.randn(64, 16)
        # nothing saved yet -> the entry must not exist
        self.assertFalse(
            AV.exists(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1")
        )
        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1", av_0, "0")
        self.assertTrue(
            AV.exists(
                tmpdir,
                "dummy",
                DEFAULT_IDENTIFIER,
                "layer1.0.conv1",
            )
        )
def test_exists_with_version(self) -> None:
    """AV.exists tracks identifiers (versions) independently: saving under
    one identifier must not make the other identifier's entry appear.

    Fix: the two pre-save checks passed arguments as (layer, identifier)
    — e.g. ``AV.exists(tmpdir, "dummy", "layer1.0.conv1", idf1)`` — while
    every later check uses (identifier, layer), per the AV.exists signature
    used throughout this file. Corrected to the consistent order; the
    assertions still hold (nothing is saved yet, so both are False).
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        # microsecond-derived identifier plus a fixed second identifier
        idf1 = str(int(datetime.now().microsecond))
        idf2 = "idf2"
        av_0 = torch.randn(64, 16)

        # nothing saved yet -> neither identifier exists for this layer
        self.assertFalse(AV.exists(tmpdir, "dummy", idf1, "layer1.0.conv1"))
        self.assertFalse(AV.exists(tmpdir, "dummy", idf2, "layer1.0.conv1"))

        AV.save(tmpdir, "dummy", idf1, "layer1.0.conv1", av_0, "0")
        self.assertTrue(AV.exists(tmpdir, "dummy", idf1, "layer1.0.conv1"))
        # saving under idf1 must not create an entry for idf2
        self.assertFalse(AV.exists(tmpdir, "dummy", idf2, "layer1.0.conv1"))

        AV.save(tmpdir, "dummy", idf2, "layer1.0.conv1", av_0, "0")
        self.assertTrue(AV.exists(tmpdir, "dummy", idf2, "layer1.0.conv1"))
def save_and_assert_batch(layer_path, total_num_batches, batch, n_batch_name):
    """Save one batch under `n_batch_name` and verify both the on-disk file
    count under `layer_path` and AV.exists for that batch.

    Uses `tmpdir`, `model_id`, and `self` from the enclosing test scope.
    """
    AV.save(
        tmpdir,
        model_id,
        DEFAULT_IDENTIFIER,
        "layer1.0.conv1",
        batch,
        n_batch_name,
    )
    # every saved batch lands as one *.pt file in the layer directory
    saved_files = glob.glob("/".join([layer_path, "*.pt"]))
    self.assertEqual(len(saved_files), total_num_batches)
    self.assertTrue(
        AV.exists(
            tmpdir, model_id, DEFAULT_IDENTIFIER, "layer1.0.conv1", n_batch_name
        )
    )
def test_av_save_two_layers(self) -> None:
    """Saving activations for one layer must not create entries for a
    sibling layer; saving the sibling afterwards makes it exist too."""
    with tempfile.TemporaryDirectory() as tmpdir:
        first_av = torch.randn(64, 16)
        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1", first_av, "0")
        self.assertTrue(
            AV.exists(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1")
        )
        # conv2 was never saved, so it must not exist yet
        self.assertFalse(
            AV.exists(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv2")
        )

        # experimenting with adding to another layer
        second_av = torch.randn(64, 16)
        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv2", second_av, "0")
        self.assertTrue(
            AV.exists(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv2")
        )
def test_av_load_one_batch(self) -> None:
    """Loading before any save raises RuntimeError with a precise message;
    after saves, batches load back in layer order.

    Fixes: the exception-message check used ``assertTrue(a == b)``, which
    reports no diff on failure — replaced with ``assertEqual``; also removed
    a redundant re-assignment of ``model_id`` after the first save.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        av_0 = torch.randn(64, 16)
        av_1 = torch.randn(36, 16)
        avs = [av_0, av_1]  # av_0 first; av_1 is appended to disk later
        model_id = "dummy"

        # loading from an empty store must fail loudly
        with self.assertRaises(RuntimeError) as context:
            AV.load(tmpdir, model_id)
        self.assertEqual(
            str(context.exception),
            f"Activation vectors for model {model_id} "
            f"was not found at path {tmpdir}",
        )

        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1", av_0, "0")
        dataset = AV.load(tmpdir, model_id, identifier=DEFAULT_IDENTIFIER)
        for i, av in enumerate(DataLoader(cast(Dataset, dataset))):
            assertTensorAlmostEqual(self, av, avs[i].unsqueeze(0))

        # a layer that was never saved yields an empty dataset
        dataloader_2 = DataLoader(
            cast(
                Dataset,
                AV.load(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv2"),
            )
        )
        self.assertEqual(len(dataloader_2), 0)

        # after saving the second layer, both batches load back in order
        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv2", av_1, "0")
        dataset = AV.load(tmpdir, "dummy", identifier=DEFAULT_IDENTIFIER)
        dataloader = DataLoader(cast(Dataset, dataset))
        self.assertEqual(len(dataloader), 2)
        for i, av in enumerate(dataloader):
            assertTensorAlmostEqual(self, av, avs[i].unsqueeze(0))
def generate_activation(self, layers: Union[str, List[str]], concept: Concept) -> None:
    r"""
    Computes layer activations for the specified `concept` over each batch of
    its data iterator and persists them with `AV.save`, one entry per batch.

    Args:
        layers (str, list[str]): A list of layer names or a single layer
                name that is used to compute layer activations for the
                specific `concept`.
        concept (Concept): A single Concept object that provides access to
                concept examples using a data iterator.

    Fix: the assert message was a two-element tuple
    ``("Data iterator for concept id:", "{} must be specified".format(...))``,
    which prints as a tuple on failure; joined into one formatted string.
    """
    # accept a bare layer name for convenience
    layers = [layers] if isinstance(layers, str) else layers
    layer_modules = [_get_module_from_name(self.model, layer) for layer in layers]
    layer_act = LayerActivation(self.model, layer_modules)
    assert concept.data_iter is not None, (
        "Data iterator for concept id: {} must be specified".format(concept.id)
    )
    for i, examples in enumerate(concept.data_iter):
        # call the undecorated attribute — presumably to bypass the
        # attribute method's decorator wrapper (TODO confirm)
        activations = layer_act.attribute.__wrapped__(  # type: ignore
            layer_act,
            examples,
            attribute_to_layer_input=self.attribute_to_layer_input,
        )
        for activation, layer_name in zip(activations, layers):
            # flatten each example's activation to a 1D vector before saving
            activation = torch.reshape(activation, (activation.shape[0], -1))
            AV.save(
                self.save_path,
                self.model_id,
                concept.identifier,
                layer_name,
                activation.detach(),
                str(i),  # batch index doubles as the num_id on disk
            )
def save_load_and_assert_batch(layer_path, total_num_batches, batch, n_batch_name):
    """Round-trip one batch: save it under `n_batch_name`, load it back by
    name, and check the layer-wide dataset length.

    Uses `tmpdir`, `model_id`, and `self` from the enclosing test scope.
    """
    AV.save(
        tmpdir,
        model_id,
        DEFAULT_IDENTIFIER,
        "layer1.0.conv1",
        batch,
        n_batch_name,
    )
    # the single-batch load must return exactly what was just saved
    by_name = AV.load(
        tmpdir, model_id, DEFAULT_IDENTIFIER, "layer1.0.conv1", n_batch_name
    )
    assertTensorAlmostEqual(self, next(iter(by_name)), batch, 0.0)
    # the whole-layer dataset reflects every batch saved so far
    by_layer = AV.load(tmpdir, model_id, DEFAULT_IDENTIFIER, "layer1.0.conv1")
    self.assertEqual(len(by_layer), total_num_batches)
def test_av_save_multi_layer(self) -> None:
    """Layer files accumulate across single- and multi-layer saves;
    re-saving an existing layer overwrites rather than duplicates, and a
    new identifier adds a fresh file."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tensors = [torch.randn(64, 16) for _ in range(3)]
        model_path = AV._assemble_model_dir(tmpdir, "dummy")

        # save first layer
        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1", tensors[0], "0")
        self.assertEqual(len(glob.glob(model_path + "*")), 1)

        # add two new layers in a single call
        AV.save(
            tmpdir,
            "dummy",
            DEFAULT_IDENTIFIER,
            ["layer1.0.conv2", "layer1.1.conv1"],
            tensors[1:],
            "0",
        )
        self.assertEqual(len(glob.glob(model_path + "/*/*/*")), 3)

        # overwriting the first saved layer must not add a file
        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1", tensors[0], "0")
        self.assertEqual(len(glob.glob(model_path + "/*/*/*")), 3)

        # saving the same layer under a new identifier creates a fourth file
        new_idf = str(int(datetime.now().microsecond))
        self.assertFalse(AV.exists(tmpdir, "dummy", new_idf, "layer1.0.conv1"))
        AV.save(tmpdir, "dummy", new_idf, "layer1.0.conv1", tensors[0], "0")
        self.assertTrue(AV.exists(tmpdir, "dummy", new_idf, "layer1.0.conv1"))
        self.assertEqual(len(glob.glob(model_path + "/*/*/*")), 4)
def test_av_load_all_identifiers_one_layer(self) -> None:
    """Loading by layer with no identifier gathers that layer's activations
    across all identifiers; loading with no filters returns everything."""
    with tempfile.TemporaryDirectory() as tmpdir:
        base_av = torch.randn(64, 16)
        layer2_avs = [torch.randn(36, 16), torch.randn(16, 16), torch.randn(4, 16)]
        idf1, idf2, idf3 = "idf1", "idf2", "idf3"

        AV.save(tmpdir, "dummy", DEFAULT_IDENTIFIER, "layer1.0.conv1", base_av, "0")
        loader = DataLoader(
            cast(Dataset, AV.load(tmpdir, "dummy", identifier=DEFAULT_IDENTIFIER))
        )
        self.assertEqual(len(loader), 1)

        # add activations for a second layer, under three distinct identifiers
        for idf, av in zip((idf1, idf2, idf3), layer2_avs):
            AV.save(tmpdir, "dummy", idf, "layer1.0.conv2", av, "0")

        layer_loader = DataLoader(
            cast(
                Dataset,
                AV.load(
                    tmpdir,
                    "dummy",
                    layer="layer1.0.conv2",
                ),
            )
        )
        self.assertEqual(len(layer_loader), 3)
        for i, av in enumerate(layer_loader):
            assertTensorAlmostEqual(self, av, layer2_avs[i].unsqueeze(0))

        # no filters -> all four saved activations come back
        loader = DataLoader(cast(Dataset, AV.load(tmpdir, "dummy")))
        self.assertEqual(len(loader), 4)