def create_and_check_for_pretraining(self, config, pixel_values, labels):
    """Run ViTMAEForPreTraining in eval mode and check the logits shape.

    The expected sequence length equals the number of patches, and each
    per-patch logit vector has ``patch_size**2 * num_channels`` entries.

    NOTE(review): a second definition of this same method appears later in
    this file and shadows this one — confirm which version should survive.
    """
    model = ViTMAEForPreTraining(config)
    model.to(torch_device)
    model.eval()
    outputs = model(pixel_values)

    # Expected sequence length = number of patches, computed from the
    # (possibly rectangular) image and patch sizes.
    img_hw = to_2tuple(self.image_size)
    patch_hw = to_2tuple(self.patch_size)
    n_patches = (img_hw[1] // patch_hw[1]) * (img_hw[0] // patch_hw[0])
    n_channels = self.patch_size ** 2 * self.num_channels

    self.parent.assertEqual(
        outputs.logits.shape, (self.batch_size, n_patches, n_channels)
    )
def create_and_check_for_pretraining(self, config, pixel_values, labels):
    """Run ViTMAEForPreTraining and verify the logits shape.

    Checks the default multi-channel input first, then rebuilds the model
    with ``config.num_channels = 1`` and repeats the shape check on
    greyscale images.
    """
    model = ViTMAEForPreTraining(config)
    model.to(torch_device)
    model.eval()
    outputs = model(pixel_values)

    # Assumes a square image and square patches: (image_size // patch_size)**2
    # patches in total; each patch flattens to patch_size**2 * num_channels values.
    n_patches = (self.image_size // self.patch_size) ** 2
    n_channels = self.patch_size ** 2 * self.num_channels
    self.parent.assertEqual(
        outputs.logits.shape, (self.batch_size, n_patches, n_channels)
    )

    # Repeat the check with single-channel (greyscale) images.
    config.num_channels = 1
    model = ViTMAEForPreTraining(config)
    model.to(torch_device)
    model.eval()
    grey_pixels = floats_tensor(
        [self.batch_size, 1, self.image_size, self.image_size]
    )
    outputs = model(grey_pixels)
    n_channels = self.patch_size ** 2
    self.parent.assertEqual(
        outputs.logits.shape, (self.batch_size, n_patches, n_channels)
    )