import os

import torch

print("loading pretrained cnn")
# Load pretrained CNN if necessary (CNN, save_features and the config
# flags below are defined elsewhere in the repository)
if not should_train_visual and not use_symbolic_input and shapes_dataset is not None:
    cnn_model_id = os.path.basename(cnn_model_file_name)

    features_folder_name = 'data/shapes/{}_{}'.format(shapes_dataset,
                                                      cnn_model_id)

    # Check if the features were already extracted with this CNN
    if not os.path.exists(features_folder_name):
        # Load CNN from dumped model
        # Load the checkpoint onto CPU regardless of where it was trained
        state = torch.load(cnn_model_file_name,
                           map_location=lambda storage, location: storage)
        cnn_state = {k[4:]: v for k, v in state.items() if 'cnn' in k}
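        # For illustration: if the checkpoint holds keys such as
        # 'cnn.conv1.weight' and 'sender.lstm.weight_ih_l0' (hypothetical
        # names), the comprehension above keeps only the 'cnn.' entries and
        # strips that prefix, yielding 'conv1.weight' as CNN's state dict expects.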
        trained_cnn = CNN(n_image_features)
        trained_cnn.load_state_dict(cnn_state)

        if use_gpu:
            trained_cnn = trained_cnn.cuda()

        print("=CNN state loaded=")
        print("Extracting features...")

        # Dump the features to then load them
        features_folder_name = save_features(trained_cnn, shapes_dataset,
                                             cnn_model_id)

print("crating one hot metadata")
if not shapes_dataset is None:
    # Create onehot metadata if not created yet
import numpy as np
import torch
import torch.nn as nn

# Sender, Receiver and CNN are defined elsewhere in the repository


class Model(nn.Module):
	def __init__(self, n_image_features, vocab_size,
		embedding_dim, hidden_size,
		bound_idx, max_sentence_length,
		vl_loss_weight, bound_weight,
		should_train_cnn, n_rsa_samples, use_gpu, K, use_distractors_in_sender):
		super().__init__()

		self.use_gpu = use_gpu
		self.bound_token_idx = bound_idx
		self.max_sentence_length = max_sentence_length
		self.vocab_size = vocab_size
		self.vl_loss_weight = vl_loss_weight # lambda
		self.bound_weight = bound_weight # alpha
		self.should_train_cnn = should_train_cnn
		self.n_rsa_samples = n_rsa_samples
		self.use_distractors_in_sender = use_distractors_in_sender
		self.n_image_features = n_image_features
		self.mode = None # Grad-CAM mode; set via grad_cam() before calling forward()

		if self.should_train_cnn:
			self.cnn = CNN(n_image_features)
		if self.use_distractors_in_sender:
			# The sender sees the target and all K distractors concatenated
			sender_image_features = (K + 1) * n_image_features
		else:
			sender_image_features = n_image_features
		self.sender = Sender(sender_image_features, vocab_size,
			embedding_dim, hidden_size,
			bound_idx, max_sentence_length, vl_loss_weight, bound_weight, use_gpu)
		self.receiver = Receiver(n_image_features, vocab_size,
			embedding_dim, hidden_size, use_gpu)

	def _pad(self, m, seq_lengths):
		# Mask out message positions beyond each sequence's length and
		# replace them with the bound (end-of-sentence) token.
		max_len = m.shape[1]

		mask = torch.arange(max_len)
		if self.use_gpu:
			mask = mask.cuda()

		# mask[i, t] is True for positions within sequence i's length
		mask = mask.expand(
			len(seq_lengths), max_len
		) < seq_lengths.unsqueeze(1)

		if self.training:
			# m holds per-position distributions over the vocabulary:
			# zero out padded positions, then put all mass on the bound token
			mask = mask.type(dtype=m.dtype)
			m = m * mask.unsqueeze(2)
			m[:, :, self.bound_token_idx] += (mask == 0).float()
		else:
			# m holds token indices: overwrite padded positions in place
			m = m.masked_fill_(mask == 0, self.bound_token_idx)

		return m
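
	# Worked example for _pad (eval mode), assuming bound_token_idx == 0:
	#   m = [[5, 3, 2, 4]], seq_lengths = [2]  ->  [[5, 3, 0, 0]]
	# i.e. every position at or beyond the sequence length becomes the bound token.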

	def _get_word_counts(self, m):
		if self.training:
			# m is (near) one-hot per position during training, so summing
			# over time and batch gives per-word counts; detach keeps the
			# counts out of the computation graph.
			c = m.sum(dim=1).sum(dim=0).detach()
		else:
			# At evaluation time m holds token indices, so count occurrences
			c = torch.zeros([self.vocab_size])
			if self.use_gpu:
				c = c.cuda()

			for w_idx in range(self.vocab_size):
				c[w_idx] = (m == w_idx).sum()
		return c
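
	# Worked example for _get_word_counts (training mode): a single one-hot
	# message [[[0, 1, 0], [0, 0, 1]]] over a 3-word vocabulary yields
	# counts tensor([0., 1., 1.]).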

	def _count_unique_messages(self, m):
		return len(np.unique(m.detach().cpu().numpy(), axis=0))
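
	# e.g. messages [[1, 2, 0], [1, 2, 0], [3, 2, 0]] contain 2 unique rows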

	def grad_cam(self, mode):
		# mode selects which pathway receives gradients for Grad-CAM:
		# 's_t'/'s_d' for the sender's target/distractors,
		# 'r_t'/'r_d' for the receiver's
		self.mode = mode

	def forward(self, target, distractors, word_counts, target_onehot_metadata):
		self.receiver.mode = self.mode
		batch_size = target.shape[0]

		if self.use_gpu:
			target = target.cuda()
			distractors = [d.cuda() for d in distractors]

		n_dim = 5 if self.should_train_cnn else 3
		# Two targets give shape (batch, 2, C, H, W) for raw images or
		# (batch, 2, n_image_features) for precomputed features
		use_different_targets = len(target.shape) == n_dim
		assert not use_different_targets or target.shape[1] == 2, 'There should be exactly two targets'

		if self.should_train_cnn:
			# A frozen copy of the CNN handles every image that should not
			# receive gradients in the selected Grad-CAM mode; only the
			# branch matching self.mode runs through self.cnn itself.
			cnn_copy = type(self.cnn)(self.n_image_features) # get a new instance
			cnn_copy.load_state_dict(self.cnn.state_dict()) # copy the current weights
			if self.use_gpu:
				cnn_copy = cnn_copy.cuda()

			if not use_different_targets:
				# Extract features
				if self.mode in ['r_d', 's_d']:
					distractors = [self.cnn(d) for d in distractors]
				else:
					# Detach so no gradients flow back through the copy
					distractors = [cnn_copy(d).detach() for d in distractors]
				if self.mode == 's_t':
					target_out = self.cnn(target)
				else:
					target_out = cnn_copy(target).detach()
				target_sender = target_out
				target_receiver = target_out
			else:
				# Extract features for the two separate target images
				if self.mode == 's_t':
					target_sender = self.cnn(target[:, 0, :, :, :])
					target_receiver = cnn_copy(target[:, 1, :, :, :]).detach()
				elif self.mode == 'r_t':
					target_sender = cnn_copy(target[:, 0, :, :, :]).detach()
					target_receiver = self.cnn(target[:, 1, :, :, :])
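
# A minimal usage sketch with illustrative (hypothetical) hyperparameter
# values; the real settings come from the experiment configuration:
#
#   model = Model(n_image_features=512, vocab_size=25, embedding_dim=256,
#                 hidden_size=512, bound_idx=0, max_sentence_length=5,
#                 vl_loss_weight=0.1, bound_weight=1.0, should_train_cnn=True,
#                 n_rsa_samples=0, use_gpu=False, K=3,
#                 use_distractors_in_sender=False)
#   model.grad_cam('s_t')  # route Grad-CAM gradients through the sender's target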
Example #4
# Load metadata
train_metadata, valid_metadata, test_metadata, noise_metadata = load_shapes_classdata(
    shapes_dataset)

print("loaded metadata")
print("loading data")
# Load data
train_data, valid_data, test_data, noise_data = load_images(
    'shapes/{}'.format(shapes_dataset), BATCH_SIZE, K)

print("data loaded")

print("creating model")
cnnmodel = CNN(n_image_features)

import torch.nn as nn


class MyModel(nn.Module):
    def __init__(self, cnn, n_out_features, out_classes):
        super(MyModel, self).__init__()
        self.cnn = cnn
        self.fc = nn.Linear(n_out_features, out_classes)

    def forward(self, x):
        x = self.cnn(x)
        x = self.fc(x)
        # Return raw logits; apply torch.softmax(x, dim=1) externally if
        # probabilities are needed (nn.CrossEntropyLoss expects logits)
        return x
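
# Minimal usage sketch: wrap the CNN defined above in the classifier head.
# out_classes=10 is an illustrative assumption; use the real class count
# from the dataset metadata.
classifier = MyModel(cnnmodel, n_out_features=n_image_features, out_classes=10)
# For a batch of images x from the loaders above:
#   logits = classifier(x)
#   loss = nn.CrossEntropyLoss()(logits, labels)  # expects raw logits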