def _collate(self, batch):
    """
    Assemble a list of samples into padded tensors for one mini-batch.

    :param batch: The input data batch. When the first item is a tuple, the
        batch is unzipped into samples and labels before further processing.
    :type batch: list of (word sequences, features, feature_weights) items,
        optionally each paired with a label
    :return: One ``(x, x_mask)`` pair per word-sequence position, followed by
        the padded feature tensor and the padded feature-weight tensor. If
        labels were present, a ``(inputs, labels)`` tuple is returned instead.
    :rtype: list of torch.Tensor with torch.Tensor (Optional)
    """
    labels = None
    if isinstance(batch[0], tuple):
        # Split the labels off and turn them into a tensor right away.
        batch, raw_labels = zip(*batch)
        labels = self._cuda(torch.Tensor(raw_labels))

    word_seqs, feat_idx, feat_val = zip(*batch)

    # Features are padded as indices, their weights as floats.
    padded_idx, _ = pad_batch(feat_idx, 0)
    padded_val, _ = pad_batch(feat_val, 0, type="float")
    padded_idx = self._cuda(padded_idx)
    padded_val = self._cuda(padded_val)

    # Transpose so that each iteration sees one sequence position across
    # every sample of the batch.
    inputs = []
    for position_samples in zip(*word_seqs):
        x, x_mask = pad_batch(
            position_samples, max_len=self.settings["max_sentence_length"]
        )
        inputs.append((self._cuda(x), self._cuda(x_mask)))

    inputs += [padded_idx, padded_val]

    return inputs if labels is None else (inputs, labels)
def _calc_logits(self, X, batch_size=None):
    """
    Calculate the logits.

    :param X: The input data of the model. Position 1 of every item is read
        as the feature indices and position 2 as the feature values.
    :param batch_size: Number of items fed to ``forward`` at a time. The
        whole input is processed in one pass when it is ``None``.
    :return: The outputs of all passes concatenated along dimension 0. When
        ``self.cardinality`` is 2 each output is flattened with ``view(-1)``
        before it is concatenated.
    """
    columns = list(zip(*X))

    # Generate sparse multi-modal feature input.
    # Shift every index up by one since 0 is the padding.
    feature_idx = np.array(columns[1]) + 1
    feature_val = np.array(columns[2])

    on_gpu = self.model_kwargs["host_device"] in self.gpu
    outputs = torch.Tensor([])
    if on_gpu:
        outputs = outputs.cuda()

    total = len(feature_idx)
    if batch_size is None:
        batch_size = total

    for start in range(0, total, batch_size):
        # The last chunk may be shorter than batch_size.
        stop = min(start + batch_size, total)

        features, _ = pad_batch(feature_idx[start:stop], 0)
        values, _ = pad_batch(feature_val[start:stop], 0, type="float")
        if on_gpu:
            features = features.cuda()
            values = values.cuda()

        output = self.forward(features, values)
        chunk = output.view(-1) if self.cardinality == 2 else output
        outputs = torch.cat((outputs, chunk), 0)

    return outputs
def _collate(self, batch):
    """
    Assemble a list of samples into padded tensors for one mini-batch.

    :param batch: The input data batch. When the first item is a tuple, the
        batch is unzipped into samples and labels before further processing.
    :type batch: list of (features, feature_weights) pair
    :return: ``[features, feature_weights]`` as padded tensors. If labels
        were present, a ``(inputs, labels)`` tuple is returned instead.
    :rtype: list of torch.Tensor with torch.Tensor (Optional)
    """
    labels = None
    if isinstance(batch[0], tuple):
        # Split the labels off and turn them into a tensor right away.
        batch, raw_labels = zip(*batch)
        labels = self._cuda(torch.Tensor(raw_labels))

    feat_idx, feat_val = zip(*batch)

    # Features are padded as indices, their weights as floats.
    padded_idx, _ = pad_batch(feat_idx, 0)
    padded_val, _ = pad_batch(feat_val, 0, type="float")
    padded_idx = self._cuda(padded_idx)
    padded_val = self._cuda(padded_val)

    inputs = [padded_idx, padded_val]
    if labels is None:
        return inputs
    return inputs, labels
def _collate(self, batch):
    """
    Assemble a list of samples into tensors for one mini-batch.

    :param batch: The input data batch. When the first item is a tuple, the
        batch is unzipped into samples and labels before further processing.
    :type batch: list of (candidate, features) pairs
    :return: One ``(x, x_mask)`` pair per candidate position, followed by the
        feature tensor. If labels were present, a ``(inputs, labels)`` tuple
        is returned instead.
    :rtype: list of torch.Tensor with torch.Tensor (Optional)
    """
    labels = None
    if isinstance(batch[0], tuple):
        # Split the labels off and turn them into a tensor right away.
        batch, raw_labels = zip(*batch)
        labels = self._cuda(torch.Tensor(raw_labels))

    candidates, feats = zip(*batch)

    # Transpose so that each iteration sees one candidate position across
    # every sample of the batch.
    inputs = []
    for position_samples in zip(*candidates):
        x, x_mask = pad_batch(
            position_samples, max_len=self.settings["max_sentence_length"]
        )
        inputs.append((self._cuda(x), self._cuda(x_mask)))

    # The features are converted directly, without padding.
    inputs.append(self._cuda(torch.Tensor(feats)))

    return inputs if labels is None else (inputs, labels)
def _calc_logits(self, X, batch_size=None):
    """
    Calculate the logits.

    :param X: The input data of the model.
    :type X: list of (candidate, features) pairs
    :param batch_size: Number of items fed to ``forward`` at a time. The
        whole input is processed in one pass when it is ``None``.
    :type batch_size: int
    :return: The outputs of all passes concatenated along dimension 0. When
        ``self.cardinality`` is 2 each output is flattened with ``view(-1)``
        before it is concatenated.
    :rtype: torch.Tensor
    """
    columns = list(zip(*X))

    # Generate LSTM input
    candidates = np.array(columns[0])

    # Check LSTM input dimension size matches the number of lstms in the model
    assert len(candidates[0]) == len(self.lstms)

    # Generate multi-modal feature input
    feature_tensor = torch.Tensor(np.array(columns[1])).squeeze(1)

    on_gpu = self.settings["host_device"] in self._gpu
    outputs = torch.Tensor([])
    if on_gpu:
        outputs = outputs.cuda()

    total = len(feature_tensor)
    if batch_size is None:
        batch_size = total

    for start in range(0, total, batch_size):
        # The last chunk may be shorter than batch_size.
        stop = min(start + batch_size, total)
        rows = range(start, stop)

        # TODO: optimize this
        sequences = []
        # One padded (x, x_mask) pair per relation arity
        for position in range(len(candidates[0])):
            # Collect this position's sequence from every row of the chunk
            column = [candidates[row][position] for row in rows]
            x, x_mask = pad_batch(column, self.settings["max_sentence_length"])
            if on_gpu:
                x = x.cuda()
                x_mask = x_mask.cuda()
            sequences.append((x, x_mask))

        features = feature_tensor[start:stop]
        if on_gpu:
            features = features.cuda()

        output = self.forward(sequences, features)
        chunk = output.view(-1) if self.cardinality == 2 else output
        outputs = torch.cat((outputs, chunk), 0)

    return outputs
def _calc_logits(self, X, batch_size=None):
    """
    Calculate the logits.

    :param X: The input data of the model. Position 1 of every item is read
        as the feature indices and position 2 as the feature values.
    :type X: list
    :param batch_size: Number of items fed to ``forward`` at a time. The
        whole input is processed in one pass when it is ``None``.
    :type batch_size: int
    :return: The outputs of all passes concatenated along dimension 0. When
        ``self.cardinality`` is 2 each output is flattened with ``view(-1)``
        before it is concatenated.
    :rtype: torch.Tensor
    """
    columns = list(zip(*X))

    # Generate sparse multi-modal feature input.
    # Shift every index up by one since 0 is the padding.
    feature_idx = np.array(columns[1]) + 1
    feature_val = np.array(columns[2])

    on_gpu = self.settings["host_device"] in self._gpu
    outputs = torch.Tensor([])
    if on_gpu:
        outputs = outputs.cuda()

    total = len(feature_idx)
    if batch_size is None:
        batch_size = total

    for start in range(0, total, batch_size):
        # The last chunk may be shorter than batch_size.
        stop = min(start + batch_size, total)

        features, _ = pad_batch(feature_idx[start:stop], 0)
        values, _ = pad_batch(feature_val[start:stop], 0, type="float")
        if on_gpu:
            features = features.cuda()
            values = values.cuda()

        output = self.forward(features, values)
        chunk = output.view(-1) if self.cardinality == 2 else output
        outputs = torch.cat((outputs, chunk), 0)

    return outputs
def _calc_logits(self, X, batch_size=None):
    """
    Calculate the logits.

    :param X: The input data of the model. Position 0 of every item is read
        as the LSTM input and position 1 as the multi-modal features.
    :param batch_size: Number of items fed to ``forward`` at a time. The
        whole input is processed in one pass when it is ``None``.
    :return: The outputs of all passes concatenated along dimension 0. When
        ``self.cardinality`` is 2 each output is flattened with ``view(-1)``
        before it is concatenated.
    """
    columns = list(zip(*X))

    # Generate LSTM input
    candidates = np.array(columns[0])

    # Check LSTM input dimension size matches the number of lstms in the model
    assert len(candidates[0]) == len(self.lstms)

    # Generate multi-modal feature input
    feature_tensor = torch.Tensor(np.array(columns[1])).squeeze(1)

    on_gpu = self.model_kwargs["host_device"] in self.gpu
    outputs = torch.Tensor([])
    if on_gpu:
        outputs = outputs.cuda()

    total = len(feature_tensor)
    if batch_size is None:
        batch_size = total

    for start in range(0, total, batch_size):
        # The last chunk may be shorter than batch_size.
        stop = min(start + batch_size, total)
        rows = range(start, stop)

        # TODO: optimize this
        sequences = []
        for position in range(len(candidates[0])):
            # Collect this position's sequence from every row of the chunk
            column = [candidates[row][position] for row in rows]
            x, x_mask = pad_batch(
                column, self.model_kwargs["max_sentence_length"]
            )
            if on_gpu:
                x = x.cuda()
                x_mask = x_mask.cuda()
            sequences.append((x, x_mask))

        features = feature_tensor[start:stop]
        if on_gpu:
            features = features.cuda()

        output = self.forward(sequences, features)
        chunk = output.view(-1) if self.cardinality == 2 else output
        outputs = torch.cat((outputs, chunk), 0)

    return outputs
def _calc_logits(self, X, batch_size=None):
    """
    Calculate the logits.

    :param X: The input data of the model. Position 0 of every item is read
        as the LSTM input, position 1 as the feature indices and position 2
        as the feature values.
    :type X: list
    :param batch_size: Number of items fed to ``forward`` at a time. The
        whole input is processed in one pass when it is ``None``.
    :type batch_size: int
    :return: The outputs of all passes concatenated along dimension 0. When
        ``self.cardinality`` is 2 each output is flattened with ``view(-1)``
        before it is concatenated.
    :rtype: torch.Tensor
    """
    columns = list(zip(*X))

    # Generate LSTM input
    candidates = np.array(columns[0])

    # Check LSTM input dimension size matches the number of lstms in the model
    assert len(candidates[0]) == len(self.lstms)

    # Generate sparse multi-modal feature input.
    # Index 0 is the padding and indices 1..lstm_dim are the placeholders for
    # the lstm feature, so the real feature indices are moved past both.
    feature_idx = np.array(columns[1]) + self.settings["lstm_dim"] + 1
    feature_val = np.array(columns[2])

    on_gpu = self.settings["host_device"] in self._gpu
    outputs = torch.Tensor([])
    if on_gpu:
        outputs = outputs.cuda()

    total = len(feature_idx)
    if batch_size is None:
        batch_size = total

    for start in range(0, total, batch_size):
        # The last chunk may be shorter than batch_size.
        stop = min(start + batch_size, total)
        rows = range(start, stop)

        # TODO: optimize this
        sequences = []
        # One padded (x, x_mask) pair per relation arity
        for position in range(len(candidates[0])):
            # Collect this position's sequence from every row of the chunk
            column = [candidates[row][position] for row in rows]
            x, x_mask = pad_batch(column, self.settings["max_sentence_length"])
            if on_gpu:
                x = x.cuda()
                x_mask = x_mask.cuda()
            sequences.append((x, x_mask))

        # Every row of the chunk gets the same indices 1..lstm_dim.
        lstm_slots = np.arange(1, self.settings["lstm_dim"] + 1)
        lstm_weight_indices = torch.as_tensor(lstm_slots).repeat(stop - start, 1)

        features, _ = pad_batch(feature_idx[start:stop], 0)
        values, _ = pad_batch(feature_val[start:stop], 0, type="float")

        if on_gpu:
            lstm_weight_indices = lstm_weight_indices.cuda()
            features = features.cuda()
            values = values.cuda()

        output = self.forward(sequences, lstm_weight_indices, features, values)
        chunk = output.view(-1) if self.cardinality == 2 else output
        outputs = torch.cat((outputs, chunk), 0)

    return outputs