Python pad_batch示例，fonduer.learning.disc_models.utils.pad_batch Python示例

示例#1

0

显示文件

    def _collate(self, batch):
        """
        Assemble a list of samples into padded tensors.

        If the first element of ``batch`` is a tuple, the batch is treated as
        (sample, label) pairs and the labels are split off into a tensor.

        :param batch: The input data batch.
        :type batch: list of (word sequences, features, feature_weights) tuples
        :return: A list holding one (padded sequence, mask) pair per sequence
            position followed by the padded feature and feature-weight
            tensors; when labels are present, a (list, label tensor) pair.
        :rtype: list of torch.Tensor with torch.Tensor (Optional)
        """

        labels = None
        if isinstance(batch[0], tuple):
            # Separate the samples from their labels.
            batch, raw_labels = zip(*batch)
            labels = self._cuda(torch.Tensor(raw_labels))

        sequences, feature_ids, feature_values = zip(*batch)

        # Only the padded tensors are needed for the sparse features; the
        # second value returned by pad_batch is discarded.
        padded_ids = pad_batch(feature_ids, 0)[0]
        padded_values = pad_batch(feature_values, 0, type="float")[0]

        padded_ids = self._cuda(padded_ids)
        padded_values = self._cuda(padded_values)

        inputs = []

        # Regroup so that each iteration sees one sequence position across
        # the whole batch.
        for per_position in zip(*sequences):
            tokens, mask = pad_batch(
                per_position, max_len=self.settings["max_sentence_length"]
            )
            inputs.append((self._cuda(tokens), self._cuda(mask)))
        inputs += [padded_ids, padded_values]

        return inputs if labels is None else (inputs, labels)

示例#2

0

显示文件

文件： sparse_logistic_regression.py 项目： liangzhangsg/fonduer

    def _calc_logits(self, X, batch_size=None):
        """
        Calculate the logits.

        :param X: The input data of the model
        :param batch_size: The batch size
        """
        # Generate sparse multi-modal feature input
        F = np.array(list(
            zip(*X))[1]) + 1  # Correct the index since 0 is the padding
        V = np.array(list(zip(*X))[2])

        outputs = (torch.Tensor([]).cuda() if self.model_kwargs["host_device"]
                   in self.gpu else torch.Tensor([]))

        n = len(F)
        if batch_size is None:
            batch_size = n
        for batch_st in range(0, n, batch_size):
            batch_ed = batch_st + batch_size if batch_st + batch_size <= n else n

            features, _ = pad_batch(F[batch_st:batch_ed], 0)
            values, _ = pad_batch(V[batch_st:batch_ed], 0, type="float")

            if self.model_kwargs["host_device"] in self.gpu:
                features = features.cuda()
                values = values.cuda()

            output = self.forward(features, values)
            if self.cardinality == 2:
                outputs = torch.cat((outputs, output.view(-1)), 0)
            else:
                outputs = torch.cat((outputs, output), 0)

        return outputs

示例#3

0

显示文件

文件： sparse_logistic_regression.py 项目： srravula1/fonduer

    def _collate(self, batch):
        """
        Assemble a list of samples into padded feature tensors.

        If the first element of ``batch`` is a tuple, the batch is treated as
        (sample, label) pairs and the labels are split off into a tensor.

        :param batch: The input data batch.
        :type batch: list of (features, feature_weights) pair
        :return: ``[features, feature_weights]`` as padded tensors; when
            labels are present, a (list, label tensor) pair.
        :rtype: list of torch.Tensor with torch.Tensor (Optional)
        """

        labels = None
        if isinstance(batch[0], tuple):
            # Separate the samples from their labels.
            batch, raw_labels = zip(*batch)
            labels = self._cuda(torch.Tensor(raw_labels))

        feature_ids, feature_values = zip(*batch)

        # Only the padded tensors are needed; the second value returned by
        # pad_batch is discarded.
        padded_ids = pad_batch(feature_ids, 0)[0]
        padded_values = pad_batch(feature_values, 0, type="float")[0]

        inputs = [self._cuda(padded_ids), self._cuda(padded_values)]

        return inputs if labels is None else (inputs, labels)

示例#4

0

显示文件

    def _collate(self, batch):
        """
        Assemble a list of samples into padded tensors.

        If the first element of ``batch`` is a tuple, the batch is treated as
        (sample, label) pairs and the labels are split off into a tensor.

        :param batch: The input data batch.
        :type batch: list of (candidate, features) pairs
        :return: A list holding one (padded sequence, mask) pair per sequence
            position followed by the feature tensor; when labels are present,
            a (list, label tensor) pair.
        :rtype: list of torch.Tensor with torch.Tensor (Optional)
        """

        labels = None
        if isinstance(batch[0], tuple):
            # Separate the samples from their labels.
            batch, raw_labels = zip(*batch)
            labels = self._cuda(torch.Tensor(raw_labels))

        sequences, features = zip(*batch)

        inputs = []

        # Regroup so that each iteration sees one sequence position across
        # the whole batch.
        for per_position in zip(*sequences):
            tokens, mask = pad_batch(
                per_position, max_len=self.settings["max_sentence_length"]
            )
            inputs.append((self._cuda(tokens), self._cuda(mask)))
        inputs.append(self._cuda(torch.Tensor(features)))

        return inputs if labels is None else (inputs, labels)

示例#5

0

显示文件

文件： lstm.py 项目： nicholaschiang/fonduer

    def _calc_logits(self, X, batch_size=None):
        """
        Calculate the logits.

        :param X: The input data of the model.
        :type X: list of (candidate, features) pairs
        :param batch_size: The batch size.
        :type batch_size: int
        :return: The output logits of model.
        :rtype: torch.Tensor of shape (batch_size, num_classes) if num_classes > 2
            otherwise shape (batch_size, 1)
        """

        # Generate LSTM input
        C = np.array(list(zip(*X))[0])

        # Check LSTM input dimension size matches the number of lstms in the model
        assert len(C[0]) == len(self.lstms)

        # Generate multi-modal feature input
        F = np.array(list(zip(*X))[1])
        F = torch.Tensor(F).squeeze(1)

        outputs = (torch.Tensor([]).cuda() if self.settings["host_device"]
                   in self._gpu else torch.Tensor([]))

        n = len(F)
        if batch_size is None:
            batch_size = n
        for batch_st in range(0, n, batch_size):
            batch_ed = batch_st + batch_size if batch_st + batch_size <= n else n

            # TODO: optimize this
            sequences = []
            # For loop each relation arity
            for i in range(len(C[0])):
                sequence = []
                # Generate sequence for the batch
                for j in range(batch_st, batch_ed):
                    sequence.append(C[j][i])
                x, x_mask = pad_batch(sequence,
                                      self.settings["max_sentence_length"])
                if self.settings["host_device"] in self._gpu:
                    x = x.cuda()
                    x_mask = x_mask.cuda()
                sequences.append((x, x_mask))

            features = (F[batch_st:batch_ed].cuda()
                        if self.settings["host_device"] in self._gpu else
                        F[batch_st:batch_ed])

            output = self.forward(sequences, features)
            if self.cardinality == 2:
                outputs = torch.cat((outputs, output.view(-1)), 0)
            else:
                outputs = torch.cat((outputs, output), 0)

        return outputs

示例#6

0

显示文件

    def _calc_logits(self, X, batch_size=None):
        """
        Calculate the logits.

        :param X: The input data of the model.
        :type X: list of (candidate, fetures) pair
        :param batch_size: The batch size.
        :type batch_size: int
        :return: The output logits of model.
        :rtype: torch.Tensor of shape (batch_size, num_classes) if num_classes > 2
            otherwise shape (batch_size, 1)
        """

        # Generate sparse multi-modal feature input
        F = np.array(list(
            zip(*X))[1]) + 1  # Correct the index since 0 is the padding
        V = np.array(list(zip(*X))[2])

        outputs = (torch.Tensor([]).cuda() if self.settings["host_device"]
                   in self._gpu else torch.Tensor([]))

        n = len(F)
        if batch_size is None:
            batch_size = n
        for batch_st in range(0, n, batch_size):
            batch_ed = batch_st + batch_size if batch_st + batch_size <= n else n

            features, _ = pad_batch(F[batch_st:batch_ed], 0)
            values, _ = pad_batch(V[batch_st:batch_ed], 0, type="float")

            if self.settings["host_device"] in self._gpu:
                features = features.cuda()
                values = values.cuda()

            output = self.forward(features, values)
            if self.cardinality == 2:
                outputs = torch.cat((outputs, output.view(-1)), 0)
            else:
                outputs = torch.cat((outputs, output), 0)

        return outputs

示例#7

0

显示文件

文件： lstm.py 项目： liangzhangsg/fonduer

    def _calc_logits(self, X, batch_size=None):
        """
        Calculate the logits.

        :param X: The input data of the model
        :param batch_size: The batch size
        """
        # Generate LSTM input
        C = np.array(list(zip(*X))[0])

        # Check LSTM input dimension size matches the number of lstms in the model
        assert len(C[0]) == len(self.lstms)

        # Generate multi-modal feature input
        F = np.array(list(zip(*X))[1])
        F = torch.Tensor(F).squeeze(1)

        outputs = (torch.Tensor([]).cuda() if self.model_kwargs["host_device"]
                   in self.gpu else torch.Tensor([]))

        n = len(F)
        if batch_size is None:
            batch_size = n
        for batch_st in range(0, n, batch_size):
            batch_ed = batch_st + batch_size if batch_st + batch_size <= n else n

            # TODO: optimize this
            sequences = []
            for i in range(len(C[0])):
                sequence = []
                for j in range(batch_st, batch_ed):
                    sequence.append(C[j][i])
                x, x_mask = pad_batch(sequence,
                                      self.model_kwargs["max_sentence_length"])
                if self.model_kwargs["host_device"] in self.gpu:
                    x = x.cuda()
                    x_mask = x_mask.cuda()
                sequences.append((x, x_mask))

            features = (F[batch_st:batch_ed].cuda()
                        if self.model_kwargs["host_device"] in self.gpu else
                        F[batch_st:batch_ed])

            output = self.forward(sequences, features)
            if self.cardinality == 2:
                outputs = torch.cat((outputs, output.view(-1)), 0)
            else:
                outputs = torch.cat((outputs, output), 0)

        return outputs

示例#8

0

显示文件

文件： sparse_lstm.py 项目： vishnuraj007/fonduer

    def _calc_logits(self, X, batch_size=None):
        """
        Calculate the logits.

        :param X: The input data of the model.
        :type X: list of (candidate, features) pairs
        :param batch_size: The batch size.
        :type batch_size: int
        :return: The output logits of model.
        :rtype: torch.Tensor of shape (batch_size, num_classes) if num_classes > 2
            otherwise shape (batch_size, 1)
        """

        # Generate LSTM input
        C = np.array(list(zip(*X))[0])

        # Check LSTM input dimension size matches the number of lstms in the model
        assert len(C[0]) == len(self.lstms)

        # Generate sparse multi-modal feature input
        F = (
            np.array(list(zip(*X))[1]) + self.settings["lstm_dim"] + 1
        )  # Correct the index since 0 is the padding and placeholder for lstm feature
        V = np.array(list(zip(*X))[2])

        outputs = (torch.Tensor([]).cuda() if self.settings["host_device"]
                   in self._gpu else torch.Tensor([]))

        n = len(F)
        if batch_size is None:
            batch_size = n
        for batch_st in range(0, n, batch_size):
            batch_ed = batch_st + batch_size if batch_st + batch_size <= n else n

            # TODO: optimize this
            sequences = []
            # For loop each relation arity
            for i in range(len(C[0])):
                sequence = []
                # Generate sequence for the batch
                for j in range(batch_st, batch_ed):
                    sequence.append(C[j][i])
                x, x_mask = pad_batch(sequence,
                                      self.settings["max_sentence_length"])
                if self.settings["host_device"] in self._gpu:
                    x = x.cuda()
                    x_mask = x_mask.cuda()
                sequences.append((x, x_mask))

            lstm_weight_indices = torch.as_tensor(
                np.arange(1, self.settings["lstm_dim"] + 1)).repeat(
                    batch_ed - batch_st, 1)

            features, _ = pad_batch(F[batch_st:batch_ed], 0)
            values, _ = pad_batch(V[batch_st:batch_ed], 0, type="float")

            if self.settings["host_device"] in self._gpu:
                lstm_weight_indices = lstm_weight_indices.cuda()
                features = features.cuda()
                values = values.cuda()

            output = self.forward(sequences, lstm_weight_indices, features,
                                  values)
            if self.cardinality == 2:
                outputs = torch.cat((outputs, output.view(-1)), 0)
            else:
                outputs = torch.cat((outputs, output), 0)

        return outputs