Exemplo n.º 1
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the flattened reference labels and their per-sentence lengths.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional
                outside of training.
            train: Boolean flag telling whether it is training time.

        Returns:
            The feed dictionary from ``ModelPart.feed_dict``, extended with
            ``label_lengths`` and ``flat_labels`` when references exist.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        references = cast(Iterable[List[str]],
                          dataset.maybe_get_series(self.data_id))

        if references is None:
            if train:
                raise ValueError("When training, you must feed "
                                 "reference sentences")
            return feed

        time_major_ids, time_major_pad = self.vocabulary.sentences_to_tensor(
            list(references), train_mode=train, max_len=self.max_length)

        # The vocabulary hands back time-major tensors, whereas the targets
        # are expected batch-major, hence the transpositions.
        token_ids = time_major_ids.T
        is_token = time_major_pad.T > 0.5

        # Boolean indexing keeps only the non-padded positions, row by row.
        feed[self.label_lengths] = is_token.sum(axis=1)
        feed[self.flat_labels] = token_ids[is_token]

        return feed
Exemplo n.º 2
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the encoder inputs, padded or cropped to a common length.

        Every item of the series is zero-padded (or truncated) along its
        first axis to the longest item in the batch, capped by
        ``self.max_input_len`` when that is set.

        Arguments:
            dataset: The dataset to use
            train: Boolean flag telling whether it is training time
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        sequences = list(dataset.get_series(self.data_id))

        target_len = max(seq.shape[0] for seq in sequences)
        if self.max_input_len is not None:
            target_len = min(self.max_input_len, target_len)

        def _fit(seq):
            # Copy the leading part of ``seq`` onto a zero canvas whose first
            # dimension is ``target_len``; the remaining axes are untouched.
            kept = min(target_len, seq.shape[0])
            canvas = np.zeros(shape=(target_len, ) + seq.shape[1:],
                              dtype=seq.dtype)
            canvas[:kept] = seq[:kept]
            return kept, canvas

        fitted = [_fit(seq) for seq in sequences]

        feed[self.inputs] = [canvas for _, canvas in fitted]
        feed[self._input_lengths] = [kept for kept, _ in fitted]

        return feed
Exemplo n.º 3
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the decoder's feed dictionary.

        The go symbols are always fed (one per dataset item). The training
        inputs and their mask are fed only when the reference series
        ``self.data_id`` is available.

        Arguments:
            dataset: The dataset to use for the decoder.
            train: Boolean flag, telling whether this is a training run.

        Raises:
            ValueError: If ``train`` is set and the references are missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        references = dataset.maybe_get_series(self.data_id)
        if train and references is None:
            raise ValueError("When training, you must feed "
                             "reference sentences")

        start_index = self.vocabulary.get_word_index(START_TOKEN)
        feed[self.go_symbols] = np.full(
            [len(dataset)], start_index, dtype=np.int32)

        if references is None:
            return feed

        # train_mode stays False on purpose: target words must not be
        # replaced by <unk>.
        token_ids, token_weights = self.vocabulary.sentences_to_tensor(
            list(references), self.max_output_len, train_mode=False,
            add_start_symbol=False, add_end_symbol=True,
            pad_to_max_len=False)

        feed[self.train_inputs] = token_ids
        feed[self.train_mask] = token_weights

        return feed
Exemplo n.º 4
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the reference sentences as a sparse tensor value.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional
                outside of training.
            train: Boolean flag telling whether it is training time.

        Returns:
            The feed dictionary from ``ModelPart.feed_dict``, extended with
            ``train_targets`` when references exist.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        references = cast(Iterable[List[str]],
                          dataset.maybe_get_series(self.data_id))

        if references is None:
            if train:
                raise ValueError("When training, you must feed "
                                 "reference sentences")
            return feed

        time_major_ids, time_major_pad = self.vocabulary.sentences_to_tensor(
            list(references), train_mode=train, max_len=self.max_length)

        # The vocabulary returns time-major tensors; targets are batch-major.
        token_ids = time_major_ids.T
        is_token = time_major_pad.T > 0.5

        # Sparse form: coordinates of the non-padded cells and their values.
        coordinates = np.stack(np.where(is_token), axis=1)
        feed[self.train_targets] = tf.SparseTensorValue(
            indices=coordinates,
            values=token_ids[is_token],
            dense_shape=token_ids.shape)

        return feed
Exemplo n.º 5
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the image batch, divided by 255, into ``image_input``.

        Arguments:
            dataset: The dataset providing the series ``self.data_id``.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        # The series may be a plain list (e.g. when loaded from a pickled
        # file) rather than a numpy tensor, so it is always converted.
        image_list = list(dataset.get_series(self.data_id))
        pixels = np.array(image_list)

        feed[self.image_input] = pixels / 255.0
        return feed
Exemplo n.º 6
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the target tokens processed by ``pad_batch``.

        The series ``self.data_id`` is optional; when the dataset does not
        provide it, the dictionary from ``ModelPart.feed_dict`` is returned
        unchanged.

        Arguments:
            dataset: The dataset to use.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        targets = dataset.maybe_get_series(self.data_id)
        if targets is None:
            return feed

        feed[self.target_tokens] = pad_batch(list(targets))
        return feed
Exemplo n.º 7
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the image batch, divided by 255, into ``image_input``.

        Arguments:
            dataset: The dataset providing the series ``self.data_id``.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        # A series read from a pickled file is a list, not a numpy tensor;
        # converting unconditionally covers both cases.
        raw_images = np.array(list(dataset.get_series(self.data_id)))
        scaled_images = raw_images / 255.0

        feed[self.image_input] = scaled_images
        return feed
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the image batch into the ``input_image`` placeholder.

        Arguments:
            dataset: The dataset providing the series ``self.data_id``.
            train: Boolean flag telling whether it is training time.

        Returns:
            The feed dictionary from ``ModelPart.feed_dict`` extended with
            the image batch.
        """
        fd = ModelPart.feed_dict(self, dataset, train)

        # Materialize the series first: np.array() applied to a lazy iterable
        # (e.g. a generator) produces a 0-d object array instead of a
        # stacked batch, which would then fail the shape check below.
        images = np.array(list(dataset.get_series(self.data_id)))
        assert images.shape[1:] == (self.height, self.width, 3), (
            "Unexpected image shape {}".format(images.shape[1:]))
        fd[self.input_image] = images

        return fd
Exemplo n.º 9
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the first element of every item in the optional series.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        series = dataset.maybe_get_series(self.data_id)
        if series is None:
            return feed

        batch = list(series)
        # zip(*batch) regroups the items position by position; group 0 is
        # therefore the tuple of the first elements of all items.
        by_position = list(zip(*batch))
        feed[self.train_inputs] = by_position[0]

        return feed
Exemplo n.º 10
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the leading element of each item of the optional series.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        maybe_series = dataset.maybe_get_series(self.data_id)
        if maybe_series is not None:
            items = list(maybe_series)
            # Transposing with zip puts all first elements into group 0.
            transposed = list(zip(*items))
            feed[self.train_inputs] = transposed[0]

        return feed
Exemplo n.º 11
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the transposed index tensor of the optional target series.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time; it is
                forwarded to the vocabulary as ``train_mode``.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        targets = dataset.maybe_get_series(self.data_id)
        if targets is None:
            return feed

        # Only the index tensor is needed; the second return value is
        # discarded.
        token_ids, _ = self.vocabulary.sentences_to_tensor(
            list(targets), pad_to_max_len=False, train_mode=train)
        feed[self.train_targets] = token_ids.T

        return feed
Exemplo n.º 12
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the optional target sentences processed by ``pad_batch``.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        targets = dataset.maybe_get_series(self.data_id)
        if targets is None:
            return feed

        target_batch = list(targets)
        feed[self.target_tokens] = pad_batch(
            target_batch, self.max_output_len, self.add_start_symbol,
            self.add_end_symbol)

        return feed
Exemplo n.º 13
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the first token of every padded target sentence.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        targets = dataset.maybe_get_series(self.data_id)
        if targets is None:
            return feed

        padded = pad_batch(list(targets), self.max_output_len)
        # Only the leading element of each padded sentence is the label.
        feed[self.targets] = [sentence[0] for sentence in padded]

        return feed
Exemplo n.º 14
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the first row of the label tensor into ``gt_inputs[0]``.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        labels = dataset.maybe_get_series(self.data_id)
        if labels is None:
            return feed

        # The second return value of sentences_to_tensor is not used here.
        label_ids, _ = self.vocabulary.sentences_to_tensor(
            list(labels), self.max_output_len)
        feed[self.gt_inputs[0]] = label_ids[0]

        return feed
Exemplo n.º 15
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the optional target sentences processed by ``pad_batch``.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        maybe_targets = dataset.maybe_get_series(self.data_id)
        if maybe_targets is not None:
            batch = list(maybe_targets)
            padded = pad_batch(batch, self.max_output_len,
                               self.add_start_symbol, self.add_end_symbol)
            feed[self.target_tokens] = padded

        return feed
Exemplo n.º 16
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the leading token of each padded target sentence.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        maybe_targets = dataset.maybe_get_series(self.data_id)
        if maybe_targets is not None:
            padded_batch = pad_batch(list(maybe_targets), self.max_output_len)
            first_tokens = [tokens[0] for tokens in padded_batch]
            feed[self.targets] = first_tokens

        return feed
Exemplo n.º 17
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the encoder's input tokens.

        The series ``self.data_id`` is mandatory here; its sentences are
        passed through ``pad_batch`` together with ``self.max_input_len``.

        Arguments:
            dataset: The dataset to use
            train: Boolean flag telling whether it is training time
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        input_batch = list(dataset.get_series(self.data_id))
        feed[self.input_tokens] = pad_batch(input_batch, self.max_input_len)

        return feed
Exemplo n.º 18
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the reference target tokens processed by ``pad_batch``.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional
                outside of training.
            train: Boolean flag telling whether it is training time.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        references = dataset.maybe_get_series(self.data_id)

        if references is None:
            if train:
                raise ValueError(
                    "You must feed reference sentences when training")
            return feed

        feed[self.target_tokens] = pad_batch(list(references),
                                             self.max_length)
        return feed
Exemplo n.º 19
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the reference target tokens processed by ``pad_batch``.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` may be absent
                when not training.
            train: Boolean flag telling whether it is training time.

        Raises:
            ValueError: If ``train`` is set and the series is missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        references = dataset.maybe_get_series(self.data_id)
        has_references = references is not None

        if train and not has_references:
            raise ValueError("You must feed reference sentences when training")

        if has_references:
            reference_batch = list(references)
            feed[self.target_tokens] = pad_batch(reference_batch,
                                                 self.max_length)

        return feed
Exemplo n.º 20
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the image batch (divided by 255) and its pixel mask.

        Arguments:
            dataset: The dataset providing the series ``self.data_id``.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        # A series coming from a pickled file is a list rather than a numpy
        # tensor, so it is converted unconditionally.
        pixels = np.array(list(dataset.get_series(self.data_id)))

        feed[self.image_input] = pixels / 255.0

        # The mask is the sign of the sum over axis 3 (kept as a singleton
        # axis): positions whose values sum to zero are masked out.
        summed_over_axis3 = np.sum(pixels, axis=3, keepdims=True)
        feed[self.image_mask] = np.sign(summed_over_axis3)

        return feed
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the reference alignment, or zeros when it is unavailable.

        Arguments:
            dataset: The dataset; the series ``self.data_id`` is optional.
            train: Boolean flag telling whether it is training time. A
                missing alignment during training only triggers a warning.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        reference = dataset.maybe_get_series(self.data_id)
        if reference is not None:
            feed[self.ref_alignment] = reference
            return feed

        if train:
            warn("Training alignment not present!")

        # Fall back to an all-zero float32 tensor sized by the dataset
        # length, the decoder's maximum output length and the encoder
        # input's maximum length.
        fallback_shape = (len(dataset), self.decoder.max_output_len,
                          self.enc_input.max_length)
        feed[self.ref_alignment] = np.zeros(fallback_shape, np.float32)

        return feed
Exemplo n.º 22
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the encoder inputs as batch-major rows of token indices.

        Arguments:
            dataset: The dataset to use
            train: Boolean flag telling whether it is training time
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        source = dataset.get_series(self.data_id)
        # Only the index tensor is used; the second return value is dropped.
        time_major_ids, _ = self.vocabulary.sentences_to_tensor(
            list(source), self.max_input_len, pad_to_max_len=False,
            train_mode=train)

        # sentences_to_tensor yields data shaped (time, batch); zipping the
        # rows together transposes it.
        batch_major_ids = list(zip(*time_major_ids))
        feed[self.inputs] = batch_major_ids

        return feed
Exemplo n.º 23
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the placeholders with the data.

        Each factor series is converted to indices by its own vocabulary.
        The paddings of all factors must be byte-identical; the paddings of
        the last factor are then fed as the mask.

        Arguments:
            dataset: The dataset.
            train: A flag whether the train mode is enabled.

        Returns:
            The constructed feed dictionary that contains the factor data and
            the mask.

        Raises:
            ValueError: If the paddings of the individual factors differ.
        """
        fd = ModelPart.feed_dict(self, dataset, train)

        # Raw bytes of each factor's paddings; more than one distinct value
        # means the factors disagree on the sentence lengths.
        padding_signatures = set()
        last_paddings = None

        for factor_plc, name, vocabulary in zip(self.input_factors,
                                                self.data_ids,
                                                self.vocabularies):
            factors = dataset.get_series(name)
            vectors, paddings = vocabulary.sentences_to_tensor(
                list(factors),
                self.max_length,
                pad_to_max_len=False,
                train_mode=train,
                add_start_symbol=self.add_start_symbol,
                add_end_symbol=self.add_end_symbol)

            # zip(*...) transposes the vocabulary output before feeding.
            fd[factor_plc] = list(zip(*vectors))

            # tobytes() is the supported spelling of the deprecated
            # ndarray.tostring() and returns the same bytes.
            padding_signatures.add(paddings.tobytes())
            last_paddings = paddings

        if len(padding_signatures) > 1:
            raise ValueError("The lenghts of factors do not match")

        # Holds as long as at least one factor was processed above.
        assert last_paddings is not None
        fd[self.mask] = list(zip(*last_paddings))

        return fd
Exemplo n.º 24
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the decoder's feed dictionary.

        When the reference series ``self.data_id`` is available, it is
        passed through ``pad_batch`` without a start symbol and with an end
        symbol, and fed as the training tokens.

        Arguments:
            dataset: The dataset to use for the decoder.
            train: Boolean flag, telling whether this is a training run.

        Raises:
            ValueError: If ``train`` is set and the references are missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        references = dataset.maybe_get_series(self.data_id)

        if references is None:
            if train:
                raise ValueError("When training, you must feed "
                                 "reference sentences")
            return feed

        reference_batch = list(references)
        feed[self.train_tokens] = pad_batch(
            reference_batch, self.max_output_len, add_start_symbol=False,
            add_end_symbol=True)

        return feed
Exemplo n.º 25
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed every input factor placeholder with its series.

        Placeholders in ``self.input_factors`` are paired with the series
        names in ``self.data_ids``; each series is passed through
        ``pad_batch`` before being fed.

        Arguments:
            dataset: The dataset.
            train: A flag whether the train mode is enabled.

        Returns:
            The constructed feed dictionary that contains the factor data.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        factor_sources = zip(self.input_factors, self.data_ids)
        for placeholder, series_name in factor_sources:
            factor_batch = list(dataset.get_series(series_name))
            feed[placeholder] = pad_batch(
                factor_batch, self.max_length, self.add_start_symbol,
                self.add_end_symbol)

        return feed
Exemplo n.º 26
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed each factor placeholder with its ``pad_batch``-ed series.

        The i-th placeholder of ``self.input_factors`` receives the series
        named by the i-th entry of ``self.data_ids``.

        Arguments:
            dataset: The dataset.
            train: A flag whether the train mode is enabled.

        Returns:
            The constructed feed dictionary that contains the factor data.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        for placeholder, series_id in zip(self.input_factors, self.data_ids):
            raw_factor = dataset.get_series(series_id)
            padded_factor = pad_batch(list(raw_factor), self.max_length,
                                      self.add_start_symbol,
                                      self.add_end_symbol)
            feed[placeholder] = padded_factor

        return feed
Exemplo n.º 27
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Build the decoder's feed dictionary.

        Reference sentences, when present, go through ``pad_batch`` with
        ``add_start_symbol=False`` and ``add_end_symbol=True`` and are fed
        as the training tokens.

        Arguments:
            dataset: The dataset to use for the decoder.
            train: Boolean flag, telling whether this is a training run.

        Raises:
            ValueError: If ``train`` is set and the references are missing.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        references = dataset.maybe_get_series(self.data_id)
        missing = references is None

        if missing and train:
            raise ValueError("When training, you must feed "
                             "reference sentences")

        if not missing:
            padded_references = pad_batch(list(references),
                                          self.max_output_len,
                                          add_start_symbol=False,
                                          add_end_symbol=True)
            feed[self.train_tokens] = padded_references

        return feed
Exemplo n.º 28
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed the temporal states, padded or cropped to a common length.

        Every item of the series is zero-padded (or truncated) along its
        first axis to the longest item in the batch, capped by
        ``self.max_input_len`` when that is set.

        Arguments:
            dataset: The dataset providing the series ``self.data_id``.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        sequences = list(dataset.get_series(self.data_id))

        batch_len = max(seq.shape[0] for seq in sequences)
        if self.max_input_len is not None:
            batch_len = min(self.max_input_len, batch_len)

        def _fit(seq):
            # Place the first ``kept`` steps of ``seq`` onto a zero canvas of
            # ``batch_len`` steps, keeping the remaining axes and the dtype.
            kept = min(batch_len, seq.shape[0])
            canvas = np.zeros(shape=(batch_len, ) + seq.shape[1:],
                              dtype=seq.dtype)
            canvas[:kept] = seq[:kept]
            return kept, canvas

        fitted = [_fit(seq) for seq in sequences]

        feed[self.temporal_states] = [canvas for _, canvas in fitted]
        feed[self._input_lengths] = [kept for kept, _ in fitted]

        return feed
Exemplo n.º 29
0
    def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
        """Feed length-normalized temporal states and their lengths.

        The common length is the longest first dimension in the batch,
        limited by ``self.max_input_len`` if it is not ``None``. Shorter
        items are zero-padded, longer ones are cut.

        Arguments:
            dataset: The dataset providing the series ``self.data_id``.
            train: Boolean flag telling whether it is training time.
        """
        feed = ModelPart.feed_dict(self, dataset, train)

        items = list(dataset.get_series(self.data_id))

        common_len = max(item.shape[0] for item in items)
        if self.max_input_len is not None:
            common_len = min(self.max_input_len, common_len)

        used_lengths = []
        padded_items = []
        for item in items:
            used = min(common_len, item.shape[0])
            # Zero canvas with the common first dimension; trailing
            # dimensions and dtype follow the item itself.
            canvas_shape = (common_len,) + item.shape[1:]
            canvas = np.zeros(shape=canvas_shape, dtype=item.dtype)
            canvas[:used] = item[:used]

            used_lengths.append(used)
            padded_items.append(canvas)

        feed[self.temporal_states] = padded_items
        feed[self._input_lengths] = used_lengths

        return feed
Exemplo n.º 30
0
 def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
     """Feed the series ``self.data_id``, as a list, into ``spatial_input``.

     Arguments:
         dataset: The dataset providing the series.
         train: Boolean flag telling whether it is training time.
     """
     feed = ModelPart.feed_dict(self, dataset, train)

     spatial_batch = list(dataset.get_series(self.data_id))
     feed[self.spatial_input] = spatial_batch

     return feed
Exemplo n.º 31
0
 def feed_dict(self, dataset: Dataset, train: bool = True) -> FeedDict:
     """Return the feed dictionary built by ``ModelPart.feed_dict``.

     Nothing is added here. Note that ``train`` defaults to ``True`` in
     this override.

     Arguments:
         dataset: The dataset to use.
         train: Boolean flag telling whether it is training time.
     """
     base_feed = ModelPart.feed_dict(self, dataset, train)
     return base_feed
Exemplo n.º 32
0
 def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
     """Feed ``spatial_input`` with the materialized series.

     Arguments:
         dataset: The dataset providing the series ``self.data_id``.
         train: Boolean flag telling whether it is training time.
     """
     feed = ModelPart.feed_dict(self, dataset, train)

     series = dataset.get_series(self.data_id)
     # The series is turned into a list before it is fed.
     feed[self.spatial_input] = list(series)

     return feed
Exemplo n.º 33
0
 def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
     """Feed the series ``self.data_id`` into ``vector`` as it is.

     Arguments:
         dataset: The dataset providing the series.
         train: Boolean flag telling whether it is training time.
     """
     feed = ModelPart.feed_dict(self, dataset, train)

     # The series is fed exactly as the dataset returns it.
     vectors = dataset.get_series(self.data_id)
     feed[self.vector] = vectors

     return feed
Exemplo n.º 34
0
 def feed_dict(self, dataset: Dataset, train: bool = True) -> FeedDict:
     """Delegate entirely to ``ModelPart.feed_dict``.

     This override feeds nothing of its own; ``train`` defaults to
     ``True`` here.

     Arguments:
         dataset: The dataset to use.
         train: Boolean flag telling whether it is training time.
     """
     inherited_feed = ModelPart.feed_dict(self, dataset, train)
     return inherited_feed
Exemplo n.º 35
0
 def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
     """Feed ``vector`` with the unmodified series ``self.data_id``.

     Arguments:
         dataset: The dataset providing the series.
         train: Boolean flag telling whether it is training time.
     """
     feed = ModelPart.feed_dict(self, dataset, train)

     series = dataset.get_series(self.data_id)
     feed[self.vector] = series  # no conversion is applied

     return feed