Exemplo n.º 1
0
    def encode(self, active_features, input_feature_map):
        """Encode the active features as a fixed-length feature vector.

        Args:
            active_features: Mapping of feature name (e.g. ``'intent_greet'``,
                ``'prev_action_listen'``) to its value / probability.
                ``None``, or any container holding ``None`` (e.g.
                ``[None]``), marks a padding step.
            input_feature_map: Mapping of feature name to its index in the
                returned vector; its size is the length of the vector.

        Returns:
            A 1-D numpy array with one entry per key of
            ``input_feature_map``:

            * For a padding step every entry is ``-1`` (dtype ``int32``).
            * Otherwise every non-intent feature found in the map carries
              its (non-zero) value, and only the single most probable
              ``intent_*`` feature is marked, with ``1``. For example two
              active features out of five possible features give
              ``[0 0 1 0 1]``.

            The array is ``int32`` when every stored value is integral
            according to ``utils.is_int`` and a float array otherwise.
        """

        num_features = len(input_feature_map)
        if active_features is None or None in active_features:
            return np.ones(num_features, dtype=np.int32) * -1

        # we are going to use floats and convert to int later if possible
        used_features = np.zeros(num_features, dtype=float)
        using_only_ints = True
        best_intent = None
        best_intent_prob = 0.0

        for feature_name, prob in active_features.items():
            if feature_name.startswith('intent_'):
                # Intents are not written directly; only the winner is
                # marked after the loop. `>=` lets a later intent with an
                # equal probability replace an earlier one.
                if prob >= best_intent_prob:
                    best_intent = feature_name
                    best_intent_prob = prob
            elif feature_name in input_feature_map:
                if prob != 0.0:
                    idx = input_feature_map[feature_name]
                    used_features[idx] = prob
                    using_only_ints = using_only_ints and utils.is_int(
                        prob)
            else:
                logger.debug(
                    "Feature '{}' (value: '{}') could not be found in "
                    "feature map. Make sure you added all intents and "
                    "entities to the domain".format(feature_name, prob))

        if best_intent is not None:
            # mark the maximum confidence intent as active
            index_in_feature_list = input_feature_map.get(best_intent)
            if index_in_feature_list is not None:
                used_features[index_in_feature_list] = 1
            else:
                # `Logger.warn` is a deprecated alias of `Logger.warning`
                logger.warning(
                    "Couldn't set most probable feature '{}', "
                    "it wasn't found in the feature list of the domain."
                    " Make sure you added all intents and "
                    "entities to the domain.".format(best_intent))

        if using_only_ints:
            # this is an optimization - saves us a bit of memory
            return used_features.astype(np.int32)
        return used_features
Exemplo n.º 2
0
    def encode(self, state):
        # type: (Dict[Text, float]) -> np.ndarray
        """Encode a state as a fixed-length feature vector.

        Every entry of ``state`` whose name is a key of
        ``self.input_state_map`` (e.g. 'intent_greet',
        'prev_action_listen', ...) is written, with its value, at the
        mapped index of the result. Names that are not in the map are
        reported at debug level and otherwise ignored.

        For example with two active features out of five possible features
        this would return a vector like `[0 0 1 0 1]`.

        If intent features are given with a probability, for example
        with two active features and two uncertain intents out
        of five possible features this would return a vector
        like `[0.3, 0.7, 1.0, 0, 1.0]`.

        If this is just a padding vector we set all values to `-1`.
        Padding vectors are specified by a `None` or `[None]`
        value for `state`.

        Returns:
            A 1-D array of length ``self.num_features``. It has dtype
            ``int32`` when every written value is integral according to
            ``utils.is_int`` and is a float array otherwise.

        Raises:
            Exception: if ``self.num_features`` is unset or zero, i.e. the
                featurizer was not prepared before encoding.
        """

        if not self.num_features:
            raise Exception("BinarySingleStateFeaturizer "
                            "was not prepared "
                            "before encoding.")

        if state is None or None in state:
            return np.ones(self.num_features, dtype=np.int32) * -1

        # we are going to use floats and convert to int later if possible.
        # NOTE: the builtin `float` is used because the `np.float` alias
        # was removed from NumPy (1.24+).
        used_features = np.zeros(self.num_features, dtype=float)
        using_only_ints = True
        for state_name, prob in state.items():
            if state_name in self.input_state_map:
                idx = self.input_state_map[state_name]
                used_features[idx] = prob
                using_only_ints = using_only_ints and utils.is_int(prob)
            else:
                logger.debug(
                    "Feature '{}' (value: '{}') could not be found in "
                    "feature map. Make sure you added all intents and "
                    "entities to the domain".format(state_name, prob))

        if using_only_ints:
            # this is an optimization - saves us a bit of memory
            return used_features.astype(np.int32)
        return used_features
Exemplo n.º 3
0
    def encode(self, state):
        # type: (Dict[Text, float]) -> np.ndarray
        """Encode a state as a fixed-length feature vector.

        Every entry of ``state`` whose name is a key of
        ``self.input_state_map`` (e.g. 'intent_greet',
        'prev_action_listen', ...) is written, with its value, at the
        mapped index of the result. Names that are not in the map are
        reported at debug level and otherwise ignored.

        For example with two active features out of five possible features
        this would return a vector like `[0 0 1 0 1]`.

        If intent features are given with a probability, for example
        with two active features and two uncertain intents out
        of five possible features this would return a vector
        like `[0.3, 0.7, 1.0, 0, 1.0]`.

        If this is just a padding vector we set all values to `-1`.
        Padding vectors are specified by a `None` or `[None]`
        value for `state`.

        Returns:
            A 1-D array of length ``self.num_features``. It has dtype
            ``int32`` when every written value is integral according to
            ``utils.is_int`` and is a float array otherwise.

        Raises:
            Exception: if ``self.num_features`` is unset or zero, i.e. the
                featurizer was not prepared before encoding.
        """

        if not self.num_features:
            raise Exception("BinarySingleStateFeaturizer "
                            "was not prepared "
                            "before encoding.")

        if state is None or None in state:
            return np.ones(self.num_features, dtype=np.int32) * -1

        # we are going to use floats and convert to int later if possible.
        # NOTE: the builtin `float` is used because the `np.float` alias
        # was removed from NumPy (1.24+).
        used_features = np.zeros(self.num_features, dtype=float)
        using_only_ints = True
        for state_name, prob in state.items():
            if state_name in self.input_state_map:
                idx = self.input_state_map[state_name]
                used_features[idx] = prob
                using_only_ints = using_only_ints and utils.is_int(prob)
            else:
                logger.debug(
                    "Feature '{}' (value: '{}') could not be found in "
                    "feature map. Make sure you added all intents and "
                    "entities to the domain".format(state_name, prob))

        if using_only_ints:
            # this is an optimization - saves us a bit of memory
            return used_features.astype(np.int32)
        return used_features
Exemplo n.º 4
0
    def encode(self, state):
        # type: (Dict[Text, float]) -> np.ndarray
        """Encode a state as a bag-of-tokens feature vector.

        The result is laid out as three consecutive segments:
        ``[user vocab tokens | slot labels | bot vocab tokens]``.

        * A name in ``self.user_labels`` is split on ``self.split_symbol``
          and its value is added at each token's ``self.user_vocab`` index,
          but only when the state also contains the "previous action was
          listen" feature (``PREV_PREFIX + ACTION_LISTEN_NAME``).
        * A name in ``self.slot_labels`` adds its value at the slot's
          position inside the slot segment.
        * A name whose remainder after the first ``len(PREV_PREFIX)``
          characters is in ``self.bot_labels`` is split the same way and
          added at each token's ``self.bot_vocab`` index inside the bot
          segment.
        * Any other name is reported with a warning and ignored.

        If this is just a padding vector we set all values to `-1`.
        Padding vectors are specified by a `None` or `[None]`
        value for `state`.

        Returns:
            A 1-D array of length ``self.num_features``. It has dtype
            ``int32`` when every value in ``state`` is integral according
            to ``utils.is_int`` and is a float array otherwise.

        Raises:
            Exception: if ``self.num_features`` is unset or zero, i.e. the
                featurizer was not prepared before encoding.
        """
        if not self.num_features:
            raise Exception("LabelTokenizerSingleStateFeaturizer "
                            "was not prepared before encoding.")

        if state is None or None in state:
            return np.ones(self.num_features, dtype=np.int32) * -1

        # we are going to use floats and convert to int later if possible.
        # NOTE: the builtin `float` is used because the `np.float` alias
        # was removed from NumPy (1.24+).
        used_features = np.zeros(self.num_features, dtype=float)
        using_only_ints = True

        # start positions of the slot and bot segments; they do not change
        # while iterating over the state
        slot_offset = len(self.user_vocab)
        bot_offset = slot_offset + len(self.slot_labels)

        for state_name, prob in state.items():
            using_only_ints = using_only_ints and utils.is_int(prob)
            if state_name in self.user_labels:
                # user input only counts right after the bot listened,
                # else we predict next action from bot action and memory
                if PREV_PREFIX + ACTION_LISTEN_NAME in state:
                    for t in state_name.split(self.split_symbol):
                        used_features[self.user_vocab[t]] += prob

            elif state_name in self.slot_labels:
                idx = self.slot_labels.index(state_name)
                used_features[slot_offset + idx] += prob

            else:
                action_name = state_name[len(PREV_PREFIX):]
                if action_name in self.bot_labels:
                    for t in action_name.split(self.split_symbol):
                        idx = self.bot_vocab[t]
                        used_features[bot_offset + idx] += prob
                else:
                    logger.warning(
                        "Feature '{}' could not be found in "
                        "feature map.".format(state_name))

        if using_only_ints:
            # this is an optimization - saves us a bit of memory
            return used_features.astype(np.int32)
        return used_features
Exemplo n.º 5
0
    def encode(self, state):
        # type: (Dict[Text, float]) -> np.ndarray
        """Encode a state as a bag-of-tokens feature vector.

        The result is laid out as three consecutive segments:
        ``[user vocab tokens | slot labels | bot vocab tokens]``.

        * A name in ``self.user_labels`` is split on ``self.split_symbol``
          and its value is added at each token's ``self.user_vocab`` index,
          but only when the state also contains the "previous action was
          listen" feature (``PREV_PREFIX + ACTION_LISTEN_NAME``).
        * A name in ``self.slot_labels`` adds its value at the slot's
          position inside the slot segment.
        * A name whose remainder after the first ``len(PREV_PREFIX)``
          characters is in ``self.bot_labels`` is split the same way and
          added at each token's ``self.bot_vocab`` index inside the bot
          segment.
        * Any other name is reported with a warning and ignored.

        If this is just a padding vector we set all values to `-1`.
        Padding vectors are specified by a `None` or `[None]`
        value for `state`.

        Returns:
            A 1-D array of length ``self.num_features``. It has dtype
            ``int32`` when every value in ``state`` is integral according
            to ``utils.is_int`` and is a float array otherwise.

        Raises:
            Exception: if ``self.num_features`` is unset or zero, i.e. the
                featurizer was not prepared before encoding.
        """
        if not self.num_features:
            raise Exception("LabelTokenizerSingleStateFeaturizer "
                            "was not prepared before encoding.")

        if state is None or None in state:
            return np.ones(self.num_features, dtype=np.int32) * -1

        # we are going to use floats and convert to int later if possible.
        # NOTE: the builtin `float` is used because the `np.float` alias
        # was removed from NumPy (1.24+).
        used_features = np.zeros(self.num_features, dtype=float)
        using_only_ints = True

        # start positions of the slot and bot segments; they do not change
        # while iterating over the state
        slot_offset = len(self.user_vocab)
        bot_offset = slot_offset + len(self.slot_labels)

        for state_name, prob in state.items():
            using_only_ints = using_only_ints and utils.is_int(prob)
            if state_name in self.user_labels:
                # user input only counts right after the bot listened,
                # else we predict next action from bot action and memory
                if PREV_PREFIX + ACTION_LISTEN_NAME in state:
                    for t in state_name.split(self.split_symbol):
                        used_features[self.user_vocab[t]] += prob

            elif state_name in self.slot_labels:
                idx = self.slot_labels.index(state_name)
                used_features[slot_offset + idx] += prob

            else:
                action_name = state_name[len(PREV_PREFIX):]
                if action_name in self.bot_labels:
                    for t in action_name.split(self.split_symbol):
                        idx = self.bot_vocab[t]
                        used_features[bot_offset + idx] += prob
                else:
                    logger.warning("Feature '{}' could not be found in "
                                   "feature map.".format(state_name))

        if using_only_ints:
            # this is an optimization - saves us a bit of memory
            return used_features.astype(np.int32)
        return used_features
Exemplo n.º 6
0
def test_is_int():
    """Check ``utils.is_int`` on integral and non-integral sample values."""
    # an int and a float without a fractional part are accepted
    for integral in (1, 1.0):
        assert utils.is_int(integral)
    # None, a fractional float and a string are rejected
    for non_integral in (None, 1.2, "test"):
        assert not utils.is_int(non_integral)
Exemplo n.º 7
0
def test_is_int():
    """Check ``is_int`` on integral and non-integral sample values."""
    # an int and a float without a fractional part are accepted
    for integral in (1, 1.0):
        assert is_int(integral)
    # None, a fractional float and a string are rejected
    for non_integral in (None, 1.2, "test"):
        assert not is_int(non_integral)
Exemplo n.º 8
0
def test_is_int():
    """Check ``utils.is_int`` on integral and non-integral sample values."""
    accepted = (1, 1.0)
    rejected = (None, 1.2, "test")
    # values without a fractional part must be recognised as ints
    for value in accepted:
        assert utils.is_int(value)
    # everything else (missing, fractional, non-numeric) must not
    for value in rejected:
        assert not utils.is_int(value)