示例#1
0
    def _create_session_data(
        self,
        training_data: "TrainingData",
        label_id_dict: Dict[Text, int],
        attribute: Text,
    ) -> "train_utils.SessionData":
        """Assemble the training arrays and wrap them in a SessionData.

        Only examples from ``training_data.intent_examples`` whose
        ``attribute`` value is truthy contribute to the result.

        Args:
            training_data: Source of the examples to convert.
            label_id_dict: Lookup from an example's ``attribute`` value to
                its label id.
            attribute: Name of the example attribute that holds the label.

        Returns:
            A ``train_utils.SessionData`` carrying the text features (``X``),
            the encoded labels (``Y``) and the raw label ids (``label_ids``).
        """

        # One (text feature, label id) pair per labelled example; the tuple
        # is evaluated left to right, i.e. feature lookup first, id second.
        feature_id_pairs = [
            (
                example.get(MESSAGE_VECTOR_FEATURE_NAMES[MESSAGE_TEXT_ATTRIBUTE]),
                label_id_dict[example.get(attribute)],
            )
            for example in training_data.intent_examples
            if example.get(attribute)
        ]

        features = np.array([feature for feature, _ in feature_id_pairs])
        ids = np.array([label_id for _, label_id in feature_id_pairs])

        # Look up the pre-computed encoding for every collected label id.
        encoded_labels = np.array(
            [self._encoded_all_label_ids[label_id] for label_id in ids]
        )

        return train_utils.SessionData(X=features, Y=encoded_labels, label_ids=ids)
示例#2
0
    def _create_session_data(
        self, data_X: "np.ndarray", data_Y: Optional["np.ndarray"] = None
    ) -> "train_utils.SessionData":
        """Bundle the session inputs into a ``train_utils.SessionData``.

        Args:
            data_X: Input features, passed through unchanged as ``X``.
            data_Y: Targets; ``None`` signals prediction time, in which case
                both ``Y`` and ``label_ids`` are ``None`` in the result.

        Returns:
            A ``train_utils.SessionData`` built from ``data_X`` and, when
            ``data_Y`` is given, the label features and label ids derived
            from it.
        """

        if data_Y is None:
            # Prediction time: there are no labels to attach.
            return train_utils.SessionData(X=data_X, Y=None, label_ids=None)

        # Training time: derive the ids first, then the features from the
        # ids in their original (not yet stringified) form.
        ids = self._label_ids_for_Y(data_Y)
        label_features = self._label_features_for_Y(ids)

        # Idea taken from sklearn's stratified split.
        if ids.ndim == 2:
            # Multi-label case: collapse each row into a single string key.
            # Joining is used instead of str(row) because the latter
            # abbreviates long rows with an ellipsis.
            row_keys = [" ".join(row.astype("str")) for row in ids]
            ids = np.array(row_keys)

        return train_utils.SessionData(X=data_X, Y=label_features, label_ids=ids)