def setUp(self):
        """Prepare training fixtures and an untrained CombinedModel.

        Loads ``test_participant.json`` from this file's directory, derives
        the per-trial contexts/actions and the bag-of-words features for
        participant ``"1.ABC"``, and stores everything on ``self``.
        """
        use_tfidf = False
        ngram_range = (1, 2)

        here = os.path.dirname(__file__)
        self.data = TrainData.load(os.path.join(here, "test_participant.json"))

        participant = self.data.data["1.ABC"]
        # One list of sample ids per trial, plus the participant-level ids
        # used to select the matching feature rows below.
        ids_by_trial = [trial.ids for trial in participant]
        flat_ids = list(participant.ids)

        contexts, actions = self._format_cntxt_indices(ids_by_trial)
        self.train_cntxt = contexts
        self.train_acts = actions

        # Features are computed on the whole data set, then restricted to
        # the rows belonging to the training participant.
        all_features, vectorizer = get_bow_features(self.data,
                                                    tfidf=use_tfidf,
                                                    n_grams=ngram_range,
                                                    max_features=None)
        self.X_speech = all_features[flat_ids, :]

        classifier_kwargs = dict(loss='log',
                                 average=True,
                                 penalty='l2',
                                 alpha=0.0002)
        speech_model_gen = SpeechModel.model_generator(SGDClassifier,
                                                       **classifier_kwargs)

        self.combined_model = CombinedModel(vectorizer,
                                            speech_model_gen,
                                            ALL_ACTIONS,
                                            speech_eps=0.15,
                                            context_eps=0.15)

        # Inputs shared by the prediction tests: one utterance, no history.
        self.test_utter = "The green piece with two black stripes"
        self.test_cntxt = []
def plot_trial(trial, bag):
    """Plot the model's prediction for every utterance of one trial.

    Loads the ``model_initial`` model stored for ``args.participant`` and
    ``trial``, then walks the messages of ``bag`` published on ``TOPIC``.
    For each one it calls ``model.predict(..., plot=True)`` with the actions
    seen so far as context and saves the current pyplot figure under
    ``figs/<participant>/<trial>/`` next to this file.

    Args:
        trial: Trial identifier; used (via ``str``) as a directory name in
            both the model path and the figure path.
        bag: Object whose ``read_messages()`` yields items exposing
            ``topic`` and ``message`` (with ``utter``, ``action`` and
            ``result`` attributes).
    """
    trial_dir = str(trial)
    model_path = os.path.join(args.model_path, args.participant, trial_dir,
                              "model_initial")
    fig_path = os.path.join(
        os.path.dirname(__file__), "figs", args.participant, trial_dir)

    if not os.path.exists(fig_path):
        os.makedirs(fig_path)

    model = CombinedModel.load_from_path(model_path, ALL_ACTIONS,
                                         JointModel.model_generator(
                                             SGDClassifier,
                                             **SPEECH_MODEL_PARAMETERS),
                                         SPEECH_EPS, CONTEXT_EPS)

    # Actions observed so far in this trial; grows by one per plotted sample.
    cntxt = []

    # Only messages on TOPIC are counted, so sample numbers are contiguous
    # and start at 1 regardless of other traffic in the bag.
    relevant = (m for m in bag.read_messages() if m.topic == TOPIC)
    for i, m in enumerate(relevant, 1):
        model.predict(cntxt, m.message.utter, plot=True)
        cntxt.append(m.message.action)

        plt.tight_layout()

        # The file name carries no extension; the format is forced to SVG.
        path = os.path.join(fig_path, "sample_{}_{}".format(
            m.message.result, i))
        plt.savefig(path, format="svg")
        # Clear the figure so the next sample starts from a blank canvas.
        plt.clf()
Exemplo n.º 3
0
 def _get_model(self, participant, trial):
     """Load the stored CombinedModel to use for ``participant`` at ``trial``.

     Trial 0 maps to the ``model_initial`` snapshot in directory ``1``;
     any other trial maps to the ``model_final`` snapshot in directory
     ``(trial - 1) + 1``.
     """
     first_trial = trial == 0
     snapshot = "model_initial" if first_trial else "model_final"
     previous = 0 if first_trial else trial - 1

     model_path = os.path.join(args.model_path, participant,
                               str(previous + 1), snapshot)
     speech_model_gen = JointModel.model_generator(
         SGDClassifier, **SPEECH_MODEL_PARAMETERS)
     return CombinedModel.load_from_path(model_path, ALL_ACTIONS,
                                         speech_model_gen, SPEECH_EPS,
                                         CONTEXT_EPS)
Exemplo n.º 4
0
def train_combined_model(speech_eps,
                         context_eps,
                         fit_type="incremental",
                         tfidf=False,
                         n_grams=(1, 2),
                         speech_model_class=JointModel,
                         speech_model_parameters=None,
                         init_new_speech_actions=False):
    """Build a CombinedModel and train it on the training participants.

    Loads ``train.json`` from ``defaults.DATA_PATH``, extracts context and
    bag-of-words speech features for ``TRAIN_PARTICIPANTS`` and fits the
    model according to ``fit_type``.

    Args:
        speech_eps: Forwarded to ``CombinedModel`` as ``speech_eps``.
        context_eps: Forwarded to ``CombinedModel`` as ``context_eps``.
        fit_type: Training mode, matched by substring: a value containing
            ``"incremental"`` uses ``partial_fit``; otherwise a value
            containing ``"offline"`` uses ``fit``. Any other value leaves
            the returned model unfitted.
        tfidf: Forwarded to ``get_bow_features``.
        n_grams: Forwarded to ``get_bow_features``.
        speech_model_class: Class handed to ``JointModel.model_generator``.
        speech_model_parameters: Keyword arguments for the speech model
            generator; ``None`` (the default) means no extra arguments.
        init_new_speech_actions: When true, call
            ``update_speech_for_new_actions`` on the trained speech model.
            Only supported with incremental fitting.

    Returns:
        The trained ``CombinedModel``.

    Raises:
        NotImplementedError: If ``init_new_speech_actions`` is requested
            with a non-incremental ``fit_type``.
    """
    incremental = "incremental" in fit_type

    # Fail fast: reject the unsupported combination before loading data
    # and running a full offline fit whose result would be discarded.
    if init_new_speech_actions and not incremental:
        raise NotImplementedError(
            "Can't add speech data on offline speech")

    # Avoid a shared mutable default; None stands for "no parameters".
    if speech_model_parameters is None:
        speech_model_parameters = {}

    train_file = os.path.join(defaults.DATA_PATH, "train.json")
    print("PATH: ", train_file)

    data = TrainData.load(train_file)

    flat_train_ids = [i for p in TRAIN_PARTICIPANTS for i in data.data[p].ids]
    train_ids_by_trial = [
        trial.ids for p in TRAIN_PARTICIPANTS for trial in data.data[p]
    ]

    # Get features
    train_context, labels = format_cntxt_indices(data, train_ids_by_trial)
    X_speech, vectorizer = get_bow_features(data,
                                            tfidf=tfidf,
                                            n_grams=n_grams,
                                            max_features=None)
    # Features cover the whole data set; keep only the training rows.
    X_speech = X_speech[flat_train_ids, :]

    model_gen = JointModel.model_generator(speech_model_class,
                                           **speech_model_parameters)

    combined_model = CombinedModel(vectorizer,
                                   model_gen,
                                   ALL_ACTIONS,
                                   speech_eps=speech_eps,
                                   context_eps=context_eps)

    if incremental:
        combined_model.partial_fit(train_context, X_speech, labels)
    elif "offline" in fit_type:
        combined_model.fit(train_context, X_speech, labels)

    if init_new_speech_actions:
        # Weight is the average number of training samples per action.
        update_speech_for_new_actions(combined_model.speech_model,
                                      combined_model._vectorizer,
                                      weight=len(labels) * 1. /
                                      len(ALL_ACTIONS))

    return combined_model
class TestCombinedModel(TestCase):
    """Tests for CombinedModel trained on the ``test_participant.json`` data."""

    def setUp(self):
        """Load the fixture data and build an untrained CombinedModel."""
        data_path = (os.path.join(os.path.dirname(__file__),
                                  "test_participant.json"))

        TFIDF = False
        N_GRAMS = (1, 2)

        self.data = TrainData.load(data_path)

        # Per-trial ids drive the context sequences; the flat ids select
        # the matching rows of the feature matrix.
        train_ids = [t.ids for t in self.data.data["1.ABC"]]
        flat_train_ids = list(self.data.data["1.ABC"].ids)
        self.train_cntxt, self.train_acts = self._format_cntxt_indices(
            train_ids)

        self.X_speech, vectorizer = get_bow_features(self.data,
                                                     tfidf=TFIDF,
                                                     n_grams=N_GRAMS,
                                                     max_features=None)
        self.X_speech = self.X_speech[flat_train_ids, :]

        # NOTE(review): scikit-learn renamed loss='log' to 'log_loss' in
        # 1.1 and removed the old spelling in 1.3 -- confirm the pinned
        # scikit-learn version before upgrading.
        model_gen = SpeechModel.model_generator(SGDClassifier,
                                                loss='log',
                                                average=True,
                                                penalty='l2',
                                                alpha=0.0002)

        self.combined_model = CombinedModel(vectorizer,
                                            model_gen,
                                            ALL_ACTIONS,
                                            speech_eps=0.15,
                                            context_eps=0.15)

        self.test_utter = "The green piece with two black stripes"
        self.test_cntxt = []

    def test_n_children(self):
        """After fitting, the context model's root has two children."""
        self.combined_model.fit(self.train_cntxt, self.X_speech,
                                self.train_acts)
        self.assertEqual(self.combined_model.context_model.root.n_children, 2)

    def test_predict(self):
        """The action predicted for the test utterance contains "foot"."""
        self.combined_model.fit(self.train_cntxt, self.X_speech,
                                self.train_acts)
        act, probs = self.combined_model.predict(self.test_cntxt,
                                                 self.test_utter)
        # assertIn replaces the deprecated assert_ alias (removed in
        # Python 3.12) and reports both operands on failure.
        self.assertIn("foot", act)

    def test_prob_normilization(self):
        """Context and speech probabilities each sum to one."""
        self.combined_model.fit(self.train_cntxt, self.X_speech,
                                self.train_acts)
        c_probs = self.combined_model.get_context_probs(self.test_cntxt)
        s_probs = self.combined_model.get_speech_probs(self.test_utter)

        self.assertAlmostEqual(sum(c_probs), 1.0, places=4)
        self.assertAlmostEqual(sum(s_probs), 1.0, places=4)

    def test_incremental_learning(self):
        """Two partial_fit calls with distinct actions give two root children."""
        train_ctxt_1 = [[]]
        train_ctxt_2 = [[]]

        train_utter_1 = "blue piece with two white stripes"
        train_utter_2 = "I am feeling fat and sassy"

        act_1 = ["top_2"]
        act_2 = ["top_1"]

        # NOTE(review): raw utterance strings are passed here, whereas the
        # fit() tests pass a feature matrix -- confirm partial_fit accepts
        # both forms.
        self.combined_model.partial_fit(train_ctxt_1, train_utter_1, act_1)
        self.combined_model.partial_fit(train_ctxt_2, train_utter_2, act_2)

        self.assertEqual(self.combined_model.context_model.root.n_children, 2)

    def _format_cntxt_indices(self, indices):
        """Turn per-trial label indices into (context, action) training pairs.

        Args:
            indices: Iterable of trials, each an iterable of indices into
                ``self.data.labels``.

        Returns:
            A ``(cntxts, actions)`` tuple of equal-length lists. For every
            index, ``actions`` holds its label and ``cntxts`` holds the
            labels that preceded it within the same trial.
        """
        cntxts = []
        actions = []

        all_labels = list(self.data.labels)
        for t in indices:
            # The context restarts empty at the beginning of every trial.
            cntxt = []
            for i in t:
                label = all_labels[i]
                # Snapshot the history: cntxt keeps growing afterwards.
                cntxts.append(list(cntxt))
                actions.append(label)

                cntxt.append(label)

        return cntxts, actions
Exemplo n.º 6
0
 def _load_model(self, model_path, speech_eps, context_eps):
     """Load the model stored at ``model_path`` into ``self.model``.

     The speech model generator is built from ``SGDClassifier`` with
     ``SPEECH_MODEL_PARAMETERS``; ``speech_eps`` and ``context_eps`` are
     forwarded unchanged to ``CM.load_from_path``.
     """
     speech_model_gen = JointModel.model_generator(
         SGDClassifier, **SPEECH_MODEL_PARAMETERS)
     loaded = CM.load_from_path(model_path, ALL_ACTIONS, speech_model_gen,
                                speech_eps, context_eps)
     self.model = loaded