def setUp(self):
    """Prepare training fixtures and an untrained CombinedModel.

    Loads ``test_participant.json`` from the directory of this file,
    derives the context/action training data and the bag-of-words speech
    features for participant ``"1.ABC"``, and stores a fresh
    ``CombinedModel`` together with a sample utterance and an empty
    context on the test case.
    """
    TFIDF = False
    N_GRAMS = (1, 2)

    here = os.path.dirname(__file__)
    self.data = TrainData.load(os.path.join(here, "test_participant.json"))

    participant = self.data.data["1.ABC"]
    # Ids grouped per trial feed the context formatter; the flat id list
    # selects the matching rows of the speech feature matrix.
    ids_by_trial = [trial.ids for trial in participant]
    all_ids = list(participant.ids)

    self.train_cntxt, self.train_acts = self._format_cntxt_indices(
        ids_by_trial)

    features, vectorizer = get_bow_features(
        self.data, tfidf=TFIDF, n_grams=N_GRAMS, max_features=None)
    self.X_speech = features[all_ids, :]

    sgd_options = {
        'loss': 'log',
        'average': True,
        'penalty': 'l2',
        'alpha': 0.0002,
    }
    speech_gen = JointModel.model_generator(SGDClassifier, **sgd_options)
    self.combined_model = CombinedModel(
        vectorizer, speech_gen, ALL_ACTIONS,
        speech_eps=0.15, context_eps=0.15)

    self.test_utter = "The green piece with two black stripes"
    self.test_cntxt = []
def plot_trial(trial, bag):
    """Save one prediction plot per recorded sample of a trial.

    Loads the ``model_initial`` model stored for ``args.participant`` and
    ``trial``, then walks the messages of ``bag`` published on ``TOPIC``.
    For each such message the model's prediction (given the actions seen so
    far and the message's utterance) is plotted and written as an SVG file
    under ``figs/<participant>/<trial>/`` next to this file.

    Args:
        trial: trial identifier; converted with ``str`` to build paths.
        bag: object whose ``read_messages()`` yields items exposing
            ``topic`` and ``message`` (with ``utter``, ``action`` and
            ``result``) attributes.
    """
    model_path = os.path.join(args.model_path, args.participant, str(trial),
                              "model_initial")
    fig_path = os.path.join(
        os.path.dirname(__file__), "figs", args.participant, str(trial))
    if not os.path.exists(fig_path):
        os.makedirs(fig_path)

    model = CombinedModel.load_from_path(
        model_path, ALL_ACTIONS,
        JointModel.model_generator(SGDClassifier, **SPEECH_MODEL_PARAMETERS),
        SPEECH_EPS, CONTEXT_EPS)

    cntxt = []  # actions observed so far, in order
    i = 0       # number of samples plotted; used in the file name
    for m in bag.read_messages():
        if m.topic == TOPIC:
            # Predict before appending so the context holds only the
            # actions that preceded this utterance.
            model.predict(cntxt, m.message.utter, plot=True)
            cntxt.append(m.message.action)
            i += 1
            plt.tight_layout()
            path = os.path.join(fig_path, "sample_{}_{}".format(
                m.message.result, i))
            plt.savefig(path, format="svg")
            # Clear the figure so the next sample starts from a blank canvas.
            plt.clf()
def train_combined_model(speech_eps,
                         context_eps,
                         fit_type="incremental",
                         tfidf=False,
                         n_grams=(1, 2),
                         speech_model_class=JointModel,
                         speech_model_parameters=None,
                         init_new_speech_actions=False):
    """Train a CombinedModel on the data of ``TRAIN_PARTICIPANTS``.

    Args:
        speech_eps: forwarded to ``CombinedModel`` as ``speech_eps``.
        context_eps: forwarded to ``CombinedModel`` as ``context_eps``.
        fit_type: if it contains ``"incremental"`` the model is trained with
            ``partial_fit``; otherwise, if it contains ``"offline"``, with
            ``fit``. Any other value leaves the model untrained.
        tfidf: forwarded to ``get_bow_features``.
        n_grams: forwarded to ``get_bow_features``.
        speech_model_class: class handed to ``JointModel.model_generator``.
        speech_model_parameters: keyword arguments for the speech model;
            ``None`` (the default) means no extra arguments.
        init_new_speech_actions: when true, additionally calls
            ``update_speech_for_new_actions`` on the trained speech model.

    Returns:
        The ``CombinedModel`` instance.

    Raises:
        NotImplementedError: if ``init_new_speech_actions`` is requested
            with a ``fit_type`` that is not incremental.
    """
    # None instead of a shared mutable ``{}`` default.
    if speech_model_parameters is None:
        speech_model_parameters = {}

    # Fail fast: reject the unsupported combination before loading data and
    # training rather than after.
    if init_new_speech_actions and "incremental" not in fit_type:
        raise NotImplementedError(
            "Can't add speech data on offline speech")

    path = defaults.DATA_PATH
    train_file = os.path.join(path, "train.json")
    print("PATH: ", train_file)
    data = TrainData.load(train_file)

    flat_train_ids = [i for p in TRAIN_PARTICIPANTS for i in data.data[p].ids]
    train_ids_by_trial = [
        trial.ids for p in TRAIN_PARTICIPANTS for trial in data.data[p]
    ]

    # Get features
    train_context, labels = format_cntxt_indices(data, train_ids_by_trial)
    X_speech, vectorizer = get_bow_features(
        data, tfidf=tfidf, n_grams=n_grams, max_features=None)
    # Keep only the rows that belong to the training participants.
    X_speech = X_speech[flat_train_ids, :]

    model_gen = JointModel.model_generator(speech_model_class,
                                           **speech_model_parameters)
    combined_model = CombinedModel(
        vectorizer,
        model_gen,
        ALL_ACTIONS,
        speech_eps=speech_eps,
        context_eps=context_eps)

    if "incremental" in fit_type:
        combined_model.partial_fit(train_context, X_speech, labels)
    elif "offline" in fit_type:
        combined_model.fit(train_context, X_speech, labels)

    if init_new_speech_actions:
        update_speech_for_new_actions(
            combined_model.speech_model,
            combined_model._vectorizer,
            # ``* 1.`` forces float division under Python 2 as well.
            weight=len(labels) * 1. / len(ALL_ACTIONS))
    return combined_model
def _get_model(self, participant, trial):
    """Load the saved CombinedModel to use for ``trial``.

    Trial 0 uses the ``model_initial`` snapshot stored in directory ``1``
    of the participant; every other trial uses the ``model_final``
    snapshot, looked up in the directory numbered ``(trial - 1) + 1``.
    """
    first_trial = trial == 0
    snapshot = "model_initial" if first_trial else "model_final"
    index = 0 if first_trial else trial - 1

    location = os.path.join(args.model_path, participant, str(index + 1),
                            snapshot)
    speech_gen = JointModel.model_generator(SGDClassifier,
                                            **SPEECH_MODEL_PARAMETERS)
    return CombinedModel.load_from_path(location, ALL_ACTIONS, speech_gen,
                                        SPEECH_EPS, CONTEXT_EPS)
#!/usr/bin/env python
# Evaluate the speech-only, context-only and combined model variations,
# then plot their incremental-learning scores.

from sklearn.linear_model import SGDClassifier
from matplotlib import pyplot as plt

from hrc_speech_prediction.models import (
    JointModel, get_path_from_cli_arguments)
from hrc_speech_prediction.evaluation import Evaluation
from hrc_speech_prediction.plots import plot_incremental_scores

N_GRAMS = (1, 2)
TFIDF = False

working_path = get_path_from_cli_arguments()

speech_model_gen = JointModel.model_generator(
    SGDClassifier, loss='log', average=True, penalty='l2', alpha=.0002)

# One variation per feature set; each is keyed by the feature-set name.
variations = {}
for feature_set in ['speech', 'context', 'both']:
    variations[feature_set] = {'features': feature_set}

ev = Evaluation(
    speech_model_gen,
    working_path,
    n_grams=N_GRAMS,
    tfidf=TFIDF,
    model_variations=variations)
ev.evaluate_all()

scores = ev.evaluate_incremental_learning(shuffle=False)
# Draw one labelled curve per entry of the returned scores.
for name in scores:
    plot_incremental_scores(scores[name], label=name)
plt.legend()
plt.show()
def _load_model(self, model_path, speech_eps, context_eps):
    """Load a combined model from ``model_path`` into ``self.model``.

    The speech model generator is built from ``SGDClassifier`` with
    ``SPEECH_MODEL_PARAMETERS``; ``speech_eps`` and ``context_eps`` are
    passed through to ``CM.load_from_path``.
    """
    speech_gen = JointModel.model_generator(SGDClassifier,
                                            **SPEECH_MODEL_PARAMETERS)
    self.model = CM.load_from_path(model_path, ALL_ACTIONS, speech_gen,
                                   speech_eps, context_eps)
TFIDF = False
# Keyword arguments passed to SGDClassifier through the model generator.
MODEL_PARAMS = {
    'loss': 'log',
    'average': True,
    'penalty': 'l2',
    'alpha': .02,
    'max_iter': 100,
    'tol': 1.e-3,
}

working_path = get_path_from_cli_arguments()
# Figures are written to a "figs" directory inside the working path.
fig_path = os.path.join(working_path, 'figs')
if not os.path.isdir(fig_path):
    os.mkdir(fig_path)

speech_model_gen = JointModel.model_generator(SGDClassifier, **MODEL_PARAMS)

# Only the speech-feature variation is evaluated here.
ev = Evaluation(
    speech_model_gen,
    working_path,
    n_grams=N_GRAMS,
    tfidf=TFIDF,
    model_variations={'speech': {'features': 'speech'}})
ev.evaluate_all()

classes = list(set(ev.data.labels))
utterances = list(ev.data.utterances)
# Uses np.log10 rather than np.math.log10: the np.math alias of the stdlib
# math module is deprecated and no longer available in recent NumPy.
digits = int(np.ceil(np.log10(len(utterances))))
plt.set_cmap('Blues')
fig = plt.figure()

for tst in TRAIN_PARTICIPANTS:
    # Leave-one-out: gather the ids of every participant except `tst`.
    # Filtering on the participant first avoids walking the held-out ids.
    train_idx = [i
                 for p in TRAIN_PARTICIPANTS if not p == tst
                 for i in ev.data.data[p].ids]
    X_train = ev.get_Xs(train_idx)