def run(model_save_path, figure_save_path, threshold_c_value, trials_group_by_folder_name):
    """Train and persist per-state anomaly thresholds based on the derivative
    of each trial's deviation from the mean log-likelihood curve.

    For every state whose HMM pickle exists under model_save_path, compute the
    log-likelihood curve of each training trial, subtract the mean curve, take
    the first-order time difference of that deviation, and let
    assess_threshold_and_decide pick a threshold from it.  The resulting
    thresholds and mean curves are dumped next to the models.

    Args:
        model_save_path: directory holding "model_s<N>.pkl" per state; outputs
            are written here too.
        figure_save_path: directory passed through to the threshold helper for
            its figures.
        threshold_c_value: NOTE(review): accepted but never used here —
            presumably kept for a uniform run() signature; confirm.
        trials_group_by_folder_name: {trial_name: {state_no: observation
            matrix}} of training demonstrations.

    NOTE(review): indentation was reconstructed from a flattened source;
    verify loop nesting against version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # Load whatever per-state models exist on disk; missing ones are skipped.
    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    threshold_group_by_state = {}
    mean_curve_group_by_state = {}
    for state_no in model_group_by_state:
        all_log_curves_of_this_state = []
        curve_owner = []
        for trial_name in trials_group_by_folder_name:
            curve_owner.append(trial_name)
            one_log_curve_of_this_state = []
            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])
            all_log_curves_of_this_state.append(one_log_curve_of_this_state)
        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)
        mean_of_log_curve = np_matrix_traj_by_time.mean(0)
        # Deviation of every trial's curve from the mean curve, then its
        # first-order difference along the time axis.
        diff_traj_by_time = np_matrix_traj_by_time - mean_of_log_curve
        deri_of_diff_traj_by_time = diff_traj_by_time[:, 1:] - diff_traj_by_time[:, :-1]
        mean_curve_group_by_state[state_no] = mean_of_log_curve
        threshold_group_by_state[state_no] = assess_threshold_and_decide(
            deri_of_diff_traj_by_time,
            curve_owner,
            state_no,
            figure_save_path,
        )

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)
    # Only persist when at least one state produced a threshold.
    if len(threshold_group_by_state) != 0:
        joblib.dump(threshold_group_by_state,
                    model_save_path + "/threshold_for_deri_of_diff.pkl")
        joblib.dump(mean_curve_group_by_state,
                    model_save_path + "/mean_curve_group_by_state.pkl")
def run(model_save_path, figure_save_path, threshold_c_value, trials_group_by_folder_name, data_class, ): output_dir = os.path.join( figure_save_path, "gradient_of_log_likelihood_plot", data_class, ) if not os.path.isdir(output_dir): os.makedirs(output_dir) trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(trials_group_by_folder_name) one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next() state_amount = len(one_trial_data_group_by_state) threshold_constant = 10 threshold_offset = 10 model_group_by_state = {} for state_no in range(1, state_amount+1): try: model_group_by_state[state_no] = joblib.load(model_save_path+"/model_s%s.pkl"%(state_no,)) except IOError: print 'model of state %s not found'%(state_no,) continue for state_no in model_group_by_state: all_log_curves_of_this_state = [] curve_owner = [] for trial_name in trials_group_by_folder_name: curve_owner.append(trial_name) one_log_curve_of_this_state = [] one_log_curve_of_this_state = util.fast_log_curve_calculation( trials_group_by_folder_name[trial_name][state_no], model_group_by_state[state_no] ) all_log_curves_of_this_state.append(one_log_curve_of_this_state) # use np matrix to facilitate the computation of mean curve and std np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state) assess_threshold_and_decide( np_matrix_traj_by_time, curve_owner, state_no, output_dir, data_class, )
def run(model_save_path, figure_save_path, threshold_c_value, trials_group_by_folder_name):
    """Plot, per skill state, the time-gradient of each trial's log-likelihood
    curve together with the per-frame emission log-probability matrices.

    Args:
        model_save_path: directory holding the trained per-state HMM pickles
            ("model_s<N>.pkl").
        figure_save_path: directory the plotting helper writes into.
        threshold_c_value: NOTE(review): accepted but never used — presumably
            kept for a uniform run() signature; confirm.
        trials_group_by_folder_name: {trial_name: {state_no: observation
            matrix}} of training demonstrations.

    NOTE(review): indentation was reconstructed from a flattened source;
    verify loop nesting against version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # NOTE(review): these two constants are never read in this function.
    threshold_constant = 10
    threshold_offset = 10

    # Load whatever per-state models exist on disk; missing ones are skipped.
    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    # NOTE(review): these three accumulators are never filled or read.
    expected_log = []
    std_of_log = []
    deri_threshold = []

    for state_no in model_group_by_state:
        all_log_curves_of_this_state = []
        list_of_log_prob_mat = []
        log_prob_owner = []
        for trial_name in trials_group_by_folder_name:
            log_prob_owner.append(trial_name)
            emission_log_prob_mat = util.get_emission_log_prob_matrix(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])
            list_of_log_prob_mat.append(emission_log_prob_mat)
            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])
            all_log_curves_of_this_state.append(one_log_curve_of_this_state)
        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)
        # First-order time difference: the gradient of each trial's curve.
        gradient_traj_by_time = np_matrix_traj_by_time[:, 1:] - np_matrix_traj_by_time[:, :-1]
        plot_log_prob_of_all_trials(gradient_traj_by_time, list_of_log_prob_mat,
                                    log_prob_owner, state_no, figure_save_path)
def run(model_save_path, figure_save_path, threshold_c_value, trials_group_by_folder_name):
    """For every state, compute all trials' log-likelihood curves under that
    state's HMM — timing the scoring — and hand the curve matrix plus the
    per-sample scoring cost to assess_threshold_and_decide.

    Args:
        model_save_path: directory holding "model_s<N>.pkl" per state.
        figure_save_path: forwarded to the threshold helper for its figures.
        threshold_c_value: NOTE(review): accepted but never used — presumably
            kept for a uniform run() signature; confirm.
        trials_group_by_folder_name: {trial_name: {state_no: observation
            matrix}} of training demonstrations.

    NOTE(review): indentation was reconstructed from a flattened source;
    verify loop nesting against version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # NOTE(review): these two constants are never read in this function.
    threshold_constant = 10
    threshold_offset = 10

    # Load whatever per-state models exist on disk; missing ones are skipped.
    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    for state_no in model_group_by_state:
        compute_score_time_cost = 0
        total_step_times = 0
        all_log_curves_of_this_state = []
        curve_owner = []
        for trial_name in trials_group_by_folder_name:
            curve_owner.append(trial_name)
            one_log_curve_of_this_state = []
            # Time only the likelihood-curve computation itself.
            start_time = time.time()
            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])
            compute_score_time_cost += time.time() - start_time
            total_step_times += len(trials_group_by_folder_name[trial_name][state_no])
            all_log_curves_of_this_state.append(one_log_curve_of_this_state)
        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)
        # Average wall-clock cost of scoring a single time step.
        score_time_cost_per_point = float(compute_score_time_cost) / total_step_times
        assess_threshold_and_decide(np_matrix_traj_by_time, curve_owner, state_no,
                                    figure_save_path, score_time_cost_per_point)
def run(model_save_path, figure_save_path, trials_group_by_folder_name):
    """Skill-identification plot: for each trial, score the whole trial (all
    states concatenated in order) under every state's HMM and draw one
    log-likelihood curve per model, so the best-matching skill per segment
    is visible.  One figure per trial is saved as a .jpg.

    NOTE(review): indentation was reconstructed from a flattened source; the
    nesting of ax.plot/title/savefig relative to the loops should be verified
    against version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # Load whatever per-state models exist on disk; missing ones are skipped.
    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    fig = plt.figure(1)
    ax = fig.add_subplot(111)
    from matplotlib.pyplot import cm
    for trial_name in trials_group_by_folder_name:
        # One color per model/state, restarted per trial.
        color = iter(cm.rainbow(np.linspace(0, 1, state_amount)))
        # Slot 0 is a placeholder so curves can be indexed by 1-based model_no.
        # NOTE(review): this assumes model_group_by_state keys iterate as
        # 1..state_amount; a missing model pickle would break the indexing.
        all_log_curves_of_this_model = [[]]
        for model_no in model_group_by_state:
            all_log_curves_of_this_model.append([])
            # Concatenate model_no's log-lik curve across ALL states of the
            # trial, i.e. score the entire trial with one skill's model.
            for state_no in range(1, state_amount + 1):
                one_log_curve_of_this_model = util.fast_log_curve_calculation(
                    trials_group_by_folder_name[trial_name][state_no],
                    model_group_by_state[model_no])
                all_log_curves_of_this_model[model_no] = np.hstack([
                    all_log_curves_of_this_model[model_no],
                    one_log_curve_of_this_model
                ])
            ax.plot(all_log_curves_of_this_model[model_no],
                    linestyle="solid",
                    label='state_' + str(model_no),
                    color=next(color))
        title = ('skill_identification' + trial_name)
        ax.set_title(title)
        if not os.path.isdir(figure_save_path + '/skill_identification_plot'):
            os.makedirs(figure_save_path + '/skill_identification_plot')
        fig.savefig(os.path.join(figure_save_path, 'skill_identification_plot',
                                 title + ".jpg"),
                    format="jpg")
        fig.show()
def run(model_save_path, figure_save_path, threshold_c_value, trials_group_by_folder_name): trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length( trials_group_by_folder_name) one_trial_data_group_by_state = trials_group_by_folder_name.itervalues( ).next() state_amount = len(one_trial_data_group_by_state) threshold_constant = 10 threshold_offset = 10 model_group_by_state = {} for state_no in range(1, state_amount + 1): try: model_group_by_state[state_no] = joblib.load(model_save_path + "/model_s%s.pkl" % (state_no, )) except IOError: print 'model of state %s not found' % (state_no, ) continue expected_log = [] std_of_log = [] deri_threshold = [] for state_no in model_group_by_state: list_of_log_prob_mat = [] log_prob_owner = [] for trial_name in trials_group_by_folder_name: log_prob_owner.append(trial_name) hidden_state_log_prob = util.get_hidden_state_log_prob_matrix( trials_group_by_folder_name[trial_name][state_no], model_group_by_state[state_no]) list_of_log_prob_mat.append(hidden_state_log_prob) # use np matrix to facilitate the computation of mean curve and std plot_log_prob_of_all_trials(list_of_log_prob_mat, log_prob_owner, state_no, figure_save_path)
def run(model_save_path, figure_save_path, threshold_c_value, trials_group_by_folder_name):
    """Derive a per-state log-likelihood threshold curve from training trials
    and persist the mean, std, and decided threshold curves as pickles.

    For each state with a trained HMM, all trials' log-likelihood curves are
    stacked; their per-time-step mean and std plus threshold_c_value feed
    assess_threshold_and_decide, whose decision curve is stored.

    Args:
        model_save_path: directory holding "model_s<N>.pkl"; outputs are
            written here too.
        figure_save_path: forwarded to the threshold helper for its figures.
        threshold_c_value: scaling constant forwarded to
            assess_threshold_and_decide.
        trials_group_by_folder_name: {trial_name: {state_no: observation
            matrix}} of training demonstrations.

    NOTE(review): indentation was reconstructed from a flattened source;
    verify loop nesting against version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # Load whatever per-state models exist on disk; missing ones are skipped.
    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    expected_log = {}
    std_of_log = {}
    threshold = {}
    for state_no in model_group_by_state:
        compute_score_time_cost = 0
        total_step_times = 0
        all_log_curves_of_this_state = []
        curve_owner = []
        for trial_name in trials_group_by_folder_name:
            curve_owner.append(trial_name)
            one_log_curve_of_this_state = []
            # Time only the likelihood-curve computation itself.
            start_time = time.time()
            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])
            compute_score_time_cost += time.time() - start_time
            total_step_times += len(trials_group_by_folder_name[trial_name][state_no])
            all_log_curves_of_this_state.append(one_log_curve_of_this_state)
        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)
        mean_of_log_curve = np_matrix_traj_by_time.mean(0)
        std_of_log_curve = np_matrix_traj_by_time.std(0)
        # Average wall-clock cost of scoring a single time step.
        score_time_cost_per_point = float(compute_score_time_cost) / total_step_times
        decided_threshold_log_curve = assess_threshold_and_decide(
            threshold_c_value, mean_of_log_curve, std_of_log_curve,
            np_matrix_traj_by_time, curve_owner, state_no, figure_save_path,
            score_time_cost_per_point)
        # .tolist()[0] unwraps the 1xT np.matrix rows into plain lists.
        expected_log[state_no] = mean_of_log_curve.tolist()[0]
        threshold[state_no] = decided_threshold_log_curve.tolist()[0]
        std_of_log[state_no] = std_of_log_curve.tolist()[0]

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    joblib.dump(expected_log, model_save_path + "/mean_of_log_likelihood.pkl")
    joblib.dump(threshold, model_save_path + "/threshold_for_log_likelihood.pkl")
    joblib.dump(std_of_log, model_save_path + "/std_of_log_likelihood.pkl")
def run(model_save_path, model_type, model_config, score_metric, trials_group_by_folder_name):
    """Train one HMM per skill state via train_hmm_model, dump the winning
    model/config/score per state, then decode and plot each state's hidden
    state sequence across all trials.

    NOTE(review): indentation was reconstructed from a flattened source;
    verify loop nesting against version control.  This function also has two
    apparent defects flagged inline (undefined `sorted_model_list` and `X`).
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()
    trials_amount = len(trials_group_by_folder_name)
    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)
    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    # Concatenate every trial's observations per state, remembering each
    # trial's length so the HMM fit can separate the sequences.
    training_data_group_by_state = {}
    training_length_array_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_tempt
        training_length_array_group_by_state[state_no] = length_array

    # NOTE(review): duplicate of the directory check at the top.
    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    for state_no in range(1, state_amount + 1):
        model_list = []  # NOTE(review): never filled; see sorted_model_list below.
        train_data = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        best_model, model_id = train_hmm_model(train_data, lengths)
        joblib.dump(
            best_model['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))
        joblib.dump(
            best_model['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))
        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best_model['score'])))
        # NOTE(review): sorted_model_list is not defined anywhere in this
        # function -- this line raises NameError at runtime; presumably
        # model_list (or a sorted copy of it) was intended. Confirm.
        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(os.path.join(model_save_path,
                                    "model_s%s_training_report.json" % (state_no)),
                       'w'),
                  separators=(',\n', ': '))

        # plot the hidden state sequence for each state
        print
        print
        print 'Finish fitting the posterior model -> \nGenerating the hidden state sequence...'
        print
        print
        model = best_model['model']
        # NOTE(review): X is undefined in this scope -- train_data is
        # presumably what was intended; confirm before relying on this path.
        if model_type in ['hmmlearn\'s HMM', 'hmmlearn\'s GMMHMM']:
            _, model.z = model.decode(X, algorithm="viterbi")
        elif model_type == 'BNPY\'s HMM':
            model.z = model.decode(X, lengths)
        elif model_type == 'PYHSMM\'s HMM':
            model.z = model.model.stateseqs[0]
        else:
            print 'Sorry, this model cannot obtain the hidden state sequence'
            return

        # plt.close("all")
        # Xdf = pd.DataFrame(X) # plot the original multimodal signals
        # Xdf.plot()
        # im_data = np.tile(model.z, 2)
        # cmap =cm.get_cmap('jet',np.max(model.z))
        # print np.unique(model.z)
        # ax.imshow(im_data[None], aspect='auto', interpolation='nearest', vmin = 0, vmax = np.max(model.z), cmap = cmap, alpha = 0.5)
        fig, ax = plt.subplots(nrows=1, ncols=1)
        # Python 2 integer division: assumes every trial has the same length.
        trial_len = len(model.z) / trials_amount
        color = iter(cm.rainbow(np.linspace(0, 1, trials_amount)))
        zhat = []
        for iTrial in range(trials_amount):
            zSeq = model.z[iTrial * trial_len:(iTrial + 1) * trial_len]
            ax.plot(zSeq, color=next(color))  #, linewidth=2.0
            # Repeat the last sample so every row has equal length in the CSV.
            zhat.append(zSeq.tolist() + [zSeq[-1]])
        plt.show()
        zdf = pd.DataFrame(zhat)
        plt.title('The hidden state_sequence of state_%d' % (state_no))
        zdf.to_csv(model_save_path + '/zhat.csv', index=False)
def run(model_save_path, figure_save_path, trials_group_by_folder_name,
        state_order_group_by_folder_name, parsed_options):
    """Visual test of whether the gradient of the log-likelihood can detect
    state switches: for every trial, concatenate its states in executed
    order, score the whole sequence under every state's HMM, and plot the
    log-likelihood and its (positive-clamped) gradient over a background
    shaded by the true state segments.

    NOTE(review): parsed_options is accepted but never used here; the last
    `title = "trial %s"` assignment is also unused.  Indentation was
    reconstructed from a flattened source; verify nesting in version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # Load whatever per-state models exist on disk; missing ones are skipped.
    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    # One fixed color per state, shared by background shading and curves.
    state_color = {}
    color = iter(cm.rainbow(np.linspace(0, 1, state_amount)))
    for state_no in model_group_by_state:
        state_color[state_no] = color.next()

    output_dir = os.path.join(figure_save_path,
                              'test_if_gradient_can_detect_state_switch')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Two stacked subplots (log-lik and its gradient) per trial, laid out in
    # a single-column grid; the reshapes normalize plt.subplots' return shape.
    trial_amount = len(trials_group_by_folder_name)
    subpolt_amount_for_each_trial = 2
    subplot_per_row = 1
    subplot_amount = trial_amount * subpolt_amount_for_each_trial
    row_amount = int(math.ceil(float(subplot_amount) / subplot_per_row))
    fig, ax_mat = plt.subplots(nrows=row_amount, ncols=subplot_per_row)
    if row_amount == 1:
        ax_mat = ax_mat.reshape(1, -1)
    if subplot_per_row == 1:
        ax_mat = ax_mat.reshape(-1, 1)
    ax_list = []
    for i in range(trial_amount):
        for k in range(subpolt_amount_for_each_trial):
            j = subpolt_amount_for_each_trial * i + k
            row_no = j / subplot_per_row  # Python 2 integer division
            col_no = j % subplot_per_row
            ax_list.append(ax_mat[row_no, col_no])

    trial_count = -1
    for trial_name in trials_group_by_folder_name:
        trial_count += 1
        # Rebuild the full trial by concatenating its states in executed
        # order, remembering each segment boundary for background shading.
        X = None
        state_start_idx = [0]
        state_order = state_order_group_by_folder_name[trial_name]
        for state_no in state_order:
            if X is None:
                X = trials_group_by_folder_name[trial_name][state_no]
            else:
                X = np.concatenate(
                    (X, trials_group_by_folder_name[trial_name][state_no]),
                    axis=0)
            state_start_idx.append(len(X))
        plot_idx = trial_count * 2
        ax_loglik = ax_list[plot_idx]
        ax_loglik_gradient = ax_list[plot_idx + 1]
        color_bg_by_state(state_order, state_color, state_start_idx, ax_loglik)
        color_bg_by_state(state_order, state_color, state_start_idx,
                          ax_loglik_gradient)

        # One row per state model: its log-lik curve over the whole trial.
        log_lik_mat = []
        log_lik_gradient_mat = []
        mat_row_color = []
        mat_row_name = []
        for state_no in model_group_by_state:
            log_lik_curve = np.array(
                util.fast_log_curve_calculation(X, model_group_by_state[state_no]))
            log_lik_gradient_curve = log_lik_curve[1:] - log_lik_curve[:-1]
            log_lik_mat.append(log_lik_curve)
            log_lik_gradient_mat.append(log_lik_gradient_curve)
            mat_row_color.append(state_color[state_no])
            mat_row_name.append('state %s' % (state_no, ))
        log_lik_mat = np.matrix(log_lik_mat)
        log_lik_gradient_mat = np.matrix(log_lik_gradient_mat)
        # Negative gradients are clamped to zero before plotting.
        log_lik_gradient_mat[log_lik_gradient_mat < 0] = 0
        for row_no in range(log_lik_mat.shape[0]):
            ax_loglik.plot(log_lik_mat[row_no].tolist()[0],
                           label=mat_row_name[row_no],
                           color=mat_row_color[row_no])
            ax_loglik_gradient.plot(log_lik_gradient_mat[row_no].tolist()[0],
                                    label=mat_row_name[row_no],
                                    color=mat_row_color[row_no])
        title = "log-likelihood of %s HMM models" % state_amount
        ax_loglik.set_title(title)
        ax_loglik.set_ylabel('log probability')
        ax_loglik.set_xlabel('time step')
        title = "gradient of log-likelihood of %s HMM models" % state_amount
        ax_loglik_gradient.set_title(title)
        ax_loglik_gradient.set_ylabel('log probability')
        ax_loglik_gradient.set_xlabel('time step')
        title = "trial %s" % (trial_name, )  # NOTE(review): computed but unused

    fig.set_size_inches(8 * subplot_per_row, 2 * row_amount)
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir,
                             "test_if_gradient_can_detect_state_switch.png"),
                format="png")
    fig.savefig(os.path.join(output_dir,
                             "test_if_gradient_can_detect_state_switch.eps"),
                format="eps")
def run(model_save_path, model_type, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):
    """Check whether the score metric's ranking of trained configs matches
    log-likelihood-curve quality: for every config recorded in each state's
    training report, rebuild and refit the model, then plot all trials'
    log-likelihood curves under it (one figure per config, labelled with the
    config's training rank/id/score).

    NOTE(review): threshold_c_value is accepted but never used; the two
    `training_*_group_by_state` dicts are declared but never written.
    Indentation was reconstructed from a flattened source; verify nesting.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # Load each state's training report (sorted list of {model_id: score}).
    training_report_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            training_report_by_state[state_no] = json.load(
                open(
                    model_save_path + "/model_s%s_training_report.json" %
                    (state_no, ), 'r'))
        except IOError:
            print 'training report of state %s not found' % (state_no, )
            continue

    # Entry 0 of each report names the winning model id; its pickled config
    # serves as the template for reconstructing the other configs.
    model_config_by_state = {}
    for state_no in training_report_by_state:
        best_model_record = training_report_by_state[state_no][0]
        best_model_id = best_model_record.keys()[0]
        model_config_by_state[state_no] = joblib.load(
            model_save_path + "/model_s%s_config_%s.pkl" % (state_no, best_model_id))

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}
    for state_no in training_report_by_state:
        # Concatenate every trial's observations for this state, keeping each
        # trial's length so fit() can separate the sequences.
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        X = data_tempt
        lengths = length_array
        list_of_scored_models = training_report_by_state[state_no]
        model_config_template = model_config_by_state[state_no]
        for idx in range(len(list_of_scored_models)):
            model_id = list_of_scored_models[idx].keys()[0]
            model_score = list_of_scored_models[idx].values()[0]
            # Rebuild the concrete config from its id, refit from scratch.
            model_config = util.bring_model_id_back_to_model_config(
                model_id, model_config_template)
            model_config = _translate_into_new_config_paradigm(model_config)
            model = model_generation.model_factory(model_type, model_config)
            model = model.fit(X, lengths=lengths)
            all_log_curves_of_this_state = []
            curve_owner = []
            for trial_name in trials_group_by_folder_name:
                curve_owner.append(trial_name)
                one_log_curve_of_this_state = []
                one_log_curve_of_this_state = util.fast_log_curve_calculation(
                    trials_group_by_folder_name[trial_name][state_no],
                    model,
                )
                all_log_curves_of_this_state.append(one_log_curve_of_this_state)
            np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)
            plot_trials_loglik_curves_of_one_state(
                np_matrix_traj_by_time,
                curve_owner,
                state_no,
                os.path.join(figure_save_path,
                             'check_if_score_metric_converge_loglik_curves',
                             'state_%s' % (state_no, )),
                title='state_%s_training_rank_%s_id_%s_score_%s' %
                (state_no, idx, model_id, model_score))
def run(model_save_path, trials_group_by_folder_name, parsed_options):
    """Tampering experiment: for each state's HMM, visualize Viterbi-path
    growth, then tamper the input matrix (and optionally the model's
    transition matrix / start probabilities per parsed_options) and compare
    log-likelihood curves, their first derivatives, and the dominant-state
    emission+transition term between normal and tampered runs in a 4-panel
    figure saved under experiment_output/test_if_parallelity_can_be_restored.

    NOTE(review): indentation was reconstructed from a flattened source;
    verify loop nesting against version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    # Load whatever per-state models exist on disk; missing ones are skipped.
    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    base_dir = os.path.dirname(os.path.realpath(__file__))
    exp_dir = os.path.join(base_dir, 'experiment_output',
                           'test_if_parallelity_can_be_restored')

    # The output folder name records which tamperings were requested.
    output_id = '(tamper_input)'
    tampered = False
    if parsed_options.tamper_transmat:
        output_id += '_(tamper_transmat)'
        tampered = True
    if parsed_options.tamper_startprob:
        output_id += '_(tamper_startprob)'
        tampered = True
    output_dir = os.path.join(exp_dir, output_id)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for state_no in model_group_by_state:
        # Probe input: one trial's observation matrix for this state.
        X = one_trial_data_group_by_state[state_no]
        list_of_growing_viterbi_paths, n_samples, n_components = util.fast_growing_viterbi_paths_cal(
            X, model_group_by_state[state_no])
        list_of_lock_t, n_samples, n_components = util.fast_viterbi_lock_t_cal(
            X, model_group_by_state[state_no])
        util.output_growing_viterbi_path_img(
            list_of_growing_viterbi_paths,
            n_components,
            os.path.join(
                output_dir,
                'check_if_viterbi_path_grow_incrementally_state_%s.png' % state_no,
            ),
            list_of_lock_t,
        )
        util.visualize_viterbi_alog(
            X, model_group_by_state[state_no],
            os.path.join(output_dir,
                         'state %s visualized viterbi alog.png' % (state_no, )))

        all_Xs = [trials_group_by_folder_name[trial_name][state_no]
                  for trial_name in trials_group_by_folder_name]
        # Tamper a copy so the original X stays intact for comparison.
        tampered_X, list_of_tampered_range = tamper_input_mat(X.copy(), all_Xs)
        model = model_group_by_state[state_no]
        profile_model(model, output_dir, 'state %s raw' % (state_no, ))
        # NOTE(review): tamper_* mutate the model in place, so the "normal"
        # curves below are computed with the tampered model too when the
        # corresponding options are set — confirm that is intended.
        if parsed_options.tamper_transmat:
            tamper_transmat(model)
        if parsed_options.tamper_startprob:
            tamper_startprob(model)
        if tampered:
            profile_model(model, output_dir, 'state %s tampered' % (state_no, ))

        log_transmat = util.get_log_transmat(model)

        # --- statistics on the untampered input ---
        log_lik_of_X = np.array(util.fast_log_curve_calculation(X, model))
        framelogprob_of_X = np.array(
            util.get_emission_log_prob_matrix(X, model))
        fwdlattice_of_X = util.get_hidden_state_log_prob_matrix(X, model)
        max_hstate_of_X = fwdlattice_of_X.argmax(1)
        # "the term": emission log-prob of the dominant hidden state plus the
        # log transition probability from the previous dominant state.
        the_term_of_X = [framelogprob_of_X[0][max_hstate_of_X[0]]]
        for t in range(1, len(max_hstate_of_X)):
            hs1 = max_hstate_of_X[t - 1]
            hs2 = max_hstate_of_X[t]
            the_term_of_X.append(framelogprob_of_X[t][hs2] + log_transmat[hs1][hs2])
        profile_log_curve_cal(X, model, output_dir, 'state %s X' % (state_no, ),
                              list_of_tampered_range)

        # --- same statistics on the tampered input ---
        log_lik_of_tampered_X = np.array(
            util.fast_log_curve_calculation(tampered_X, model))
        framelogprob_of_tampered_X = np.array(
            util.get_emission_log_prob_matrix(tampered_X, model))
        fwdlattice_of_tampered_X = util.get_hidden_state_log_prob_matrix(
            tampered_X, model)
        max_hstate_of_tampered_X = fwdlattice_of_tampered_X.argmax(1)
        the_term_of_tampered_X = [
            framelogprob_of_tampered_X[0][max_hstate_of_tampered_X[0]]
        ]
        for t in range(1, len(max_hstate_of_tampered_X)):
            hs1 = max_hstate_of_tampered_X[t - 1]
            hs2 = max_hstate_of_tampered_X[t]
            the_term_of_tampered_X.append(framelogprob_of_tampered_X[t][hs2] +
                                          log_transmat[hs1][hs2])
        profile_log_curve_cal(tampered_X, model, output_dir,
                              'state %s tampered_X' % (state_no, ),
                              list_of_tampered_range)

        # First derivatives; element 0 pinned to 0 to keep the array length.
        deri_of_X = log_lik_of_X.copy()
        deri_of_X[1:] = log_lik_of_X[1:] - log_lik_of_X[:-1]
        deri_of_X[0] = 0
        deri_of_tampered_X = log_lik_of_tampered_X.copy()
        deri_of_tampered_X[1:] = log_lik_of_tampered_X[1:] - log_lik_of_tampered_X[:-1]
        deri_of_tampered_X[0] = 0
        diff = log_lik_of_X - log_lik_of_tampered_X  # NOTE(review): unused

        # --- 4-panel comparison figure; tampered ranges shaded red ---
        fig = plt.figure()
        bbox_extra_artists = []
        ax = fig.add_subplot(411)
        title = "log lik"
        ax.set_title(title)
        ax.plot(log_lik_of_X, color='black', marker='None', linestyle='solid',
                label='Normal')
        ax.plot(log_lik_of_tampered_X, color='blue', marker='None',
                linestyle='solid', label='Tampered')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)
        ax = fig.add_subplot(412)
        title = "1st deri"
        ax.set_title(title)
        ax.plot(deri_of_X, color='black', marker='None', linestyle='solid',
                label='Normal')
        ax.plot(deri_of_tampered_X, color='blue', marker='None',
                linestyle='solid', label='Tampered')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)
        ax = fig.add_subplot(413)
        title = "1st deri and max emission prob of Normal"
        ax.set_title(title)
        ax.plot(deri_of_X, color='black', marker='None', linestyle='solid',
                label='Normal 1st deri')
        ax.plot(the_term_of_X, color='red', marker='None', linestyle='solid',
                label='Normal the term')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)
        ax = fig.add_subplot(414)
        title = "1st deri and max emission prob of Tampered"
        ax.set_title(title)
        ax.plot(deri_of_tampered_X, color='blue', marker='None',
                linestyle='solid', label='Tampered 1st deri')
        ax.plot(the_term_of_tampered_X, color='red', marker='None',
                linestyle='solid', label='Tampered the term')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)
        title = "output_id %s state %s" % (output_id, state_no)
        fig.suptitle(title)
        plt.tight_layout()
        fig.savefig(os.path.join(output_dir, title + ".eps"), format="eps",
                    bbox_extra_artists=bbox_extra_artists, bbox_inches='tight')
        fig.savefig(os.path.join(output_dir, title + ".png"), format="png",
                    bbox_extra_artists=bbox_extra_artists, bbox_inches='tight')
def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):
    """Grid-train candidate HMM configs per skill state, score each with
    score_metric, and persist the best model plus a sorted training report.

    Args:
        model_save_path: output directory for model/config/score pickles and
            the per-state JSON training report.
        model_type: forwarded to model_generation's generator factory.
        model_config: config (grid) forwarded to the generator factory.
        score_metric: metric name forwarded to model_score.score.
        trials_group_by_folder_name: {trial_name: {state_no: observation
            matrix}} of training demonstrations.

    NOTE(review): indentation was reconstructed from a flattened source;
    verify loop nesting against version control.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()
    trials_amount = len(trials_group_by_folder_name)  # NOTE(review): unused
    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)
    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    # Concatenate every trial's observations per state, remembering each
    # trial's length so fit() can separate the sequences.
    training_data_group_by_state = {}
    training_length_array_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_tempt
        training_length_array_group_by_state[state_no] = length_array

    # NOTE(review): duplicate of the directory check at the top.
    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    for state_no in range(1, state_amount + 1):
        model_list = []
        model_generator = model_generation.get_model_generator(
            model_type, model_config)
        X = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        lengths[-1] -= 1  # Adapting for bnpy's observation is firt-order autoregressive gaussian
        for model, now_model_config in model_generator:
            print
            print '-' * 20
            print 'in state', state_no, ' working on config:', now_model_config
            model = model.fit(X, lengths=lengths)  #n_samples, n_features
            score = model_score.score(score_metric, model, X, lengths)
            # A None score is the scorer's signal to drop this candidate.
            if score == None:
                print "scorer says to skip this model, will do"
                continue
            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print 'score:', score
            print '=' * 20
            print
            model_generation.update_now_score(score)
        # Ascending sort, entry 0 is taken as best.
        # NOTE(review): this assumes a lower score is better — confirm
        # against the score_metric's convention.
        sorted_model_list = sorted(model_list, key=lambda x: x['score'])
        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])
        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))
        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))
        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))
        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))
def run(model_save_path, model_type, model_config, score_metric, trials_group_by_folder_name, test_trials_group_by_folder_name): trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length( trials_group_by_folder_name) list_of_training_trial = trials_group_by_folder_name.values() test_trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length( test_trials_group_by_folder_name) list_of_test_trial = test_trials_group_by_folder_name.values() if not os.path.isdir(model_save_path): os.makedirs(model_save_path) one_trial_data_group_by_state = list_of_training_trial[0] state_amount = len(one_trial_data_group_by_state) training_data_group_by_state = {} test_data_group_by_state = {} for state_no in range(1, state_amount + 1): training_data_group_by_state[state_no] = [] test_data_group_by_state[state_no] = [] for trial_no in range(len(list_of_training_trial)): training_data_group_by_state[state_no].append( list_of_training_trial[trial_no][state_no]) for trial_no in range(len(list_of_test_trial)): test_data_group_by_state[state_no].append( list_of_test_trial[trial_no][state_no]) if not os.path.isdir(model_save_path): os.makedirs(model_save_path) for state_no in range(1, state_amount + 1): print 'state_no', state_no sorted_model_list = train_model.run( list_of_train_mat=training_data_group_by_state[state_no], list_of_test_mat=test_data_group_by_state[state_no], model_type=model_type, model_config=model_config, score_metric=score_metric, ) best = sorted_model_list[0] model_id = util.get_model_config_id(best['now_model_config']) joblib.dump( best['model'], os.path.join(model_save_path, "model_s%s.pkl" % (state_no, ))) joblib.dump( best['now_model_config'], os.path.join(model_save_path, "model_s%s_config_%s.pkl" % (state_no, model_id))) joblib.dump( None, os.path.join(model_save_path, "model_s%s_score_%s.pkl" % (state_no, best['score']))) train_report = [{ util.get_model_config_id(i['now_model_config']): i['score'] } for i in sorted_model_list] 
import json json.dump(train_report, open( os.path.join( model_save_path, "model_s%s_training_report.json" % (state_no)), 'w'), separators=(',\n', ': '))