Exemplo n.º 1
0
def predict_proba(X_test, class_names):
    """Score every test sample against the anomaly model of each class.

    For each entry found under ``training_config.anomaly_data_path`` the
    ``model_s1.pkl`` file is loaded from the matching directory below
    ``training_config.anomaly_model_save_path``.  Every sample is then run
    through ``util.fast_log_curve_calculation`` with the models named in
    ``class_names``; the last value of each log curve is kept and the values
    of one sample are divided by their sum.

    Args:
        X_test: indexable collection of samples; ``X_test[i]`` is handed
            unchanged to ``util.fast_log_curve_calculation``.
        class_names: labels to score, in output column order.

    Returns:
        ``np.ndarray`` with one row per sample and one column per entry of
        ``class_names``.

    Raises:
        KeyError: if a label in ``class_names`` has no loaded model.
    """
    # load trained anomaly models
    anomaly_model_group_by_label = {}
    for fo in os.listdir(training_config.anomaly_data_path):
        anomaly_model_path = os.path.join(
            training_config.anomaly_model_save_path, fo,
            training_config.config_by_user['data_type_chosen'],
            training_config.config_by_user['model_type_chosen'],
            training_config.model_id)
        try:
            anomaly_model_group_by_label[fo] = joblib.load(
                os.path.join(anomaly_model_path, "model_s%s.pkl" % (1, )))
        except IOError:
            # No debugger breakpoint here: a missing model is reported and
            # the user is asked to acknowledge it, nothing more.
            print('anomaly model of  %s not found' % (fo, ))
            raw_input("sorry! cann't load the anomaly model")
            continue

    predict_score = []
    for i in range(len(X_test)):
        final_logliks = np.asarray([
            util.fast_log_curve_calculation(
                X_test[i], anomaly_model_group_by_label[model_label])[-1]
            for model_label in class_names
        ])
        # NOTE(review): the values are presumably cumulative log-likelihoods.
        # Dividing by their sum makes each row add up to 1, but when all
        # values are negative the best-fitting class gets the SMALLEST score.
        # Kept as-is for compatibility; confirm with callers whether a
        # softmax over the log-likelihoods was intended.
        predict_score.append(final_logliks / np.sum(final_logliks))
    return np.array(predict_score)
Exemplo n.º 2
0
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):
    """Derive and store a threshold per state from the trials' log curves.

    The trials are first passed through
    ``util.make_trials_of_each_state_the_same_length``.  For every state
    whose ``model_s<N>.pkl`` can be loaded from ``model_save_path`` the log
    curve of each trial is computed, the mean curve is subtracted, and the
    step-to-step difference of that deviation is handed to
    ``assess_threshold_and_decide``.  The resulting thresholds and the mean
    curves are dumped next to the models.

    Args:
        model_save_path: directory holding ``model_s<N>.pkl``; also receives
            ``threshold_for_deri_of_diff.pkl`` and
            ``mean_curve_group_by_state.pkl``.
        figure_save_path: forwarded to ``assess_threshold_and_decide``.
        threshold_c_value: accepted but not used by this function.
        trials_group_by_folder_name: mapping trial name -> mapping state
            number -> trial data of that state.
    """
    trials = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    # Any trial will do: only the number of states is needed.
    number_of_states = len(trials.itervalues().next())

    models = {}
    for state_idx in range(1, number_of_states + 1):
        model_file = model_save_path + "/model_s%s.pkl" % (state_idx, )
        try:
            models[state_idx] = joblib.load(model_file)
        except IOError:
            print('model of state %s not found' % (state_idx, ))

    thresholds = {}
    mean_curves = {}
    for state_idx in models:
        owners = list(trials)
        curves = [
            util.fast_log_curve_calculation(trials[owner][state_idx],
                                            models[state_idx])
            for owner in owners
        ]

        # one row per trial, one column per time step
        curve_matrix = np.matrix(curves)
        mean_curve = curve_matrix.mean(0)
        deviation = curve_matrix - mean_curve
        deviation_step = deviation[:, 1:] - deviation[:, :-1]

        mean_curves[state_idx] = mean_curve
        thresholds[state_idx] = assess_threshold_and_decide(
            deviation_step,
            owners,
            state_idx,
            figure_save_path,
        )

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    # Nothing is written when no state model could be loaded.
    if thresholds:
        joblib.dump(thresholds,
                    model_save_path + "/threshold_for_deri_of_diff.pkl")
        joblib.dump(mean_curves,
                    model_save_path + "/mean_curve_group_by_state.pkl")
Exemplo n.º 3
0
def run(model_save_path,
        figure_save_path,
        threshold_c_value,
        trials_group_by_folder_name,
        data_class):
    """Feed the log curves of every state to ``assess_threshold_and_decide``.

    An output directory
    ``<figure_save_path>/gradient_of_log_likelihood_plot/<data_class>`` is
    created if missing.  For each state whose ``model_s<N>.pkl`` loads from
    ``model_save_path``, the log curves of all trials are stacked into a
    matrix (one row per trial) and passed on together with the trial names,
    the state number, the output directory and ``data_class``.

    Args:
        model_save_path: directory holding ``model_s<N>.pkl``.
        figure_save_path: root directory for the output folder.
        threshold_c_value: accepted but not used by this function.
        trials_group_by_folder_name: mapping trial name -> mapping state
            number -> trial data of that state.
        data_class: sub-folder name, also forwarded to
            ``assess_threshold_and_decide``.
    """
    plot_dir = os.path.join(figure_save_path,
                            "gradient_of_log_likelihood_plot", data_class)
    if not os.path.isdir(plot_dir):
        os.makedirs(plot_dir)

    trials = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    # Any trial will do: only the number of states is needed.
    number_of_states = len(trials.itervalues().next())

    models = {}
    for state_idx in range(1, number_of_states + 1):
        model_file = model_save_path + "/model_s%s.pkl" % (state_idx, )
        try:
            models[state_idx] = joblib.load(model_file)
        except IOError:
            print('model of state %s not found' % (state_idx, ))

    for state_idx in models:
        owners = list(trials)
        curves = [
            util.fast_log_curve_calculation(trials[owner][state_idx],
                                            models[state_idx])
            for owner in owners
        ]
        assess_threshold_and_decide(
            np.matrix(curves),
            owners,
            state_idx,
            plot_dir,
            data_class,
        )
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):
    """Plot emission log probabilities and log-curve gradients per state.

    For each state whose ``model_s<N>.pkl`` loads from ``model_save_path``,
    every trial contributes its emission log-probability matrix and its log
    curve.  The step-to-step difference of the stacked log curves is passed
    to ``plot_log_prob_of_all_trials`` together with the emission matrices,
    the trial names, the state number and ``figure_save_path``.

    Args:
        model_save_path: directory holding ``model_s<N>.pkl``.
        figure_save_path: forwarded to ``plot_log_prob_of_all_trials``.
        threshold_c_value: accepted but not used by this function.
        trials_group_by_folder_name: mapping trial name -> mapping state
            number -> trial data of that state.
    """
    trials = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    # Any trial will do: only the number of states is needed.
    number_of_states = len(trials.itervalues().next())

    models = {}
    for state_idx in range(1, number_of_states + 1):
        model_file = model_save_path + "/model_s%s.pkl" % (state_idx, )
        try:
            models[state_idx] = joblib.load(model_file)
        except IOError:
            print('model of state %s not found' % (state_idx, ))

    for state_idx in models:
        owners = []
        emission_matrices = []
        curves = []
        for owner in trials:
            owners.append(owner)
            emission_matrices.append(
                util.get_emission_log_prob_matrix(trials[owner][state_idx],
                                                  models[state_idx]))
            curves.append(
                util.fast_log_curve_calculation(trials[owner][state_idx],
                                                models[state_idx]))

        # one row per trial, one column per time step
        curve_matrix = np.matrix(curves)
        curve_gradient = curve_matrix[:, 1:] - curve_matrix[:, :-1]

        plot_log_prob_of_all_trials(curve_gradient, emission_matrices,
                                    owners, state_idx, figure_save_path)
Exemplo n.º 5
0
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):
    """Assess thresholds per state and report the scoring cost per data point.

    For each state whose ``model_s<N>.pkl`` loads from ``model_save_path``,
    the log curve of every trial is computed while the wall-clock time of
    those calls and the number of processed steps are accumulated.  The
    stacked curves, the trial names, the state number, ``figure_save_path``
    and the average time per step go to ``assess_threshold_and_decide``.

    Args:
        model_save_path: directory holding ``model_s<N>.pkl``.
        figure_save_path: forwarded to ``assess_threshold_and_decide``.
        threshold_c_value: accepted but not used by this function.
        trials_group_by_folder_name: mapping trial name -> mapping state
            number -> trial data of that state.
    """
    trials = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    # Any trial will do: only the number of states is needed.
    number_of_states = len(trials.itervalues().next())

    models = {}
    for state_idx in range(1, number_of_states + 1):
        model_file = model_save_path + "/model_s%s.pkl" % (state_idx, )
        try:
            models[state_idx] = joblib.load(model_file)
        except IOError:
            print('model of state %s not found' % (state_idx, ))

    for state_idx in models:
        seconds_spent = 0
        steps_scored = 0
        owners = []
        curves = []
        for owner in trials:
            owners.append(owner)

            tick = time.time()
            curve = util.fast_log_curve_calculation(
                trials[owner][state_idx], models[state_idx])
            seconds_spent += time.time() - tick

            steps_scored += len(trials[owner][state_idx])
            curves.append(curve)

        seconds_per_step = float(seconds_spent) / steps_scored

        assess_threshold_and_decide(np.matrix(curves), owners, state_idx,
                                    figure_save_path, seconds_per_step)
    def get_anomaly_detection_msg(self, arrived_data, arrived_state,
                                  data_header):
        """Build an ``Hmm_Log`` message for the newest sample of a state.

        A freshly constructed ``Hmm_Log`` is returned untouched when fewer
        than 10 samples have arrived, when ``arrived_state`` is not positive,
        or when an ``IndexError`` occurs while reading the per-state curves
        (in which case an info message is logged).

        Otherwise the log curve of ``arrived_data`` is computed with the
        model of ``arrived_state`` and the distance between log-likelihood
        and threshold is evaluated at the last two time steps.
        ``event_flag`` is 1 when the absolute change of that distance is
        below ``self.deri_threshold`` and 0 otherwise.

        Args:
            arrived_data: samples received so far for the current state.
            arrived_state: state number used to look up model and curves.
            data_header: header object stored in the message; its ``stamp``
                attribute is overwritten with ``rospy.Time.now()``.

        Returns:
            The ``Hmm_Log`` instance described above.
        """
        msg = Hmm_Log()

        sample_count = len(arrived_data)
        if sample_count < 10 or arrived_state <= 0:
            return msg

        newest = sample_count - 1
        previous = newest - 1

        try:
            # Touch the newest index of both curves first so that data
            # longer than the stored curves fails before the log curve is
            # computed.
            self.expected_log_group_by_state[arrived_state][newest]
            self.threshold_group_by_state[arrived_state][newest]

            likelihood_curve = util.fast_log_curve_calculation(
                arrived_data, self.model_group_by_state[arrived_state])
            threshold_curve = self.threshold_group_by_state[arrived_state]

            previous_threshold = threshold_curve[previous]
            previous_gap = likelihood_curve[previous] - previous_threshold

            newest_threshold = threshold_curve[newest]
            newest_loglik = likelihood_curve[newest]
            newest_gap = newest_loglik - newest_threshold

            gap_change = newest_gap - previous_gap

            msg.event_flag = 1 if abs(gap_change) < self.deri_threshold else 0
            msg.current_log.data = newest_loglik
            msg.expected_log.data = self.expected_log_group_by_state[
                arrived_state][newest]
            msg.threshold.data = newest_threshold
            msg.diff_btw_curlog_n_thresh.data = newest_gap
            msg.deri_of_diff_btw_curlog_n_thresh.data = gap_change
            msg.header = data_header
            msg.header.stamp = rospy.Time.now()
        except IndexError:
            rospy.loginfo(
                'received data is longer than the threshold. DTW needed.')

        return msg
def run(model_save_path, figure_save_path, trials_group_by_folder_name):
    """Plot, per trial, the log curve of every state model over the whole trial.

    For each trial and each state model that could be loaded from
    ``model_save_path``, the log curves obtained by scoring the data of
    state 1..N with that model are concatenated and drawn as one line.  The
    figure is saved after each trial as
    ``<figure_save_path>/skill_identification_plot/skill_identification<trial>.jpg``.

    Args:
        model_save_path: directory holding ``model_s<N>.pkl``.
        figure_save_path: root directory for the saved figures.
        trials_group_by_folder_name: mapping trial name -> mapping state
            number -> trial data of that state.
    """
    from matplotlib.pyplot import cm

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print('model of state %s not found' % (state_no, ))

    # The figure is created exactly once and regardless of which models
    # loaded; doing it inside the loading loop left `fig`/`ax`/`cm` unbound
    # when no model was found.
    fig = plt.figure(1)
    ax = fig.add_subplot(111)

    # One fixed colour per state number, so a missing model does not shift
    # the colours of the remaining states.
    state_colors = cm.rainbow(np.linspace(0, 1, state_amount))

    output_dir = os.path.join(figure_save_path, 'skill_identification_plot')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for trial_name in trials_group_by_folder_name:
        trial_data_by_state = trials_group_by_folder_name[trial_name]
        # Iterate models by ascending state number and keep each curve in a
        # local: indexing a list by model number broke as soon as a model in
        # the middle was missing.
        for model_no in sorted(model_group_by_state):
            whole_trial_curve = np.hstack([
                util.fast_log_curve_calculation(trial_data_by_state[state_no],
                                                model_group_by_state[model_no])
                for state_no in range(1, state_amount + 1)
            ])
            ax.plot(whole_trial_curve,
                    linestyle="solid",
                    label='state_' + str(model_no),
                    color=state_colors[model_no - 1])
        # NOTE(review): the axes are never cleared, so every saved figure
        # also contains the curves of the trials plotted before it.  Kept
        # as in the original; confirm whether one trial per image was meant.
        title = ('skill_identification' + trial_name)
        ax.set_title(title)
        fig.savefig(os.path.join(output_dir, title + ".jpg"), format="jpg")
    fig.show()
Exemplo n.º 8
0
    def predict(self, x_test, class_names):
        """Classify each sample as the class whose model scores it highest.

        The ``model_s1.pkl`` of every label in ``class_names`` is loaded
        from ``training_config.anomaly_model_save_path/<label>``.  Each
        sample is scored with ``util.fast_log_curve_calculation`` against
        every model and the position (within ``class_names``) of the model
        with the largest final log-curve value is recorded.

        Args:
            x_test: indexable collection of samples.
            class_names: class labels; their order defines the returned
                indices.

        Returns:
            List with one index into ``class_names`` per sample.
        """
        # load trained anomaly models
        models_by_label = {}
        for label in class_names:
            model_dir = os.path.join(training_config.anomaly_model_save_path,
                                     label)
            try:
                models_by_label[label] = joblib.load(
                    model_dir + "/model_s%s.pkl" % (1, ))
            except IOError:
                print('anomaly model of  %s not found' % (label, ))
                raw_input("sorry! cann't load the anomaly model")

        y_pred = []
        for sample_no in range(len(x_test)):
            candidates = [{
                'model_idx': position,
                'model_label': label,
                'culmulative_loglik': util.fast_log_curve_calculation(
                    x_test[sample_no], models_by_label[label])[-1],
            } for position, label in enumerate(class_names)]
            # ascending sort: the last entry is the best-scoring model
            ranked = sorted(candidates,
                            key=lambda entry: entry['culmulative_loglik'])
            y_pred.append(ranked[-1]['model_idx'])
        return y_pred
Exemplo n.º 9
0
def predict(X_test):
    """Classify each sample as the anomaly label whose model scores it highest.

    For every entry found under ``<base_path>/all_anomalies`` the
    ``model_s1.pkl`` file is loaded from the matching directory below
    ``training_config.anomaly_model_save_path``.  Each sample is scored with
    ``util.fast_log_curve_calculation`` against every loaded model and the
    label with the largest final log-curve value is selected.

    Args:
        X_test: indexable collection of samples.

    Returns:
        List with one label per sample.
    """
    # load trained anomaly models
    anomaly_model_group_by_label = {}
    anomaly_data_path = os.path.join(
        training_config.config_by_user['base_path'], 'all_anomalies')
    for fo in os.listdir(anomaly_data_path):
        anomaly_model_path = os.path.join(
            training_config.anomaly_model_save_path, fo,
            training_config.config_by_user['data_type_chosen'],
            training_config.config_by_user['model_type_chosen'],
            training_config.model_id)
        try:
            anomaly_model_group_by_label[fo] = joblib.load(
                os.path.join(anomaly_model_path, "model_s%s.pkl" % (1, )))
        except IOError:
            # No debugger breakpoint here: a missing model is reported and
            # the user is asked to acknowledge it, nothing more.
            print('anomaly model of  %s not found' % (fo, ))
            raw_input("sorry! cann't load the anomaly model")
            continue

    predict_class = []
    for i in range(len(X_test)):
        # The candidate list must start empty for every sample; sharing one
        # list across samples let scores of earlier samples win the ranking.
        calc_cofidence_resourse = [{
            'model_label': model_label,
            'culmulative_loglik': util.fast_log_curve_calculation(
                X_test[i], anomaly_model_group_by_label[model_label])[-1],
        } for model_label in anomaly_model_group_by_label]
        # ascending sort: the last entry is the best-scoring model
        sorted_list = sorted(calc_cofidence_resourse,
                             key=lambda x: x['culmulative_loglik'])
        predict_class.append(sorted_list[-1]['model_label'])
    return predict_class
Exemplo n.º 10
0
 def predict_proba(self, x_test, class_names):
     """Score every sample against the anomaly model of each class.

     The ``model_s1.pkl`` of every label in ``class_names`` is loaded from
     ``training_config.anomaly_model_save_path/<label>``.  For each sample
     the final log-curve value of every model is collected and the values
     are divided by their sum.

     Args:
         x_test: indexable collection of samples.
         class_names: class labels, in output column order.

     Returns:
         ``np.ndarray`` with one row per sample and one column per label.
     """
     # load trained anomaly models
     models_by_label = {}
     for label in class_names:
         model_dir = os.path.join(training_config.anomaly_model_save_path,
                                  label)
         try:
             models_by_label[label] = joblib.load(
                 model_dir + "/model_s%s.pkl" % (1, ))
         except IOError:
             print('anomaly model of  %s not found' % (label, ))
             raw_input("sorry! cann't load the anomaly model")

     rows = []
     for sample_no in range(len(x_test)):
         final_values = [
             util.fast_log_curve_calculation(x_test[sample_no],
                                             models_by_label[label])[-1]
             for label in class_names
         ]
         rows.append(final_values / np.sum(final_values))
     return np.array(rows)
Exemplo n.º 11
0
def generate_performance_logging_report_with_varible_model_parameters():
    """Log a classification report for every generated model configuration.

    The train/test arrays and the label list are read from
    ``training_config.anomaly_data_path``.  For each ``(model, config)``
    pair yielded by ``model_generation.get_model_generator`` one model per
    class is fitted on the concatenated training sequences of that class,
    every test sample is assigned to the class whose model gives the largest
    final log-curve value, and the resulting ``classification_report`` is
    written to ``logger``.

    Raises:
        SystemExit: with status 1 if one of the ``.npy`` files cannot be read.
        ValueError: if a class has no training sample.
    """
    import model_generation

    # load the train/test/labels file
    dataset_dir = training_config.anomaly_data_path
    try:
        x_train = np.load(os.path.join(dataset_dir, "X_train.npy"))
        y_train = np.load(os.path.join(dataset_dir, "y_train.npy"))
        x_test = np.load(os.path.join(dataset_dir, "X_test.npy"))
        y_test = np.load(os.path.join(dataset_dir, "y_test.npy"))
        labels = np.load(os.path.join(dataset_dir, "labels_list.npy"))
    except IOError:
        print(
            'Error occured trying to read the file, please check the path: ' +
            dataset_dir)
        # non-zero status: this is a failure, not a normal termination
        sys.exit(1)

    # swap the last two axes of the stored arrays
    x_train = x_train.transpose((0, 2, 1))
    x_test = x_test.transpose((0, 2, 1))
    y_train = y_train.reshape(-1, ).tolist()
    y_test = y_test.reshape(-1, ).tolist()
    class_names = labels.tolist()

    train_data_by_class = {}
    train_lengths_by_class = {}
    for idx, class_name in enumerate(class_names):
        indices = [i for i, label in enumerate(y_train) if label == idx]
        if not indices:
            # Fail loudly instead of reusing the data of the previous class.
            raise ValueError('no training sample found for class %r' %
                             (class_name, ))
        sequences = x_train[indices]
        lengths = [sequence.shape[0] for sequence in sequences]
        # one concatenation instead of growing the array sequence by sequence
        train_data_by_class[class_name] = np.concatenate(list(sequences),
                                                         axis=0)
        # NOTE(review): the last sequence is declared one step shorter than
        # it is; kept from the original, reason not visible here - confirm.
        lengths[-1] -= 1
        train_lengths_by_class[class_name] = lengths

    model_generator = model_generation.get_model_generator(
        training_config.model_type_chosen, training_config.model_config)
    for model, now_model_config in model_generator:
        logger.info(now_model_config)
        model_collection_for_all_classes = {}
        for _name in class_names:
            fitted_model = model.fit(
                train_data_by_class[_name],
                lengths=train_lengths_by_class[_name])  # n_samples, n_features
            # Round-trip through disk so every class keeps its own model
            # object (presumably `fit` returns the shared `model` - the
            # original comment only says "it works").
            anomaly_model_path = os.path.join(
                training_config.anomaly_model_save_path,
                'temp_classification_report_model', _name)
            if not os.path.isdir(anomaly_model_path):
                os.makedirs(anomaly_model_path)
            model_file = os.path.join(anomaly_model_path,
                                      "model_s%s.pkl" % (1, ))
            joblib.dump(fitted_model, model_file)
            model_collection_for_all_classes[_name] = joblib.load(model_file)

        y_pred = []
        for i in range(len(x_test)):
            calc_cofidence_resourse = [{
                'model_idx': idx,
                'model_label': model_label,
                'culmulative_loglik': util.fast_log_curve_calculation(
                    x_test[i],
                    model_collection_for_all_classes[model_label])[-1],
            } for idx, model_label in enumerate(class_names)]
            # ascending sort: the last entry is the best-scoring model
            sorted_list = sorted(calc_cofidence_resourse,
                                 key=lambda x: x['culmulative_loglik'])
            y_pred.append(sorted_list[-1]['model_idx'])

        # for confusion matrix
        _clf_report = classification_report(y_test,
                                            y_pred,
                                            target_names=list(class_names))
        logger.info(_clf_report)
Exemplo n.º 12
0
def _log_curves_as_matrix(model, X, lengths):
    """Stack the log curve of every trial in ``X`` into a matrix, one row per trial."""
    return np.matrix([
        util.fast_log_curve_calculation(X[i:j], model)
        for i, j in util.iter_from_X_lengths(X, lengths)
    ])


def _report_single_hidden_state(score_metric):
    """Tell the user that ``score_metric`` cannot be evaluated with one hidden state."""
    print('hidden state amount = 1, but %s wants hidden state amount > 1, '
          'so no score for this turn' % (score_metric, ))


def score(score_metric, model, X, lengths):
    """Evaluate ``model`` on the trials in ``X`` with the named metric.

    ``X`` holds all trials back to back; ``util.iter_from_X_lengths(X,
    lengths)`` yields the ``(start, end)`` bounds used to slice out each
    trial.

    Args:
        score_metric: one of the ``_score_metric_..._`` names handled below.
        model: model handed to ``util.fast_log_curve_calculation`` /
            ``util.get_emission_log_prob_matrix``; the first two metrics
            call ``model.score`` directly.
        X: concatenated trial data, sliceable along the first axis.
        lengths: per-trial lengths understood by
            ``util.iter_from_X_lengths``.

    Returns:
        The metric value, or ``None`` when an emission-probability metric
        cannot be evaluated (single hidden state, no usable entry, or a
        maximum of 0); a message is printed in that case.

    Raises:
        ValueError: if ``score_metric`` is not a known metric name.
    """
    if score_metric == '_score_metric_worst_stdmeanratio_in_10_slice_':
        # `//` keeps the slice bound an integer whatever division mode is
        # active.
        slice_10_time_step_log_lik = [[
            model.score(X[i:i + k * (j - i) // 10]) for k in range(1, 11)
        ] for i, j in util.iter_from_X_lengths(X, lengths)]
        matrix = np.matrix(slice_10_time_step_log_lik)
        slice_10_means = abs(matrix.mean(0))
        slice_10_std = matrix.std(0)
        return (slice_10_std / slice_10_means).max()

    if score_metric == '_score_metric_last_time_stdmeanratio_':
        final_time_step_log_lik = [
            model.score(X[i:j])
            for i, j in util.iter_from_X_lengths(X, lengths)
        ]
        matrix = np.matrix(final_time_step_log_lik)
        mean = abs(matrix.mean())
        return matrix.std() / mean

    if score_metric == '_score_metric_sum_stdmeanratio_using_fast_log_cal_':
        curve_mat = _log_curves_as_matrix(model, X, lengths)
        mean_of_log_curve = curve_mat.mean(0)
        std_of_log_curve = curve_mat.std(0)
        return abs(std_of_log_curve / mean_of_log_curve).mean()

    if score_metric == '_score_metric_mean_of_std_using_fast_log_cal_':
        curve_mat = _log_curves_as_matrix(model, X, lengths)
        return curve_mat.std(0).mean()

    if score_metric == '_score_metric_hamming_distance_using_fast_log_cal_':
        import scipy.spatial.distance as sp_dist
        log_mat = _log_curves_as_matrix(model, X, lengths)
        std_of_log_mat = log_mat.std(0)
        mean_of_log_mat = log_mat.mean(0)
        lower_bound = mean_of_log_mat - 20 * std_of_log_mat
        # `hamming` works on 1-D vectors; flatten the 1xN matrices first.
        return sp_dist.hamming(
            np.asarray(mean_of_log_mat).ravel(),
            np.asarray(lower_bound).ravel())

    if score_metric == '_score_metric_std_of_std_using_fast_log_cal_':
        curve_mat = _log_curves_as_matrix(model, X, lengths)
        return curve_mat.std(0).std()

    if score_metric == '_score_metric_mean_of_std_divied_by_final_log_mean_':
        curve_mat = _log_curves_as_matrix(model, X, lengths)
        mean_of_std = curve_mat.std(0).mean()
        final_log_mean = curve_mat.mean(0)[0, -1]
        return abs(mean_of_std / final_log_mean)

    if score_metric == '_score_metric_mean_of_std_of_gradient_divied_by_final_log_mean_':
        curve_mat = _log_curves_as_matrix(model, X, lengths)
        gradient_mat = curve_mat[:, 1:] - curve_mat[:, :-1]
        mean_of_std = gradient_mat.std(0).mean()
        final_log_mean = gradient_mat.mean(0)[0, -1]
        return abs(mean_of_std / final_log_mean)

    if score_metric == '_score_metric_minus_diff_btw_1st_2ed_emissionprob_':
        score_of_trials = []
        for i, j in util.iter_from_X_lengths(X, lengths):
            framelogprob = util.get_emission_log_prob_matrix(X[i:j], model)

            if framelogprob.shape[1] == 1:
                _report_single_hidden_state(score_metric)
                return None

            # ascending per row: last column is the best state, the one
            # before it the runner-up
            framelogprob.sort(1)
            diff_btw_1st_2ed_eprob = framelogprob[:, -1] - framelogprob[:, -2]
            score_of_trials.append(np.sum(diff_btw_1st_2ed_eprob) / (j - i))
        return -np.array(score_of_trials).mean()

    if score_metric == '_score_metric_minus_diff_btw_1st_2ed(>=0)_divide_maxeprob_emissionprob_':
        score_of_trials = []
        for i, j in util.iter_from_X_lengths(X, lengths):
            framelogprob = util.get_emission_log_prob_matrix(X[i:j], model)

            if framelogprob.shape[1] == 1:
                _report_single_hidden_state(score_metric)
                return None

            framelogprob.sort(1)
            # negative values are raised to 0 before taking the difference
            eprob_2ed = framelogprob[:, -2].clip(min=0)
            eprob_1st = framelogprob[:, -1].clip(min=0)

            max_eprob = np.max(eprob_1st)
            if max_eprob == 0:
                print('max_eprob = 0, so no score for this turn')
                return None

            diff_btw_1st_2ed_eprob = eprob_1st - eprob_2ed
            score_of_trials.append(
                np.sum(diff_btw_1st_2ed_eprob) / (max_eprob * (j - i)))
        return -np.array(score_of_trials).mean()

    if score_metric == '_score_metric_minus_diff_btw_1st_2ed(delete<0)_divide_maxeprob_emissionprob_':
        score_of_trials = []
        for i, j in util.iter_from_X_lengths(X, lengths):
            framelogprob = util.get_emission_log_prob_matrix(X[i:j], model)

            if framelogprob.shape[1] == 1:
                _report_single_hidden_state(score_metric)
                return None

            framelogprob.sort(1)
            eprob_2ed = framelogprob[:, -2]
            eprob_1st = framelogprob[:, -1]

            # keep only the time steps whose runner-up value is positive
            entry_filter = eprob_2ed > 0
            eprob_2ed = eprob_2ed[entry_filter]
            eprob_1st = eprob_1st[entry_filter]

            entry_length = len(eprob_2ed)
            if entry_length == 0:
                print('entry_length = 0, so no score for this turn')
                return None

            max_eprob = np.max(eprob_1st)
            if max_eprob == 0:
                print('max_eprob = 0, so no score for this turn')
                return None

            diff_btw_1st_2ed_eprob = eprob_1st - eprob_2ed
            score_of_trials.append(
                np.sum(diff_btw_1st_2ed_eprob) / (max_eprob * entry_length))
        return -np.array(score_of_trials).mean()

    if score_metric == '_score_metric_mean_of_(std_of_(max_emissionprob_of_trials))_':
        mat = []
        for i, j in util.iter_from_X_lengths(X, lengths):
            framelogprob = util.get_emission_log_prob_matrix(X[i:j], model)

            if framelogprob.shape[1] == 1:
                _report_single_hidden_state(score_metric)
                return None

            mat.append(framelogprob.max(1))
        return np.matrix(mat).std(0).mean()

    if score_metric == '_score_metric_duration_of_(diff_btw_1st_2ed_emissionprob_<_10)_':
        score_of_trials = []
        for i, j in util.iter_from_X_lengths(X, lengths):
            framelogprob = util.get_emission_log_prob_matrix(X[i:j], model)

            if framelogprob.shape[1] == 1:
                _report_single_hidden_state(score_metric)
                return None

            framelogprob.sort(1)
            diff_btw_1st_2ed_eprob = framelogprob[:, -1] - framelogprob[:, -2]
            # share of time steps where best and runner-up are closer than 10
            duration = (diff_btw_1st_2ed_eprob < 10).sum()
            score_of_trials.append(float(duration) / (j - i))
        return np.array(score_of_trials).mean()

    raise ValueError('unknown score metric \'%s\'' % (score_metric, ))
def run(model_save_path, model_type, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):
    """Refit every scored model of every state and plot its log-likelihood curves.

    For each state that has a training report on disk, every model id listed
    in that report is converted back into a model config, a fresh model is
    fitted on the concatenated trials of that state, and the log-likelihood
    curve of every trial under that model is plotted.

    Args:
        model_save_path: directory holding the
            ``model_s<state>_training_report.json`` and
            ``model_s<state>_config_<model_id>.pkl`` files.
        model_type: forwarded to ``model_generation.model_factory``.
        figure_save_path: root directory for the generated figures.
        threshold_c_value: not used by this function; kept so the signature
            stays compatible with existing callers.
        trials_group_by_folder_name: dict mapping a trial name to a mapping
            from state number (starting at 1) to that state's data array.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = list(trials_group_by_folder_name.values())

    # Any trial will do for counting states; an empty input raises
    # StopIteration here, exactly as the previous implementation did.
    one_trial_data_group_by_state = next(iter(list_of_trials))
    state_amount = len(one_trial_data_group_by_state)

    training_report_by_state = {}
    for state_no in range(1, state_amount + 1):
        report_path = (model_save_path +
                       "/model_s%s_training_report.json" % (state_no, ))
        try:
            # Context manager so the report file is closed deterministically.
            with open(report_path, 'r') as report_file:
                training_report_by_state[state_no] = json.load(report_file)
        except IOError:
            print('training report of state %s not found' % (state_no, ))
            continue

    # The config pickled for the first record of each report is used as the
    # template from which every other model id of that state is rebuilt.
    model_config_by_state = {}
    for state_no in training_report_by_state:
        best_model_record = training_report_by_state[state_no][0]
        best_model_id = list(best_model_record)[0]
        model_config_by_state[state_no] = joblib.load(
            model_save_path + "/model_s%s_config_%s.pkl" %
            (state_no, best_model_id))

    for state_no in training_report_by_state:
        # Stack all trials of this state once instead of re-concatenating
        # inside a loop; `lengths` records where each trial ends.
        data_of_each_trial = [trial[state_no] for trial in list_of_trials]
        lengths = [data.shape[0] for data in data_of_each_trial]
        X = np.concatenate(data_of_each_trial, axis=0)

        model_config_template = model_config_by_state[state_no]

        for idx, scored_model in enumerate(training_report_by_state[state_no]):
            # Each record is read as a single {model_id: score} entry.
            model_id, model_score = list(scored_model.items())[0]

            model_config = util.bring_model_id_back_to_model_config(
                model_id, model_config_template)
            model_config = _translate_into_new_config_paradigm(model_config)
            model = model_generation.model_factory(model_type, model_config)
            model = model.fit(X, lengths=lengths)

            curve_owner = []
            all_log_curves_of_this_state = []
            for trial_name in trials_group_by_folder_name:
                curve_owner.append(trial_name)
                all_log_curves_of_this_state.append(
                    util.fast_log_curve_calculation(
                        trials_group_by_folder_name[trial_name][state_no],
                        model,
                    ))

            np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)

            plot_trials_loglik_curves_of_one_state(
                np_matrix_traj_by_time,
                curve_owner,
                state_no,
                os.path.join(figure_save_path,
                             'check_if_score_metric_converge_loglik_curves',
                             'state_%s' % (state_no, )),
                title='state_%s_training_rank_%s_id_%s_score_%s' %
                (state_no, idx, model_id, model_score))
def _step_terms_along_max_hstate_path(framelogprob, max_hstate, log_transmat):
    """Return "the term" for every time step along the arg-max state sequence.

    Entry 0 is ``framelogprob[0][max_hstate[0]]``; entry t > 0 is
    ``framelogprob[t][max_hstate[t]]`` plus
    ``log_transmat[max_hstate[t - 1]][max_hstate[t]]``.
    """
    terms = [framelogprob[0][max_hstate[0]]]
    for t in range(1, len(max_hstate)):
        prev_state = max_hstate[t - 1]
        cur_state = max_hstate[t]
        terms.append(framelogprob[t][cur_state] +
                     log_transmat[prev_state][cur_state])
    return terms


def _log_lik_and_step_terms(data, model, log_transmat, output_dir, tag,
                            list_of_tampered_range):
    """Compute the log-likelihood curve and step terms of one sequence, then profile it."""
    log_lik = np.array(util.fast_log_curve_calculation(data, model))
    framelogprob = np.array(util.get_emission_log_prob_matrix(data, model))
    fwdlattice = util.get_hidden_state_log_prob_matrix(data, model)
    max_hstate = fwdlattice.argmax(1)
    terms = _step_terms_along_max_hstate_path(framelogprob, max_hstate,
                                              log_transmat)
    profile_log_curve_cal(data, model, output_dir, tag,
                          list_of_tampered_range)
    return log_lik, terms


def _first_difference(curve):
    """Return ``curve[t] - curve[t - 1]`` with the first entry set to 0."""
    deri = curve.copy()
    deri[1:] = curve[1:] - curve[:-1]
    deri[0] = 0
    return deri


def _plot_curves_with_tampered_ranges(fig, position, title, curves,
                                      list_of_tampered_range):
    """Draw one subplot of labelled curves, shade tampered ranges, return its legend."""
    ax = fig.add_subplot(position)
    ax.set_title(title)
    for data, color, label in curves:
        ax.plot(data,
                color=color,
                marker='None',
                linestyle='solid',
                label=label)
    for r in list_of_tampered_range:
        ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
    return ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))


def run(model_save_path, trials_group_by_folder_name, parsed_options):
    """Compare log-likelihood behaviour on one trial before and after tampering.

    For every state whose ``model_s<state>.pkl`` can be loaded, the data of
    one trial (the first one the dict yields) is tampered with
    ``tamper_input_mat``; optionally the model's transition matrix and/or
    start probabilities are tampered as well. Viterbi diagnostics, model
    profiles and a four-panel figure (log-likelihood, its first difference,
    and the first difference against "the term" for the normal and tampered
    input) are written below
    ``<dir of this file>/experiment_output/test_if_parallelity_can_be_restored``.

    Args:
        model_save_path: directory holding the ``model_s<state>.pkl`` files.
        trials_group_by_folder_name: dict mapping a trial name to a mapping
            from state number (starting at 1) to that state's data array.
        parsed_options: object with boolean-like ``tamper_transmat`` and
            ``tamper_startprob`` attributes.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = next(
        iter(trials_group_by_folder_name.values()))
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print('model of state %s not found' % (state_no, ))
            continue

    base_dir = os.path.dirname(os.path.realpath(__file__))
    exp_dir = os.path.join(base_dir, 'experiment_output',
                           'test_if_parallelity_can_be_restored')

    # The output folder name records which kinds of tampering were enabled.
    output_id = '(tamper_input)'
    tampered = False
    if parsed_options.tamper_transmat:
        output_id += '_(tamper_transmat)'
        tampered = True
    if parsed_options.tamper_startprob:
        output_id += '_(tamper_startprob)'
        tampered = True
    output_dir = os.path.join(exp_dir, output_id)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for state_no in model_group_by_state:
        X = one_trial_data_group_by_state[state_no]
        model = model_group_by_state[state_no]

        # Viterbi diagnostics are produced before any model tampering.
        list_of_growing_viterbi_paths, _, _ = util.fast_growing_viterbi_paths_cal(
            X, model)
        list_of_lock_t, _, n_components = util.fast_viterbi_lock_t_cal(
            X, model)

        util.output_growing_viterbi_path_img(
            list_of_growing_viterbi_paths,
            n_components,
            os.path.join(
                output_dir,
                'check_if_viterbi_path_grow_incrementally_state_%s.png' %
                state_no,
            ),
            list_of_lock_t,
        )
        util.visualize_viterbi_alog(
            X, model,
            os.path.join(output_dir, 'state %s visualized viterbi alog.png' %
                         (state_no, )))

        all_Xs = [
            trials_group_by_folder_name[trial_name][state_no]
            for trial_name in trials_group_by_folder_name
        ]
        tampered_X, list_of_tampered_range = tamper_input_mat(X.copy(), all_Xs)

        profile_model(model, output_dir, 'state %s raw' % (state_no, ))

        # NOTE(review): the return values are ignored, so these helpers
        # presumably modify `model` in place -- confirm against their
        # definitions.
        if parsed_options.tamper_transmat:
            tamper_transmat(model)
        if parsed_options.tamper_startprob:
            tamper_startprob(model)
        if tampered:
            profile_model(model, output_dir,
                          'state %s tampered' % (state_no, ))

        # Read after the optional tampering so it matches the model that is
        # actually evaluated below.
        log_transmat = util.get_log_transmat(model)

        log_lik_of_X, the_term_of_X = _log_lik_and_step_terms(
            X, model, log_transmat, output_dir,
            'state %s X' % (state_no, ), list_of_tampered_range)
        log_lik_of_tampered_X, the_term_of_tampered_X = _log_lik_and_step_terms(
            tampered_X, model, log_transmat, output_dir,
            'state %s tampered_X' % (state_no, ), list_of_tampered_range)

        deri_of_X = _first_difference(log_lik_of_X)
        deri_of_tampered_X = _first_difference(log_lik_of_tampered_X)

        fig = plt.figure()
        panels = [
            (411, "log lik", [
                (log_lik_of_X, 'black', 'Normal'),
                (log_lik_of_tampered_X, 'blue', 'Tampered'),
            ]),
            (412, "1st deri", [
                (deri_of_X, 'black', 'Normal'),
                (deri_of_tampered_X, 'blue', 'Tampered'),
            ]),
            (413, "1st deri and max emission prob of Normal", [
                (deri_of_X, 'black', 'Normal 1st deri'),
                (the_term_of_X, 'red', 'Normal the term'),
            ]),
            (414, "1st deri and max emission prob of Tampered", [
                (deri_of_tampered_X, 'blue', 'Tampered 1st deri'),
                (the_term_of_tampered_X, 'red', 'Tampered the term'),
            ]),
        ]
        # Legends sit outside the axes, so they must be handed to savefig to
        # be included in the tight bounding box.
        bbox_extra_artists = [
            _plot_curves_with_tampered_ranges(fig, position, panel_title,
                                              curves, list_of_tampered_range)
            for position, panel_title, curves in panels
        ]

        title = "output_id %s state %s" % (output_id, state_no)
        fig.suptitle(title)

        fig.tight_layout()

        for fmt in ("eps", "png"):
            fig.savefig(os.path.join(output_dir, title + "." + fmt),
                        format=fmt,
                        bbox_extra_artists=bbox_extra_artists,
                        bbox_inches='tight')
Exemplo n.º 15
0
def _stack_trials_with_lengths(trials):
    """Return (trials concatenated along axis 0, list of each trial's row count)."""
    trials = list(trials)
    lengths = [trial.shape[0] for trial in trials]
    return np.concatenate(trials, axis=0), lengths


def run():
    """Plot identification rate against the amount of synthetic training data.

    For every testing ratio in ``np.arange(0.9, 0.4, -0.1)``:

    1. An HMM is trained on the real training trials, and a log-likelihood
       threshold curve ``mean - 3 * std`` is derived from their curves.
    2. For every synthetic-data count in ``range(2, 22, 2)``, synthetic
       trials are generated from each real trial, a new HMM is trained on
       what ``util.get_anomaly_data_for_labelled_case`` reads back from the
       output folder, and the fraction of test trials whose final
       log-likelihood exceeds the final threshold value is recorded.

    One curve per testing ratio is drawn; the figure is saved under
    ``./images`` and then shown.
    """
    output_dir = os.path.join(training_config.anomaly_data_path, 'synthetic_data')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    plt.subplot(111)
    testing_ratio_list = np.arange(0.9, 0.4, -0.1)
    num_syn_data_list = range(2, 22, 2)
    for testing_ratio in testing_ratio_list:
        x_train, x_test = load_dataset(testing_ratio=testing_ratio)
        n_real = len(x_train)

        # calculate the threshold for identification based on real training trials
        print('calculate the threshold for identification based on real training trials')
        train_data, lengths = _stack_trials_with_lengths(x_train)
        best_model, _ = hmm_model_training.train_hmm_model(train_data, lengths)
        all_log_curves = [
            util.fast_log_curve_calculation(trial, best_model['model'])
            for trial in x_train
        ]
        np_matrix_of_all_log_curves = np.matrix(all_log_curves)

        threshold_c = 3
        threshold_for_log_likelihood = (
            np_matrix_of_all_log_curves.mean(0) -
            threshold_c * np_matrix_of_all_log_curves.std(0)).tolist()[0]

        # train the model with data augmentation and test it
        print("train the model with data augmentation and test it")
        acc_list = []
        for num_data in num_syn_data_list:
            # Start every round from an empty output folder so that only the
            # files written in this round are read back for training.
            for old_file in glob.glob(os.path.join(output_dir, '*')):
                os.remove(old_file)

            for i in range(len(x_train)):
                print('Generating synthetic data from real_{0}'.format(i))
                df = pd.DataFrame(x_train[i], columns=training_config.interested_data_fields)
                df.to_csv(os.path.join(output_dir, 'real_' + str(i) + '.csv'))

                # Other generators that were tried at this point:
                # generate_synthetic_data.run_bootstrap,
                # run_sampling_from_trained_hmm_model and
                # cross_signals_difference_with_weights.
                generate_synthetic_data.run_finite_differece_matrix(
                    df=df,
                    num_data=num_data,
                    csv_save_path=output_dir,
                    trial_name='real_' + str(i))

            # train model with synthetic data
            anomaly_data_group_by_folder_name = util.get_anomaly_data_for_labelled_case(
                training_config, output_dir)
            # Every loaded trial is indexed with key 1 here.
            train_data, lengths = _stack_trials_with_lengths(
                trial[1] for trial in anomaly_data_group_by_folder_name.values())
            best_model, _ = hmm_model_training.train_hmm_model(train_data, lengths)

            # A test trial counts as identified when its final log-likelihood
            # lies above the final value of the threshold curve.
            identified = 0.0
            for test_trial in x_test:
                one_log_curve = util.fast_log_curve_calculation(test_trial, best_model['model'])
                if one_log_curve[-1] > threshold_for_log_likelihood[-1]:
                    identified += 1
            idfyRate = identified / len(x_test)
            acc_list.append(idfyRate)
            print(idfyRate)
        plt.plot(num_syn_data_list, acc_list, 'o-', label='n_real=' + str(n_real))

    plt.title('Anomaly identification accuracy vs num synthetic data')
    # x carries num_syn_data_list and y carries the identification rate; the
    # two labels used to be attached to the wrong axes.
    plt.xlabel('Synthetic trials for each real trail')
    plt.ylabel('Identification accuracy')
    plt.legend()

    # savefig does not create missing directories.
    figure_path = './images/Anomaly_identification_accuracy_vs_num_synthetic_data.eps'
    figure_dir = os.path.dirname(figure_path)
    if not os.path.isdir(figure_dir):
        os.makedirs(figure_dir)
    plt.savefig(figure_path, format='eps', dpi=300)
    plt.show()
Exemplo n.º 16
0
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):
    """Derive and persist mean, std and threshold log-likelihood curves per state.

    For every state whose ``model_s<state>.pkl`` can be loaded, the
    log-likelihood curve of every trial is computed, the column-wise mean and
    std over trials are taken, and ``assess_threshold_and_decide`` picks the
    threshold curve. The three per-state dicts are dumped with joblib into
    ``model_save_path``.

    Args:
        model_save_path: directory holding the per-state models; also the
            destination of the three output pickles.
        figure_save_path: forwarded to ``assess_threshold_and_decide``.
        threshold_c_value: forwarded to ``assess_threshold_and_decide``.
        trials_group_by_folder_name: dict mapping a trial name to a mapping
            from state number (starting at 1) to that state's data.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    # Any single trial tells us how many states there are.
    first_trial = next(iter(trials_group_by_folder_name.values()))
    n_states = len(first_trial)

    model_group_by_state = {}
    for state_no in range(1, n_states + 1):
        model_file = model_save_path + "/model_s%s.pkl" % (state_no, )
        try:
            model_group_by_state[state_no] = joblib.load(model_file)
        except IOError:
            # States without a saved model are simply left out.
            print('model of state %s not found' % (state_no, ))

    expected_log, std_of_log, threshold = {}, {}, {}

    for state_no, state_model in model_group_by_state.items():
        seconds_spent_scoring = 0
        points_scored = 0
        curve_owner = []
        curves = []

        for trial_name, trial_data in trials_group_by_folder_name.items():
            curve_owner.append(trial_name)
            state_data = trial_data[state_no]

            # Only the scoring call itself is timed.
            tic = time.time()
            curve = util.fast_log_curve_calculation(state_data, state_model)
            seconds_spent_scoring += time.time() - tic

            points_scored += len(state_data)
            curves.append(curve)

        # One row per trial, one column per time step.
        np_matrix_traj_by_time = np.matrix(curves)
        mean_of_log_curve = np_matrix_traj_by_time.mean(0)
        std_of_log_curve = np_matrix_traj_by_time.std(0)

        score_time_cost_per_point = float(seconds_spent_scoring) / points_scored

        decided_threshold_log_curve = assess_threshold_and_decide(
            threshold_c_value, mean_of_log_curve, std_of_log_curve,
            np_matrix_traj_by_time, curve_owner, state_no, figure_save_path,
            score_time_cost_per_point)

        # Matrices come back as 1 x T; store the single row as a plain list.
        expected_log[state_no] = mean_of_log_curve.tolist()[0]
        threshold[state_no] = decided_threshold_log_curve.tolist()[0]
        std_of_log[state_no] = std_of_log_curve.tolist()[0]

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    joblib.dump(expected_log, model_save_path + "/mean_of_log_likelihood.pkl")
    joblib.dump(threshold,
                model_save_path + "/threshold_for_log_likelihood.pkl")
    joblib.dump(std_of_log, model_save_path + "/std_of_log_likelihood.pkl")
def run(model_save_path, figure_save_path, trials_group_by_folder_name,
        state_order_group_by_folder_name, parsed_options):
    """Plot, per trial, the log-likelihood of every state model and its gradient.

    The states of each trial are concatenated in the order given by
    ``state_order_group_by_folder_name`` and the resulting sequence is scored
    by every loaded state model. Each trial gets two stacked subplots: the
    log-likelihood curves and their first differences with negative values
    clipped to 0. Both subplots get their background coloured through
    ``color_bg_by_state``. The figure is saved as PNG and EPS under
    ``<figure_save_path>/test_if_gradient_can_detect_state_switch``.

    Args:
        model_save_path: directory holding the ``model_s<state>.pkl`` files.
        figure_save_path: root directory for the generated figure.
        trials_group_by_folder_name: dict mapping a trial name to a mapping
            from state number (starting at 1) to that state's data array.
        state_order_group_by_folder_name: dict mapping a trial name to the
            sequence of state numbers to concatenate for that trial.
        parsed_options: not used by this function; kept so the signature
            stays compatible with existing callers.
    """
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = next(
        iter(trials_group_by_folder_name.values()))
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(
                model_save_path + "/model_s%s.pkl" % (state_no, ))
        except IOError:
            print('model of state %s not found' % (state_no, ))
            continue

    # One rainbow colour per loaded model, assigned in dict iteration order.
    state_color = dict(
        zip(model_group_by_state,
            cm.rainbow(np.linspace(0, 1, state_amount))))

    output_dir = os.path.join(figure_save_path,
                              'test_if_gradient_can_detect_state_switch')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    trial_amount = len(trials_group_by_folder_name)
    subplot_amount_for_each_trial = 2
    subplot_per_row = 1
    subplot_amount = trial_amount * subplot_amount_for_each_trial
    row_amount = int(math.ceil(float(subplot_amount) / subplot_per_row))

    # squeeze=False always yields a 2-D axes array, so no reshaping is needed
    # whatever the grid shape; flattening row-major gives subplot j at index j.
    fig, ax_mat = plt.subplots(nrows=row_amount,
                               ncols=subplot_per_row,
                               squeeze=False)
    ax_list = list(ax_mat.flat)

    for trial_count, trial_name in enumerate(trials_group_by_folder_name):
        trial_data_by_state = trials_group_by_folder_name[trial_name]
        state_order = state_order_group_by_folder_name[trial_name]

        # Concatenate the states in execution order and remember the row
        # index at which each state starts (plus the final end index).
        X = None
        state_start_idx = [0]
        for state_no in state_order:
            segment = trial_data_by_state[state_no]
            if X is None:
                X = segment
            else:
                X = np.concatenate((X, segment), axis=0)
            state_start_idx.append(len(X))

        plot_idx = trial_count * subplot_amount_for_each_trial
        ax_loglik = ax_list[plot_idx]
        ax_loglik_gradient = ax_list[plot_idx + 1]

        color_bg_by_state(state_order, state_color, state_start_idx, ax_loglik)
        color_bg_by_state(state_order, state_color, state_start_idx,
                          ax_loglik_gradient)

        for state_no in model_group_by_state:
            log_lik_curve = np.array(
                util.fast_log_curve_calculation(
                    X, model_group_by_state[state_no]))
            # Only increases of the log-likelihood are shown; drops are
            # clipped to 0.
            log_lik_gradient_curve = np.maximum(
                log_lik_curve[1:] - log_lik_curve[:-1], 0)

            label = 'state %s' % (state_no, )
            ax_loglik.plot(log_lik_curve.tolist(),
                           label=label,
                           color=state_color[state_no])
            ax_loglik_gradient.plot(log_lik_gradient_curve.tolist(),
                                    label=label,
                                    color=state_color[state_no])

        ax_loglik.set_title(
            "log-likelihood of %s HMM models" % state_amount)
        ax_loglik.set_ylabel('log probability')
        ax_loglik.set_xlabel('time step')
        ax_loglik_gradient.set_title(
            "gradient of log-likelihood of %s HMM models" % state_amount)
        ax_loglik_gradient.set_ylabel('log probability')
        ax_loglik_gradient.set_xlabel('time step')

    fig.set_size_inches(8 * subplot_per_row, 2 * row_amount)
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir,
                             "test_if_gradient_can_detect_state_switch.png"),
                format="png")
    fig.savefig(os.path.join(output_dir,
                             "test_if_gradient_can_detect_state_switch.eps"),
                format="eps")