# Example #1
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    threshold_group_by_state = {}
    mean_curve_group_by_state = {}

    for state_no in model_group_by_state:

        all_log_curves_of_this_state = []
        curve_owner = []
        for trial_name in trials_group_by_folder_name:
            curve_owner.append(trial_name)
            one_log_curve_of_this_state = []

            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])

            all_log_curves_of_this_state.append(one_log_curve_of_this_state)

        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)
        mean_of_log_curve = np_matrix_traj_by_time.mean(0)
        diff_traj_by_time = np_matrix_traj_by_time - mean_of_log_curve
        deri_of_diff_traj_by_time = diff_traj_by_time[:,
                                                      1:] - diff_traj_by_time[:, :
                                                                              -1]
        mean_curve_group_by_state[state_no] = mean_of_log_curve
        threshold_group_by_state[state_no] = assess_threshold_and_decide(
            deri_of_diff_traj_by_time,
            curve_owner,
            state_no,
            figure_save_path,
        )

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    if len(threshold_group_by_state) != 0:
        joblib.dump(threshold_group_by_state,
                    model_save_path + "/threshold_for_deri_of_diff.pkl")
        joblib.dump(mean_curve_group_by_state,
                    model_save_path + "/mean_curve_group_by_state.pkl")
# Example #2
def run(model_save_path, 
    figure_save_path,
    threshold_c_value,
    trials_group_by_folder_name,
    data_class,
):

    output_dir = os.path.join(
        figure_save_path,
        "gradient_of_log_likelihood_plot",
        data_class,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
        
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues().next()
    state_amount = len(one_trial_data_group_by_state)

    threshold_constant = 10
    threshold_offset = 10

    model_group_by_state = {}
    for state_no in range(1, state_amount+1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path+"/model_s%s.pkl"%(state_no,))
        except IOError:
            print 'model of state %s not found'%(state_no,)
            continue

    for state_no in model_group_by_state:


        all_log_curves_of_this_state = []
        curve_owner = []
        for trial_name in trials_group_by_folder_name:
            curve_owner.append(trial_name)
            one_log_curve_of_this_state = [] 

            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no]
            )


            all_log_curves_of_this_state.append(one_log_curve_of_this_state)

        # use np matrix to facilitate the computation of mean curve and std 
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)


        assess_threshold_and_decide(
            np_matrix_traj_by_time, 
            curve_owner, 
            state_no, 
            output_dir, 
            data_class,
        )
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    threshold_constant = 10
    threshold_offset = 10

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    expected_log = []
    std_of_log = []
    deri_threshold = []

    for state_no in model_group_by_state:

        all_log_curves_of_this_state = []
        list_of_log_prob_mat = []
        log_prob_owner = []
        for trial_name in trials_group_by_folder_name:
            log_prob_owner.append(trial_name)

            emission_log_prob_mat = util.get_emission_log_prob_matrix(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])

            list_of_log_prob_mat.append(emission_log_prob_mat)

            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])

            all_log_curves_of_this_state.append(one_log_curve_of_this_state)
        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)

        gradient_traj_by_time = np_matrix_traj_by_time[:,
                                                       1:] - np_matrix_traj_by_time[:, :
                                                                                    -1]

        plot_log_prob_of_all_trials(gradient_traj_by_time,
                                    list_of_log_prob_mat, log_prob_owner,
                                    state_no, figure_save_path)
# Example #4
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    threshold_constant = 10
    threshold_offset = 10

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    for state_no in model_group_by_state:
        compute_score_time_cost = 0
        total_step_times = 0

        all_log_curves_of_this_state = []
        curve_owner = []
        for trial_name in trials_group_by_folder_name:
            curve_owner.append(trial_name)
            one_log_curve_of_this_state = []

            start_time = time.time()

            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])

            compute_score_time_cost += time.time() - start_time
            total_step_times += len(
                trials_group_by_folder_name[trial_name][state_no])

            all_log_curves_of_this_state.append(one_log_curve_of_this_state)

        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)

        score_time_cost_per_point = float(
            compute_score_time_cost) / total_step_times

        assess_threshold_and_decide(np_matrix_traj_by_time, curve_owner,
                                    state_no, figure_save_path,
                                    score_time_cost_per_point)
def run(model_save_path, figure_save_path, trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

        fig = plt.figure(1)
        ax = fig.add_subplot(111)
        from matplotlib.pyplot import cm
    for trial_name in trials_group_by_folder_name:
        color = iter(cm.rainbow(np.linspace(0, 1, state_amount)))
        all_log_curves_of_this_model = [[]]
        for model_no in model_group_by_state:
            all_log_curves_of_this_model.append([])
            for state_no in range(1, state_amount + 1):
                one_log_curve_of_this_model = util.fast_log_curve_calculation(
                    trials_group_by_folder_name[trial_name][state_no],
                    model_group_by_state[model_no])
                all_log_curves_of_this_model[model_no] = np.hstack([
                    all_log_curves_of_this_model[model_no],
                    one_log_curve_of_this_model
                ])
            ax.plot(all_log_curves_of_this_model[model_no],
                    linestyle="solid",
                    label='state_' + str(model_no),
                    color=next(color))
        title = ('skill_identification' + trial_name)
        ax.set_title(title)
        if not os.path.isdir(figure_save_path + '/skill_identification_plot'):
            os.makedirs(figure_save_path + '/skill_identification_plot')
        fig.savefig(os.path.join(figure_save_path, 'skill_identification_plot',
                                 title + ".jpg"),
                    format="jpg")
    fig.show()
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    threshold_constant = 10
    threshold_offset = 10

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    expected_log = []
    std_of_log = []
    deri_threshold = []

    for state_no in model_group_by_state:

        list_of_log_prob_mat = []
        log_prob_owner = []
        for trial_name in trials_group_by_folder_name:
            log_prob_owner.append(trial_name)

            hidden_state_log_prob = util.get_hidden_state_log_prob_matrix(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])

            list_of_log_prob_mat.append(hidden_state_log_prob)

        # use np matrix to facilitate the computation of mean curve and std
        plot_log_prob_of_all_trials(list_of_log_prob_mat, log_prob_owner,
                                    state_no, figure_save_path)
# Example #7
def run(model_save_path, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            print 'model of state %s not found' % (state_no, )
            continue

    expected_log = {}
    std_of_log = {}
    threshold = {}

    for state_no in model_group_by_state:
        compute_score_time_cost = 0
        total_step_times = 0

        all_log_curves_of_this_state = []
        curve_owner = []
        for trial_name in trials_group_by_folder_name:
            curve_owner.append(trial_name)
            one_log_curve_of_this_state = []

            start_time = time.time()

            one_log_curve_of_this_state = util.fast_log_curve_calculation(
                trials_group_by_folder_name[trial_name][state_no],
                model_group_by_state[state_no])

            compute_score_time_cost += time.time() - start_time
            total_step_times += len(
                trials_group_by_folder_name[trial_name][state_no])

            all_log_curves_of_this_state.append(one_log_curve_of_this_state)

        # use np matrix to facilitate the computation of mean curve and std
        np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)
        mean_of_log_curve = np_matrix_traj_by_time.mean(0)
        std_of_log_curve = np_matrix_traj_by_time.std(0)

        score_time_cost_per_point = float(
            compute_score_time_cost) / total_step_times

        decided_threshold_log_curve = assess_threshold_and_decide(
            threshold_c_value, mean_of_log_curve, std_of_log_curve,
            np_matrix_traj_by_time, curve_owner, state_no, figure_save_path,
            score_time_cost_per_point)
        expected_log[state_no] = mean_of_log_curve.tolist()[0]
        threshold[state_no] = decided_threshold_log_curve.tolist()[0]
        std_of_log[state_no] = std_of_log_curve.tolist()[0]

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    joblib.dump(expected_log, model_save_path + "/mean_of_log_likelihood.pkl")
    joblib.dump(threshold,
                model_save_path + "/threshold_for_log_likelihood.pkl")
    joblib.dump(std_of_log, model_save_path + "/std_of_log_likelihood.pkl")
def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):
    """Train one HMM per state on pooled trial data and dump the results.

    For each state, concatenates the corresponding segment of every trial
    into one training matrix, fits via train_hmm_model(), pickles the best
    model, its config and score, writes a JSON training report, then plots
    the decoded hidden-state sequence split back into trials.

    NOTE(review): model_config and score_metric are accepted but never used
    in this variant -- presumably consumed by other variants of run().
    """

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()

    trials_amount = len(trials_group_by_folder_name)

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    # Any single trial suffices for counting states.
    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}

    # Pool every trial's data per state, remembering per-trial lengths so
    # the fitter can treat the pooled matrix as separate sequences.
    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_tempt
        training_length_array_group_by_state[state_no] = length_array

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    for state_no in range(1, state_amount + 1):
        model_list = []  # NOTE(review): never populated or read below.
        train_data = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        best_model, model_id = train_hmm_model(train_data, lengths)
        joblib.dump(
            best_model['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

        joblib.dump(
            best_model['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))

        # The score is encoded in the file name; the pickle payload is empty.
        joblib.dump(
            None,
            os.path.join(
                model_save_path,
                "model_s%s_score_%s.pkl" % (state_no, best_model['score'])))

        # NOTE(review): sorted_model_list is not defined anywhere in this
        # function -- this raises NameError at runtime; it presumably should
        # come from train_hmm_model(). TODO confirm against other variants.
        train_report = [{
            util.get_model_config_id(i['now_model_config']):
            i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))

        # plot the hidden state sequence for each state
        print
        print
        print 'Finish fitting the posterior model -> Generating the hidden state sequence...'
        print
        print
        model = best_model['model']
        # NOTE(review): X is not defined in this scope (likely meant
        # train_data) -- the hmmlearn/BNPY branches would raise NameError.
        if model_type in ['hmmlearn\'s HMM', 'hmmlearn\'s GMMHMM']:
            _, model.z = model.decode(X, algorithm="viterbi")

        elif model_type == 'BNPY\'s HMM':
            model.z = model.decode(X, lengths)

        elif model_type == 'PYHSMM\'s HMM':
            model.z = model.model.stateseqs[0]

        else:
            # Unsupported model type: nothing more to plot for this state.
            print 'Sorry, this model cannot obtain the hidden state sequence'
            return

        # plt.close("all")
        # Xdf = pd.DataFrame(X) # plot the original multimodal signals
        # Xdf.plot()

        # im_data  = np.tile(model.z, 2)
        # cmap =cm.get_cmap('jet',np.max(model.z))
        # print np.unique(model.z)
        # ax.imshow(im_data[None], aspect='auto', interpolation='nearest', vmin = 0, vmax = np.max(model.z), cmap = cmap, alpha = 0.5)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        # Python 2 integer division: assumes all trials have equal length.
        trial_len = len(model.z) / trials_amount
        color = iter(cm.rainbow(np.linspace(0, 1, trials_amount)))
        zhat = []
        for iTrial in range(trials_amount):
            # Slice the pooled state sequence back into per-trial chunks.
            zSeq = model.z[iTrial * trial_len:(iTrial + 1) * trial_len]
            ax.plot(zSeq, color=next(color))  #, linewidth=2.0
            zhat.append(zSeq.tolist() + [zSeq[-1]])
        plt.show()
        zdf = pd.DataFrame(zhat)
        plt.title('The hidden state_sequence of state_%d' % (state_no))
        zdf.to_csv(model_save_path + '/zhat.csv', index=False)
def run(model_save_path, figure_save_path, trials_group_by_folder_name,
        state_order_group_by_folder_name, parsed_options):
    """Visual test: can the log-likelihood gradient detect state switches?

    For every trial, concatenates its state segments in executed order, runs
    every state's model over the full sequence, and plots (a) the
    log-likelihood curves and (b) their negative-clipped gradients, with the
    background colored by ground-truth state.  One combined figure is saved
    as png and eps.

    NOTE(review): parsed_options is accepted but not used in this variant.
    """

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    # Peek at any one trial to learn the state count (Python 2 dict API).
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            # Missing models are tolerated; that state is skipped below.
            print 'model of state %s not found' % (state_no, )
            continue

    # Fix one color per state so curves and background bands match.
    state_color = {}
    color = iter(cm.rainbow(np.linspace(0, 1, state_amount)))
    for state_no in model_group_by_state:
        state_color[state_no] = color.next()

    output_dir = os.path.join(figure_save_path,
                              'test_if_gradient_can_detect_state_switch')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Layout: two stacked subplots (loglik + gradient) per trial.
    trial_amount = len(trials_group_by_folder_name)
    subpolt_amount_for_each_trial = 2
    subplot_per_row = 1
    subplot_amount = trial_amount * subpolt_amount_for_each_trial
    row_amount = int(math.ceil(float(subplot_amount) / subplot_per_row))
    fig, ax_mat = plt.subplots(nrows=row_amount, ncols=subplot_per_row)
    # plt.subplots returns a 1-D (or scalar) array for degenerate grids;
    # force a 2-D matrix so indexing below is uniform.
    if row_amount == 1:
        ax_mat = ax_mat.reshape(1, -1)
    if subplot_per_row == 1:
        ax_mat = ax_mat.reshape(-1, 1)

    # Flatten the axes grid into a list ordered trial-by-trial.
    ax_list = []
    for i in range(trial_amount):
        for k in range(subpolt_amount_for_each_trial):
            j = subpolt_amount_for_each_trial * i + k

            # Python 2 integer division maps flat index -> grid position.
            row_no = j / subplot_per_row
            col_no = j % subplot_per_row
            ax_list.append(ax_mat[row_no, col_no])

    trial_count = -1
    for trial_name in trials_group_by_folder_name:
        trial_count += 1

        X = None

        # Boundaries (sample indices) where the ground-truth state changes.
        state_start_idx = [0]

        state_order = state_order_group_by_folder_name[trial_name]
        for state_no in state_order:
            if X is None:
                X = trials_group_by_folder_name[trial_name][state_no]
            else:
                X = np.concatenate(
                    (X, trials_group_by_folder_name[trial_name][state_no]),
                    axis=0)
            state_start_idx.append(len(X))

        plot_idx = trial_count * 2
        ax_loglik = ax_list[plot_idx]
        ax_loglik_gradient = ax_list[plot_idx + 1]

        color_bg_by_state(state_order, state_color, state_start_idx, ax_loglik)

        color_bg_by_state(state_order, state_color, state_start_idx,
                          ax_loglik_gradient)

        log_lik_mat = []
        log_lik_gradient_mat = []
        mat_row_color = []
        mat_row_name = []
        for state_no in model_group_by_state:
            log_lik_curve = np.array(
                util.fast_log_curve_calculation(
                    X, model_group_by_state[state_no]))
            # Discrete time-derivative of the log-likelihood curve.
            log_lik_gradient_curve = log_lik_curve[1:] - log_lik_curve[:-1]

            log_lik_mat.append(log_lik_curve)
            log_lik_gradient_mat.append(log_lik_gradient_curve)
            mat_row_color.append(state_color[state_no])
            mat_row_name.append('state %s' % (state_no, ))

        log_lik_mat = np.matrix(log_lik_mat)
        log_lik_gradient_mat = np.matrix(log_lik_gradient_mat)

        # Clip negative gradients to zero: only likelihood rises matter here.
        log_lik_gradient_mat[log_lik_gradient_mat < 0] = 0
        for row_no in range(log_lik_mat.shape[0]):
            ax_loglik.plot(log_lik_mat[row_no].tolist()[0],
                           label=mat_row_name[row_no],
                           color=mat_row_color[row_no])
            ax_loglik_gradient.plot(log_lik_gradient_mat[row_no].tolist()[0],
                                    label=mat_row_name[row_no],
                                    color=mat_row_color[row_no])

        title = "log-likelihood of %s HMM models" % state_amount
        ax_loglik.set_title(title)
        ax_loglik.set_ylabel('log probability')
        ax_loglik.set_xlabel('time step')
        title = "gradient of log-likelihood of %s HMM models" % state_amount
        ax_loglik_gradient.set_title(title)
        ax_loglik_gradient.set_ylabel('log probability')
        ax_loglik_gradient.set_xlabel('time step')

        # NOTE(review): this title is assigned but never applied to an axes.
        title = "trial %s" % (trial_name, )

    fig.set_size_inches(8 * subplot_per_row, 2 * row_amount)
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir,
                             "test_if_gradient_can_detect_state_switch.png"),
                format="png")
    fig.savefig(os.path.join(output_dir,
                             "test_if_gradient_can_detect_state_switch.eps"),
                format="eps")
def run(model_save_path, model_type, figure_save_path, threshold_c_value,
        trials_group_by_folder_name):
    """Check visually whether the score-metric ranking matches convergence.

    Reads each state's JSON training report (configs ranked by score),
    refits every listed config on the pooled training data, recomputes all
    trials' log-likelihood curves, and plots them per config so the ranking
    quality can be inspected.

    NOTE(review): threshold_c_value is accepted but not used in this variant.
    """

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()

    # Peek at any one trial to learn the state count (Python 2 dict API).
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    training_report_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            training_report_by_state[state_no] = json.load(
                open(
                    model_save_path + "/model_s%s_training_report.json" %
                    (state_no, ), 'r'))
        except IOError:
            # Missing reports are tolerated; that state is skipped below.
            print 'training report of state %s not found' % (state_no, )
            continue

    # Load the config pickle of the best-ranked model of each state; it is
    # used as the template that model ids are expanded against.
    model_config_by_state = {}
    for state_no in training_report_by_state:
        best_model_record = training_report_by_state[state_no][0]
        # Python 2 dict API: each report entry is a one-key {id: score} dict.
        best_model_id = best_model_record.keys()[0]
        model_config_by_state[state_no] = joblib.load(
            model_save_path + "/model_s%s_config_%s.pkl" %
            (state_no, best_model_id))

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}

    for state_no in training_report_by_state:

        # Pool every trial's data for this state, remembering per-trial
        # lengths so fit() treats them as separate sequences.
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)

        X = data_tempt
        lengths = length_array

        list_of_scored_models = training_report_by_state[state_no]
        model_config_template = model_config_by_state[state_no]

        # Refit every ranked config and plot its loglik curves.
        for idx in range(len(list_of_scored_models)):
            model_id = list_of_scored_models[idx].keys()[0]
            model_score = list_of_scored_models[idx].values()[0]
            model_config = util.bring_model_id_back_to_model_config(
                model_id, model_config_template)
            model_config = _translate_into_new_config_paradigm(model_config)
            model = model_generation.model_factory(model_type, model_config)

            model = model.fit(X, lengths=lengths)

            all_log_curves_of_this_state = []
            curve_owner = []

            for trial_name in trials_group_by_folder_name:
                curve_owner.append(trial_name)
                one_log_curve_of_this_state = []

                one_log_curve_of_this_state = util.fast_log_curve_calculation(
                    trials_group_by_folder_name[trial_name][state_no],
                    model,
                )

                all_log_curves_of_this_state.append(
                    one_log_curve_of_this_state)

            np_matrix_traj_by_time = np.matrix(all_log_curves_of_this_state)

            plot_trials_loglik_curves_of_one_state(
                np_matrix_traj_by_time,
                curve_owner,
                state_no,
                os.path.join(figure_save_path,
                             'check_if_score_metric_converge_loglik_curves',
                             'state_%s' % (state_no, )),
                title='state_%s_training_rank_%s_id_%s_score_%s' %
                (state_no, idx, model_id, model_score))
def run(model_save_path, trials_group_by_folder_name, parsed_options):
    """Experiment: can log-likelihood 'parallelity' survive tampering?

    For each state's model this visualizes Viterbi-path growth, tampers a
    copy of one trial's observations (and optionally the model's transmat /
    startprob, per parsed_options), profiles the models, and plots normal vs
    tampered log-likelihood curves, their first derivatives and the
    max-emission-probability term, highlighting tampered ranges in red.
    Output goes to experiment_output/test_if_parallelity_can_be_restored.
    """

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)

    # Peek at any one trial to learn the state count (Python 2 dict API).
    one_trial_data_group_by_state = trials_group_by_folder_name.itervalues(
    ).next()
    state_amount = len(one_trial_data_group_by_state)

    model_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        try:
            model_group_by_state[state_no] = joblib.load(model_save_path +
                                                         "/model_s%s.pkl" %
                                                         (state_no, ))
        except IOError:
            # Missing models are tolerated; that state is skipped below.
            print 'model of state %s not found' % (state_no, )
            continue

    # Output directory name encodes which tampering options were active.
    base_dir = os.path.dirname(os.path.realpath(__file__))
    exp_dir = os.path.join(base_dir, 'experiment_output',
                           'test_if_parallelity_can_be_restored')
    output_id = '(tamper_input)'

    tampered = False
    if parsed_options.tamper_transmat:
        output_id += '_(tamper_transmat)'
        tampered = True
    if parsed_options.tamper_startprob:
        output_id += '_(tamper_startprob)'
        tampered = True
    output_dir = os.path.join(exp_dir, output_id)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for state_no in model_group_by_state:
        # X is the (untampered) observation sequence of the peeked trial.
        X = one_trial_data_group_by_state[state_no]

        list_of_growing_viterbi_paths, n_samples, n_components = util.fast_growing_viterbi_paths_cal(
            X, model_group_by_state[state_no])

        list_of_lock_t, n_samples, n_components = util.fast_viterbi_lock_t_cal(
            X, model_group_by_state[state_no])

        util.output_growing_viterbi_path_img(
            list_of_growing_viterbi_paths,
            n_components,
            os.path.join(
                output_dir,
                'check_if_viterbi_path_grow_incrementally_state_%s.png' %
                state_no,
            ),
            list_of_lock_t,
        )
        util.visualize_viterbi_alog(
            X, model_group_by_state[state_no],
            os.path.join(output_dir, 'state %s visualized viterbi alog.png' %
                         (state_no, )))







        # Tamper a copy of X, using all trials' data as the tampering source.
        all_Xs = [trials_group_by_folder_name[trial_name][state_no]\
                for trial_name in trials_group_by_folder_name]
        tampered_X, list_of_tampered_range = tamper_input_mat(X.copy(), all_Xs)

        model = model_group_by_state[state_no]
        profile_model(model, output_dir, 'state %s raw' % (state_no, ))

        # Optionally tamper the model parameters themselves (in place).
        if parsed_options.tamper_transmat:
            tamper_transmat(model)
        if parsed_options.tamper_startprob:
            tamper_startprob(model)
        if tampered:
            profile_model(model, output_dir,
                          'state %s tampered' % (state_no, ))

        log_transmat = util.get_log_transmat(model)

        # ---- Normal (untampered) input: log-lik curve, emission matrix,
        # forward lattice and the "term" = emission logprob of the argmax
        # hidden state plus the transition logprob from the previous argmax.
        log_lik_of_X = np.array(util.fast_log_curve_calculation(X, model))
        framelogprob_of_X = np.array(
            util.get_emission_log_prob_matrix(X, model))
        fwdlattice_of_X = util.get_hidden_state_log_prob_matrix(X, model)
        max_hstate_of_X = fwdlattice_of_X.argmax(1)

        the_term_of_X = [framelogprob_of_X[0][max_hstate_of_X[0]]]
        for t in range(1, len(max_hstate_of_X)):
            hs1 = max_hstate_of_X[t - 1]
            hs2 = max_hstate_of_X[t]
            the_term_of_X.append(framelogprob_of_X[t][hs2] +
                                 log_transmat[hs1][hs2])

        profile_log_curve_cal(X, model, output_dir,
                              'state %s X' % (state_no, ),
                              list_of_tampered_range)

        # ---- Same quantities for the tampered input.
        log_lik_of_tampered_X = np.array(
            util.fast_log_curve_calculation(tampered_X, model))
        framelogprob_of_tampered_X = np.array(
            util.get_emission_log_prob_matrix(tampered_X, model))
        fwdlattice_of_tampered_X = util.get_hidden_state_log_prob_matrix(
            tampered_X, model)
        max_hstate_of_tampered_X = fwdlattice_of_tampered_X.argmax(1)

        the_term_of_tampered_X = [
            framelogprob_of_tampered_X[0][max_hstate_of_tampered_X[0]]
        ]
        for t in range(1, len(max_hstate_of_tampered_X)):
            hs1 = max_hstate_of_tampered_X[t - 1]
            hs2 = max_hstate_of_tampered_X[t]
            the_term_of_tampered_X.append(framelogprob_of_tampered_X[t][hs2] +
                                          log_transmat[hs1][hs2])

        profile_log_curve_cal(tampered_X, model, output_dir,
                              'state %s tampered_X' % (state_no, ),
                              list_of_tampered_range)

        # First derivatives of both curves (first sample forced to 0).
        deri_of_X = log_lik_of_X.copy()
        deri_of_X[1:] = log_lik_of_X[1:] - log_lik_of_X[:-1]
        deri_of_X[0] = 0

        deri_of_tampered_X = log_lik_of_tampered_X.copy()
        deri_of_tampered_X[
            1:] = log_lik_of_tampered_X[1:] - log_lik_of_tampered_X[:-1]
        deri_of_tampered_X[0] = 0

        # NOTE(review): diff is computed but never used below.
        diff = log_lik_of_X - log_lik_of_tampered_X

        # ---- Four stacked comparison plots; tampered ranges shaded red.
        fig = plt.figure()
        bbox_extra_artists = []

        ax = fig.add_subplot(411)
        title = "log lik"
        ax.set_title(title)
        ax.plot(log_lik_of_X,
                color='black',
                marker='None',
                linestyle='solid',
                label='Normal')
        ax.plot(log_lik_of_tampered_X,
                color='blue',
                marker='None',
                linestyle='solid',
                label='Tampered')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)

        ax = fig.add_subplot(412)
        title = "1st deri"
        ax.set_title(title)
        ax.plot(deri_of_X,
                color='black',
                marker='None',
                linestyle='solid',
                label='Normal')
        ax.plot(deri_of_tampered_X,
                color='blue',
                marker='None',
                linestyle='solid',
                label='Tampered')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)

        ax = fig.add_subplot(413)
        title = "1st deri and max emission prob of Normal"
        ax.set_title(title)
        ax.plot(deri_of_X,
                color='black',
                marker='None',
                linestyle='solid',
                label='Normal 1st deri')
        ax.plot(the_term_of_X,
                color='red',
                marker='None',
                linestyle='solid',
                label='Normal the term')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)

        ax = fig.add_subplot(414)
        title = "1st deri and max emission prob of Tampered"
        ax.set_title(title)
        ax.plot(deri_of_tampered_X,
                color='blue',
                marker='None',
                linestyle='solid',
                label='Tampered 1st deri')
        ax.plot(the_term_of_tampered_X,
                color='red',
                marker='None',
                linestyle='solid',
                label='Tampered the term')
        for r in list_of_tampered_range:
            ax.axvspan(r[0], r[1], facecolor='red', alpha=0.5)
        lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        bbox_extra_artists.append(lgd)

        title = "output_id %s state %s" % (output_id, state_no)
        fig.suptitle(title)

        plt.tight_layout()

        # Save both eps and png; keep the out-of-axes legends in the bbox.
        fig.savefig(os.path.join(output_dir, title + ".eps"),
                    format="eps",
                    bbox_extra_artists=bbox_extra_artists,
                    bbox_inches='tight')
        fig.savefig(os.path.join(output_dir, title + ".png"),
                    format="png",
                    bbox_extra_artists=bbox_extra_artists,
                    bbox_inches='tight')
# Example #12
def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()

    trials_amount = len(trials_group_by_folder_name)

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}

    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_tempt
        training_length_array_group_by_state[state_no] = length_array

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    for state_no in range(1, state_amount + 1):
        model_list = []
        model_generator = model_generation.get_model_generator(
            model_type, model_config)

        X = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        lengths[
            -1] -= 1  # Adapting for bnpy's observation is firt-order autoregressive gaussian
        for model, now_model_config in model_generator:
            print
            print '-' * 20
            print 'in state', state_no, ' working on config:', now_model_config
            model = model.fit(X, lengths=lengths)  #n_samples, n_features
            score = model_score.score(score_metric, model, X, lengths)

            if score == None:
                print "scorer says to skip this model, will do"
                continue

            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print 'score:', score
            print '=' * 20
            print

            model_generation.update_now_score(score)

        sorted_model_list = sorted(model_list, key=lambda x: x['score'])

        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))

        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']):
            i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))
예제 #13
0
def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name, test_trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_training_trial = trials_group_by_folder_name.values()

    test_trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        test_trials_group_by_folder_name)
    list_of_test_trial = test_trials_group_by_folder_name.values()

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_training_trial[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    test_data_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        training_data_group_by_state[state_no] = []
        test_data_group_by_state[state_no] = []
        for trial_no in range(len(list_of_training_trial)):
            training_data_group_by_state[state_no].append(
                list_of_training_trial[trial_no][state_no])
        for trial_no in range(len(list_of_test_trial)):
            test_data_group_by_state[state_no].append(
                list_of_test_trial[trial_no][state_no])

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    for state_no in range(1, state_amount + 1):
        print 'state_no', state_no
        sorted_model_list = train_model.run(
            list_of_train_mat=training_data_group_by_state[state_no],
            list_of_test_mat=test_data_group_by_state[state_no],
            model_type=model_type,
            model_config=model_config,
            score_metric=score_metric,
        )

        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))

        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']):
            i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))