Example #1
def fit(X, y, class_names):
    '''
    Train one anomaly model per class: for each class name, collect its
    trials, fit the candidate models, and keep the best-scoring one.
    '''
    for model_name in class_names:
        # collect all trials labeled with this class
        indices = [i for i, label in enumerate(y) if label == model_name]
        train_data = [X[i] for i in indices]
        model_list = []
        lengths = [len(trial) for trial in train_data]
        try:
            train_data = np.concatenate(train_data)
        except ValueError:
            print('Oops! Something went wrong while concatenating the trials...')
            ipdb.set_trace()
        # bnpy's observation model is a first-order autoregressive Gaussian,
        # so the last sequence loses one frame
        lengths[-1] -= 1
        model_generator = model_generation.get_model_generator(
            training_config.model_type_chosen, training_config.model_config)
        for model, now_model_config in model_generator:
            # train_data has shape (n_samples, n_features); lengths marks the
            # boundary of each trial inside the concatenated array
            model = model.fit(train_data, lengths=lengths)
            score = model_score.score(training_config.score_metric, model,
                                      train_data, lengths)
            if score is None:
                print("scorer says to skip this model, will do")
                continue
            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print('score:', score)
            model_generation.update_now_score(score)
        # sort ascending by score; the first entry is treated as the best model
        sorted_model_list = sorted(model_list, key=lambda x: x['score'])

        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        anomaly_model_path = os.path.join(
            training_config.anomaly_model_save_path, model_name,
            training_config.config_by_user['data_type_chosen'],
            training_config.config_by_user['model_type_chosen'],
            model_id)  # id of the best config found above

        if not os.path.isdir(anomaly_model_path):
            os.makedirs(anomaly_model_path)

        # one model per anomaly class, saved with a fixed state index of 1
        joblib.dump(best['model'],
                    os.path.join(anomaly_model_path, "model_s%s.pkl" % (1, )))
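
A minimal sketch of how fit might be called, assuming X is a list of per-trial
feature matrices and y holds one class label per trial (the toy data below is
hypothetical; the project-local modules training_config, model_generation,
model_score and util must already be importable):

import numpy as np

# Hypothetical toy data: three trials of shape (n_frames, n_features)
# covering two anomaly classes.
X = [np.random.randn(50, 4), np.random.randn(60, 4), np.random.randn(55, 4)]
y = ['collision', 'collision', 'slip']
class_names = ['collision', 'slip']

fit(X, y, class_names)  # trains and saves one best model per class
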
def train_hmm_model(train_data, lengths):
    model_list = []
    # bnpy's observation model is a first-order autoregressive Gaussian,
    # so the last sequence loses one frame
    lengths[-1] -= 1
    model_generator = model_generation.get_model_generator(
        training_config.model_type_chosen, training_config.model_config)
    for model, now_model_config in model_generator:
        # train_data has shape (n_samples, n_features)
        model = model.fit(train_data, lengths=lengths)
        score = model_score.score(training_config.score_metric, model,
                                  train_data, lengths)
        if score is None:
            print("scorer says to skip this model, will do")
            continue
        model_list.append({
            "model": model,
            "now_model_config": now_model_config,
            "score": score
        })
        print('score:', score)
        model_generation.update_now_score(score)
    # sort ascending by score; the first entry is treated as the best model
    sorted_model_list = sorted(model_list, key=lambda x: x['score'])
    best_model = sorted_model_list[0]
    model_id = util.get_model_config_id(best_model['now_model_config'])
    # also return the full sorted list so callers can build a training report
    return best_model, model_id, sorted_model_list
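
train_hmm_model expects its sequences pre-concatenated, following hmmlearn's
convention: a single (n_samples, n_features) array plus a lengths list that
marks where each trial ends. A self-contained sketch of that convention using
plain hmmlearn (toy data; the real candidate models come from
model_generation):

import numpy as np
from hmmlearn import hmm

# Two toy trials with 4 features each, stacked into one array.
trials = [np.random.randn(50, 4), np.random.randn(60, 4)]
train_data = np.concatenate(trials)
lengths = [len(t) for t in trials]  # [50, 60]

model = hmm.GaussianHMM(n_components=3, covariance_type='diag', n_iter=20)
model.fit(train_data, lengths)
print(model.score(train_data, lengths))  # total log-likelihood
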
def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = list(trials_group_by_folder_name.values())

    trials_amount = len(trials_group_by_folder_name)

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}

    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_temp = list_of_trials[trial_no][state_no]
            else:
                data_temp = np.concatenate(
                    (data_temp, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_temp
        training_length_array_group_by_state[state_no] = length_array

    for state_no in range(1, state_amount + 1):
        train_data = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        best_model, model_id, sorted_model_list = train_hmm_model(
            train_data, lengths)
        joblib.dump(
            best_model['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

        joblib.dump(
            best_model['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))

        # the score is recorded in the file name; the pickle payload is unused
        joblib.dump(
            None,
            os.path.join(
                model_save_path,
                "model_s%s_score_%s.pkl" % (state_no, best_model['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))

        # plot the hidden state sequence for each state
        print()
        print()
        print('Finish fitting the posterior model -> Generating the hidden state sequence...')
        print()
        print()
        model = best_model['model']
        if model_type in ["hmmlearn's HMM", "hmmlearn's GMMHMM"]:
            _, model.z = model.decode(train_data, algorithm="viterbi")

        elif model_type == "BNPY's HMM":
            model.z = model.decode(train_data, lengths)

        elif model_type == "PYHSMM's HMM":
            model.z = model.model.stateseqs[0]

        else:
            print('Sorry, this model cannot obtain the hidden state sequence')
            return

        # plt.close("all")
        # Xdf = pd.DataFrame(X) # plot the original multimodal signals
        # Xdf.plot()

        # im_data  = np.tile(model.z, 2)
        # cmap =cm.get_cmap('jet',np.max(model.z))
        # print np.unique(model.z)
        # ax.imshow(im_data[None], aspect='auto', interpolation='nearest', vmin = 0, vmax = np.max(model.z), cmap = cmap, alpha = 0.5)

        fig, ax = plt.subplots(nrows=1, ncols=1)
        trial_len = len(model.z) // trials_amount  # integer division
        color = iter(cm.rainbow(np.linspace(0, 1, trials_amount)))
        zhat = []
        for iTrial in range(trials_amount):
            zSeq = model.z[iTrial * trial_len:(iTrial + 1) * trial_len]
            ax.plot(zSeq, color=next(color))  # one colored line per trial
            zhat.append(zSeq.tolist() + [zSeq[-1]])
        plt.title('The hidden state sequence of state_%d' % (state_no))
        plt.show()
        zdf = pd.DataFrame(zhat)
        zdf.to_csv(model_save_path + '/zhat.csv', index=False)
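
In the hmmlearn branch above, model.decode(..., algorithm="viterbi") returns
the log-likelihood of the single most likely state path together with that
path, which is what gets plotted. A self-contained toy sketch:

import numpy as np
from hmmlearn import hmm

X = np.random.randn(60, 4)  # toy observations
model = hmm.GaussianHMM(n_components=3, n_iter=20).fit(X)

# Viterbi decoding: log-probability of the best path plus the path itself.
logprob, z = model.decode(X, algorithm="viterbi")
print(logprob, z.shape)  # z assigns one hidden-state label per frame
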
    # config for preprocessing
    'preprocessing_scaling'   : False, # scaled data has zero mean and unit variance
    'preprocessing_normalize' : False, # normalize individual samples to unit norm ('l1' or 'l2')
    'norm_style'              : 'l2',
    'pca_components'          : 0,     # 0 disables the PCA step

    # threshold of the derivative used in HMM online anomaly detection
    'deri_threshold'          : 200,

    # c value used in threshold training: threshold = mean - c * std
    'threshold_c_value'       : 5
}

model_config_set_name = model_store[config_by_user['model_type_chosen']]['use']
model_config          = model_store[config_by_user['model_type_chosen']]['config_set'][model_config_set_name]
model_id              = util.get_model_config_id(model_config)
model_id              = config_by_user['score_metric']+model_id
norm_style            = config_by_user['norm_style']

success_path = os.path.join(config_by_user['dataset_path'], "success")
test_success_data_path = os.path.join(config_by_user['dataset_path'], "success_for_test")
model_save_path = os.path.join(config_by_user['base_path'], "model", config_by_user['data_type_chosen'], config_by_user['model_type_chosen'])
figure_save_path = os.path.join(config_by_user['base_path'], "figure", config_by_user['data_type_chosen'], config_by_user['model_type_chosen'], model_id)

# for anomaly analysis
anomaly_data_path = config_by_user['dataset_path']
anomaly_raw_data_path = os.path.join(config_by_user['dataset_path'], 'anomalies')
anomaly_model_save_path = os.path.join(config_by_user['base_path'], "anomaly_models", config_by_user['data_type_chosen'], config_by_user['model_type_chosen'])
anomaly_identification_figure_path = os.path.join(config_by_user['base_path'], "figure", config_by_user['data_type_chosen'], config_by_user['model_type_chosen'])

# inject every config_by_user entry into the module namespace as a top-level
# variable (e.g. norm_style = 'l2')
exec('\n'.join("%s=%r" % i for i in config_by_user.items()))
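
The exec line builds one assignment statement per config entry and executes
it, so each key becomes a module-level variable. A less magical equivalent,
assuming every key is a valid Python identifier, is to update the module
globals directly:

# Same effect without string building (assumes each key is a valid
# Python identifier).
globals().update(config_by_user)

print(norm_style)         # 'l2'
print(threshold_c_value)  # 5
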
Example #5
def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = list(trials_group_by_folder_name.values())

    trials_amount = len(trials_group_by_folder_name)

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}

    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_temp = list_of_trials[trial_no][state_no]
            else:
                data_temp = np.concatenate(
                    (data_temp, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_temp
        training_length_array_group_by_state[state_no] = length_array

    for state_no in range(1, state_amount + 1):
        model_list = []
        model_generator = model_generation.get_model_generator(
            model_type, model_config)

        X = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        # bnpy's observation model is a first-order autoregressive Gaussian,
        # so the last sequence loses one frame
        lengths[-1] -= 1
        for model, now_model_config in model_generator:
            print()
            print('-' * 20)
            print('in state', state_no, 'working on config:', now_model_config)
            # X has shape (n_samples, n_features)
            model = model.fit(X, lengths=lengths)
            score = model_score.score(score_metric, model, X, lengths)

            if score is None:
                print("scorer says to skip this model, will do")
                continue

            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print('score:', score)
            print('=' * 20)
            print()

            model_generation.update_now_score(score)

        # sort ascending by score; the first entry is treated as the best model
        sorted_model_list = sorted(model_list, key=lambda x: x['score'])

        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))

        # the score is recorded in the file name; the pickle payload is unused
        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))
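
All of these examples iterate over model_generation.get_model_generator(...),
a project-local helper that yields (model, now_model_config) pairs to try. A
hypothetical sketch of the pattern it implements, sweeping hidden-state counts
with hmmlearn models (the function name and config key are illustrative only):

from hmmlearn import hmm

def get_model_generator_sketch(model_config):
    # Hypothetical stand-in for model_generation.get_model_generator:
    # yield one untrained candidate model per configuration.
    for n_states in model_config['hmm_hidden_state_amount']:
        now_model_config = {'hmm_hidden_state_amount': n_states}
        model = hmm.GaussianHMM(n_components=n_states, n_iter=20)
        yield model, now_model_config

for model, now_model_config in get_model_generator_sketch(
        {'hmm_hidden_state_amount': [3, 5, 7]}):
    print(now_model_config)
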
Example #6
def run(model_save_path, model_type, model_config, score_metric,
        trials_group_by_folder_name, test_trials_group_by_folder_name):

    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_training_trial = list(trials_group_by_folder_name.values())

    test_trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        test_trials_group_by_folder_name)
    list_of_test_trial = list(test_trials_group_by_folder_name.values())

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_training_trial[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    test_data_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        training_data_group_by_state[state_no] = []
        test_data_group_by_state[state_no] = []
        for trial_no in range(len(list_of_training_trial)):
            training_data_group_by_state[state_no].append(
                list_of_training_trial[trial_no][state_no])
        for trial_no in range(len(list_of_test_trial)):
            test_data_group_by_state[state_no].append(
                list_of_test_trial[trial_no][state_no])

    for state_no in range(1, state_amount + 1):
        print('state_no', state_no)
        sorted_model_list = train_model.run(
            list_of_train_mat=training_data_group_by_state[state_no],
            list_of_test_mat=test_data_group_by_state[state_no],
            model_type=model_type,
            model_config=model_config,
            score_metric=score_metric,
        )

        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))

        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path,
                         "model_s%s_config_%s.pkl" % (state_no, model_id)))

        # the score is recorded in the file name; the pickle payload is unused
        joblib.dump(
            None,
            os.path.join(model_save_path,
                         "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        import json
        json.dump(train_report,
                  open(
                      os.path.join(
                          model_save_path,
                          "model_s%s_training_report.json" % (state_no)), 'w'),
                  separators=(',\n', ': '))
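
Each example persists the winning model per state as model_s<state_no>.pkl
next to its config and training report. A minimal sketch of loading one back
and scoring fresh data (the path and toy observations are hypothetical):

import os
import numpy as np
import joblib

model_save_path = "model"  # hypothetical; matches the layout used above
model = joblib.load(os.path.join(model_save_path, "model_s1.pkl"))

# For hmmlearn-backed models this is the log-likelihood of the new trial.
new_X = np.random.randn(40, 4)  # toy (n_samples, n_features) observations
print(model.score(new_X))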