import logging
import traceback

import numpy as np
from sklearn.model_selection import KFold

import model_generation
import model_score


def run(list_of_train_mat, list_of_test_mat, model_type, model_config, score_metric, logger=None):
    if logger is None:
        logger = logging.getLogger('birl_hmm_train_model')
    list_of_train_mat = np.array(list_of_train_mat)
    list_of_test_mat = np.array(list_of_test_mat)

    tried_models = []
    model_generator = model_generation.get_model_generator(model_type, model_config)
    for raw_model, model_config in model_generator:
        logger.debug('-' * 20)
        logger.debug(' working on config: %s' % model_config)
        try:
            kf = KFold(n_splits=3, shuffle=True)
            scores = []
            for cv_train_index, cv_test_index in kf.split(list_of_train_mat):
                list_of_cv_train_mat = (list_of_train_mat.copy())[cv_train_index]
                list_of_cv_test_mat = (list_of_train_mat.copy())[cv_test_index]

                cv_train_lengths = [i.shape[0] for i in list_of_cv_train_mat]
                cv_train_lengths[-1] -= 1  # for the first-order autoregressive observation model
                cv_train_X = np.concatenate(list_of_cv_train_mat, axis=0)

                cv_test_lengths = [i.shape[0] for i in list_of_cv_test_mat]
                cv_test_X = np.concatenate(list_of_cv_test_mat, axis=0)

                model = model_generation.model_factory(model_type, model_config)
                model = model.fit(cv_train_X, lengths=cv_train_lengths)

                score = model_score.score(score_metric, model, cv_test_X, cv_test_lengths)
                if score is None:
                    raise Exception("scorer says to skip this model")
                else:
                    scores.append(score)
        except Exception as e:
            logger.error("Failed to run CV on this model: %s" % e)
            logger.error("traceback: %s" % traceback.format_exc())
            continue

        tried_models.append({
            "model": model,
            "model_config": model_config,
            "cv_score_mean": np.mean(scores),
            "cv_score_std": np.std(scores),
        })
        logger.debug('score: %s' % np.mean(scores))
        logger.debug('=' * 20)

    if len(tried_models) == 0:
        raise Exception("All models tried failed to train.")

    # Note: list_of_test_mat is not scored here; the returned value is the best CV mean score.
    tried_models = sorted(tried_models, key=lambda x: x['cv_score_mean'])
    best_model = tried_models[0]['model']
    test_score = tried_models[0]['cv_score_mean']
    return best_model, test_score, tried_models
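# Hedged usage sketch for run(): each trial is assumed to be an
# (n_samples, n_features) numpy array; the model_type, model_config and
# score_metric values below are illustrative placeholders -- the real accepted
# values are defined by model_generation and model_score, not by this sketch.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    train_trials = [rng.randn(100, 7) for _ in range(6)]  # 6 toy trials, 7-dim observations
    test_trials = [rng.randn(100, 7) for _ in range(2)]

    best_model, test_score, tried_models = run(
        list_of_train_mat=train_trials,
        list_of_test_mat=test_trials,
        model_type='hmmlearn',            # placeholder key
        model_config={},                  # placeholder config grid
        score_metric='log_likelihood',    # placeholder metric name
    )
    print('best CV mean score: %s over %d candidate configs' % (test_score, len(tried_models)))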
import os

import numpy as np
import ipdb
import joblib  # or: from sklearn.externals import joblib on older scikit-learn

import model_generation
import model_score
import training_config
import util


def fit(X, y, class_names):
    '''
    function: train all the anomalous models
    '''
    for model_name in class_names:
        indices = [i for i, label in enumerate(y) if label == model_name]
        train_data = [X[i] for i in indices]
        model_list, lengths = [], []
        for i in range(len(train_data)):
            lengths.append(len(train_data[i]))
        try:
            train_data = np.concatenate(train_data)
        except ValueError:
            print('Oops! Failed to concatenate the training data for this class.')
            ipdb.set_trace()
        lengths[-1] -= 1  # for the first-order autoregressive observation model

        model_generator = model_generation.get_model_generator(
            training_config.model_type_chosen, training_config.model_config)
        for model, now_model_config in model_generator:
            model = model.fit(train_data, lengths=lengths)  # n_samples, n_features
            score = model_score.score(training_config.score_metric, model, train_data, lengths)
            if score is None:
                print('scorer says to skip this model, will do')
                continue
            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print('score: %s' % score)
            model_generation.update_now_score(score)

        sorted_model_list = sorted(model_list, key=lambda x: x['score'])
        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        anomaly_model_path = os.path.join(
            training_config.anomaly_model_save_path,
            model_name,
            training_config.config_by_user['data_type_chosen'],
            training_config.config_by_user['model_type_chosen'],
            training_config.model_id)
        if not os.path.isdir(anomaly_model_path):
            os.makedirs(anomaly_model_path)
        joblib.dump(
            best['model'],
            os.path.join(anomaly_model_path, "model_s%s.pkl" % (1, )))
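# Hedged sketch (not part of the original module): once fit() has saved one
# model per anomaly class, a new trial could be labelled by picking the class
# whose model assigns it the highest log-likelihood. The path layout mirrors the
# joblib.dump() call above and reuses the os / joblib / training_config imports;
# classify_anomaly itself is a hypothetical helper, and model.score() is assumed
# to behave like hmmlearn's score(), returning a log-likelihood.
def classify_anomaly(trial_mat, class_names):
    best_label, best_loglik = None, None
    for model_name in class_names:
        model_path = os.path.join(
            training_config.anomaly_model_save_path,
            model_name,
            training_config.config_by_user['data_type_chosen'],
            training_config.config_by_user['model_type_chosen'],
            training_config.model_id,
            "model_s1.pkl")
        model = joblib.load(model_path)
        loglik = model.score(trial_mat)
        if best_loglik is None or loglik > best_loglik:
            best_label, best_loglik = model_name, loglik
    return best_label, best_loglik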
import model_generation
import model_score
import training_config
import util


def train_hmm_model(train_data, lengths):
    model_list = []
    lengths[-1] -= 1  # for the first-order autoregressive observation model

    model_generator = model_generation.get_model_generator(
        training_config.model_type_chosen, training_config.model_config)
    for model, now_model_config in model_generator:
        model = model.fit(train_data, lengths=lengths)  # n_samples, n_features
        score = model_score.score(training_config.score_metric, model, train_data, lengths)
        if score is None:
            print('scorer says to skip this model, will do')
            continue
        model_list.append({
            "model": model,
            "now_model_config": now_model_config,
            "score": score
        })
        print('score: %s' % score)
        model_generation.update_now_score(score)

    sorted_model_list = sorted(model_list, key=lambda x: x['score'])
    best_model = sorted_model_list[0]
    model_id = util.get_model_config_id(best_model['now_model_config'])
    return best_model, model_id
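# Hedged usage sketch: train_hmm_model() expects the demonstrations already
# stacked hmmlearn-style, i.e. one (sum_of_samples, n_features) array plus the
# per-trial lengths; the length adjustment for the autoregressive observation
# is done inside the function. The toy data below is illustrative only.
import numpy as np

trials = [np.random.randn(80, 7), np.random.randn(95, 7), np.random.randn(102, 7)]
lengths = [t.shape[0] for t in trials]
train_data = np.concatenate(trials, axis=0)

best_model, model_id = train_hmm_model(train_data, lengths)
print('best config id: %s, score: %s' % (model_id, best_model['score']))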
import util
import model_config_generation
import model_score

# filter_static_points() and DEBUG_MODE are assumed to be defined elsewhere in this module.


def run(mat, model_type, model_config):
    mat = filter_static_points(mat)

    # Build a set of perturbed start/end pairs by shifting the goal's z-coordinate.
    list_of_random_startend = []
    for i in range(10):
        start = mat[0].copy()
        end = mat[-1].copy()
        end[2] += i * 0.01
        list_of_random_startend.append((start, end))

    dmp_instance = util.get_dmp_model(mat, model_type)

    model_list = []
    model_generator = model_config_generation.get_model_config_generator(model_type, model_config)
    for now_model_config in model_generator:
        print('')
        print('-' * 20)
        print(' working on config: %s' % now_model_config)

        model = {
            'dmp_instance': dmp_instance,
            'gen_ay': now_model_config['gen_ay'],
        }
        score, debug_var = model_score.score(model, mat, list_of_random_startend)
        if score is None:
            print('scorer says to skip this model, will do')
            continue

        tmp_d = {
            "model": model,
            "now_model_config": now_model_config,
            "score": score,
        }
        if DEBUG_MODE:
            tmp_d['debug_var'] = debug_var
        model_list.append(tmp_d)

        print('score: %s' % score)
        print('=' * 20)
        print('')
        model_config_generation.update_now_score(score)

    sorted_model_list = sorted(model_list, key=lambda x: x['score'])
    if len(sorted_model_list) == 0:
        print("ERROR: empty sorted_model_list.")
        return None

    if DEBUG_MODE:
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D
        from matplotlib.pyplot import cm
        import numpy as np

        for d in sorted_model_list:
            debug_var = d['debug_var']
            score = d['score']

            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            for tup in list_of_random_startend:
                start, end = tup
                ax.scatter(start[0], start[1], start[2], color='red')
                ax.scatter(end[0], end[1], end[2], color='green')
            ax.plot(mat[:, 0], mat[:, 1], mat[:, 2], color='black', label='orig')

            color = iter(cm.rainbow(np.linspace(0, 1, len(debug_var))))
            for tup in debug_var:
                gen_mat = tup[0]
                dist = tup[1]
                ax.plot(gen_mat[:, 0], gen_mat[:, 1], gen_mat[:, 2], color='blue', label=dist)

            ax.set_title(str(score) + " " + str(d['now_model_config']))
            ax.set_xlim3d(0, 2)
            ax.set_ylim3d(-2, 2)
            ax.set_zlim3d(-2, 2)
            fig.show()
            raw_input()  # pause between plots (Python 2)

    return sorted_model_list
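# Hedged usage sketch: given a demonstration matrix mat and a (model_type,
# model_config) pair accepted by model_config_generation, run() returns the
# candidate DMP configurations sorted by score (ascending, so index 0 is
# treated as the best elsewhere in this codebase), or None if every
# configuration was skipped. Persisting with joblib mirrors how the HMM
# training code above stores its best models; the file name is illustrative.
import joblib

sorted_model_list = run(mat, model_type, model_config)
if sorted_model_list is not None:
    best = sorted_model_list[0]
    print('best gen_ay config: %s, score: %s' % (best['now_model_config'], best['score']))
    joblib.dump(best['model'], 'dmp_model.pkl')  # illustrative path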
import os
import json

import numpy as np
import joblib  # or: from sklearn.externals import joblib on older scikit-learn

import model_generation
import model_score
import util


def run(model_save_path, model_type, model_config, score_metric, trials_group_by_folder_name):
    trials_group_by_folder_name = util.make_trials_of_each_state_the_same_length(
        trials_group_by_folder_name)
    list_of_trials = trials_group_by_folder_name.values()
    trials_amount = len(trials_group_by_folder_name)

    if not os.path.isdir(model_save_path):
        os.makedirs(model_save_path)

    one_trial_data_group_by_state = list_of_trials[0]
    state_amount = len(one_trial_data_group_by_state)

    training_data_group_by_state = {}
    training_length_array_group_by_state = {}
    for state_no in range(1, state_amount + 1):
        length_array = []
        for trial_no in range(len(list_of_trials)):
            length_array.append(list_of_trials[trial_no][state_no].shape[0])
            if trial_no == 0:
                data_tempt = list_of_trials[trial_no][state_no]
            else:
                data_tempt = np.concatenate(
                    (data_tempt, list_of_trials[trial_no][state_no]), axis=0)
        training_data_group_by_state[state_no] = data_tempt
        training_length_array_group_by_state[state_no] = length_array

    for state_no in range(1, state_amount + 1):
        model_list = []
        model_generator = model_generation.get_model_generator(model_type, model_config)

        X = training_data_group_by_state[state_no]
        lengths = training_length_array_group_by_state[state_no]
        lengths[-1] -= 1  # adapting for bnpy's first-order autoregressive Gaussian observation

        for model, now_model_config in model_generator:
            print('')
            print('-' * 20)
            print('in state %s, working on config: %s' % (state_no, now_model_config))

            model = model.fit(X, lengths=lengths)  # n_samples, n_features
            score = model_score.score(score_metric, model, X, lengths)
            if score is None:
                print('scorer says to skip this model, will do')
                continue

            model_list.append({
                "model": model,
                "now_model_config": now_model_config,
                "score": score
            })
            print('score: %s' % score)
            print('=' * 20)
            print('')
            model_generation.update_now_score(score)

        sorted_model_list = sorted(model_list, key=lambda x: x['score'])
        best = sorted_model_list[0]
        model_id = util.get_model_config_id(best['now_model_config'])

        joblib.dump(
            best['model'],
            os.path.join(model_save_path, "model_s%s.pkl" % (state_no, )))
        joblib.dump(
            best['now_model_config'],
            os.path.join(model_save_path, "model_s%s_config_%s.pkl" % (state_no, model_id)))
        joblib.dump(
            None,
            os.path.join(model_save_path, "model_s%s_score_%s.pkl" % (state_no, best['score'])))

        train_report = [{
            util.get_model_config_id(i['now_model_config']): i['score']
        } for i in sorted_model_list]
        json.dump(
            train_report,
            open(os.path.join(model_save_path, "model_s%s_training_report.json" % (state_no, )), 'w'),
            separators=(',\n', ': '))
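# Hedged sketch: the per-state training report written above can be read back
# to compare candidate configurations. The file name follows the json.dump()
# call in run(); the helper name and the model_save_path value are illustrative.
def load_training_report(model_save_path, state_no):
    report_path = os.path.join(model_save_path, "model_s%s_training_report.json" % (state_no, ))
    with open(report_path, 'r') as f:
        report = json.load(f)  # list of {model_config_id: score} dicts, best first
    return report

for entry in load_training_report('/tmp/introspection_models', state_no=1):  # illustrative path
    for config_id, score in entry.items():
        print('%s -> %s' % (config_id, score))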