def combine_models(trained_models, df, y_target, y_inj, lable, log_vote=True, plot=True):
    """Combine the per-column models into one score vector and evaluate it.

    For every key ``col`` of ``trained_models`` the column ``df[col]`` is
    reshaped to a single-feature matrix and passed to the model's
    ``log_reg.predict_proba`` (second probability column kept) and to its
    ``linear_reg.predict`` (on ``log(x + model.eps)``).  Each model's ``mixl``
    is then called and the per-model results are averaged column-wise.

    Args:
        trained_models: mapping from a column name of ``df`` to a fitted
            model exposing ``log_reg``, ``linear_reg``, ``eps`` and ``mixl``.
        df: table holding one column per key of ``trained_models``.
        y_target: unused; kept so existing callers keep working.
        y_inj: observations handed to ``mixl`` after conversion by
            ``asmatrix``.
        lable: labels forwarded to ``test_plot``, ``precision_recall`` and
            ``roc_metric`` (presumably the ground-truth anomaly flags --
            confirm against the callers).
        log_vote: when true every model's ``mixl`` receives the predictions
            averaged over all columns; otherwise it receives the predictions
            made from its own column.
        plot: when true, call ``test_plot`` on the combined result.

    Returns:
        A 2-tuple: the value of ``roc_metric(combined, lable)`` and the first
        element of the precision sequence returned by ``precision_recall``.

    Raises:
        ValueError: if ``trained_models`` is empty.
    """
    if not trained_models:
        raise ValueError("combine_models needs at least one trained model")

    y_test = asmatrix(y_inj)
    log_pred = {}
    linear_pred = {}
    for col, model in trained_models.items():
        # `.values` is the long-supported replacement for the removed
        # `.as_matrix()`.
        x_test = df[col].values.reshape(-1, 1)
        log_pred[col] = model.log_reg.predict_proba(x_test)[:, 1].reshape(-1, 1)
        linear_pred[col] = model.linear_reg.predict(
            np.log(x_test + model.eps)).reshape(-1, 1)

    # np.hstack expects a real sequence; a dict view (Python 3) is not one,
    # so materialise the values first.
    a_log_pred = np.mean(np.hstack(list(log_pred.values())), axis=1)
    a_linear_pred = np.mean(np.hstack(list(linear_pred.values())), axis=1)

    ll = []
    for col, model in trained_models.items():
        if log_vote:
            result = model.mixl(y_test, a_log_pred, a_linear_pred)
        else:
            result = model.mixl(y_test, log_pred[col], linear_pred[col])
        ll.append(result)
    combined_result = np.mean(np.hstack(ll), axis=1)

    if plot:
        test_plot(combined_result, lable, y_test)
    pr, _ = precision_recall(combined_result, lable)
    return roc_metric(combined_result, lable), pr[0]
def test(trained_model, target_station, k_station, test_data, y_inj, t_lbl, plot=True):
    """Score ``y_inj`` with ``trained_model`` and report ROC and precision.

    Args:
        trained_model: object whose ``predict(x=..., y=...)`` returns the
            scores to evaluate.
        target_station: unused; kept so existing callers keep working.
        k_station: column selector applied to ``test_data`` to build the
            feature matrix.
        test_data: table providing the ``k_station`` column(s).
        y_inj: observations passed to ``predict`` as ``y``.
        t_lbl: labels the scores are evaluated against.
        plot: when true, call ``test_plot`` on the scores.

    Returns:
        A 2-tuple: ``roc_metric(scores, t_lbl, False)`` and
        ``ap(pred=scores, obs=t_lbl)``.
    """
    # `.values` replaces `.as_matrix()`, which newer pandas no longer has.
    x = test_data[k_station].values
    ll_test = trained_model.predict(x=x, y=y_inj)
    if plot:
        test_plot(ll_test, t_lbl, asmatrix(y_inj))
    pr = ap(pred=ll_test, obs=t_lbl)
    return roc_metric(ll_test, t_lbl, False), pr
def main():
    """Run the training and testing experiment on the two sample CSV files.

    Reads ``sampledata.csv``, fits one ``MixLinearModel`` on all feature
    columns (columns 2 onward) and one per individual column, injects
    faults into the target (column 1) with ``synthetic_fault`` and prints
    the ``roc_metric`` results.  The same evaluation is then repeated on
    ``sampletahmo_test.csv`` using the models fitted on the training file.
    Results are only printed; nothing is returned.
    """
    df = pd.read_csv('sampledata.csv')
    # `.iloc` / `.values` replace `.ix` / `.as_matrix()`, which newer pandas
    # no longer provides; the slices are positional in both spellings.
    x = df.iloc[:, 2:].values
    y = df.iloc[:, 1:2].values

    print("Training sets.")
    model = MixLinearModel(linear_reg=Ridge(alpha=0.5))
    model.fit(x=x, y=y)
    dt, lbl = synthetic_fault(y, True)
    ll_ob = model.predict(x, y=dt)
    print(roc_metric(ll_ob, lbl, False))

    # One single-feature model per station column.
    models = {}
    colmn = df.columns[2:]
    roc = {}
    predictions = []
    for col in colmn:
        train_col = df[col].values.reshape(-1, 1)
        models[col] = MixLinearModel(linear_reg=Ridge(alpha=0.5)).fit(
            x=train_col, y=y)
        predictions.append(models[col].predict(train_col, y=dt))
        roc[col] = evaluate_model(models[col], train_col, dt, lbl)
    pred = np.hstack(predictions)
    print("AUC of average likelihood")
    print(roc_metric(np.sum(pred, axis=1), lbl))
    print("AUC of individual stations")
    print(roc)
    # combine_models requires (models, df, y_target, y_inj, lable) and already
    # returns the (roc, precision) pair, so it is printed directly instead of
    # being fed to roc_metric a second time.
    print(combine_models(models, df, y, dt, lbl))

    print("Experiment on testing data.")
    test_data = pd.read_csv('sampletahmo_test.csv')
    x_t = test_data.iloc[:, 2:].values
    y_t = test_data.iloc[:, 1:2].values
    y_insert, t_lbl = synthetic_fault(y_t)
    ll_test = model.predict(x=x_t, y=y_insert)
    print(roc_metric(ll_test, t_lbl, False))
    print("AUC of test dataset for 2017")
    roc_test = {}
    t_predictions = []
    for col in colmn:
        test_col = test_data[col].values.reshape(-1, 1)
        # t_lbl belongs to the fault-injected series, so score y_insert here,
        # mirroring the training section above (the clean y_t was used before).
        roc_test[col] = evaluate_model(models[col], test_col, y_insert, t_lbl)
        t_predictions.append(models[col].predict(test_col, y=y_insert))
    print(roc_metric(np.mean(np.hstack(t_predictions), axis=1), t_lbl))
    print("AUC of individual stations ")
    print(roc_test)
    # Combined model on the test file; same calling convention as above.
    print(combine_models(models, test_data, y_t, y_insert, t_lbl))
def evaluate_model(trained_model, x_test, y_test, lbl):
    """Return ``roc_metric`` of the model's scores for the given data.

    ``trained_model.predict(x_test, y=y_test)`` produces the scores, which
    are compared against ``lbl`` by ``roc_metric``.
    """
    return roc_metric(trained_model.predict(x_test, y=y_test), lbl)
def _spread_group_max(scores, member_indices):
    """Overwrite ``scores[member_indices]`` with their maximum, in place.

    Does nothing when ``member_indices`` is empty, because ``np.max`` raises
    ``ValueError`` on an empty selection.
    """
    if len(member_indices) == 0:
        return
    scores[member_indices] = np.max(scores[member_indices])


def group_detection(target_station, k=3, alpha=0.05, threshold=2.0):
    """Evaluate point-wise and group-wise detection for one station.

    Injects synthetic groups into the module-level ``train_data`` and
    ``test_data`` series of ``target_station``, trains a model with
    ``train`` and scores both series with ``test_evaluate_group``.  For the
    "group" metrics every member of an injected group receives the maximum
    score found inside that group.

    Args:
        target_station: column of ``train_data`` / ``test_data`` to evaluate.
        k: forwarded to ``train`` as ``num_k``.
        alpha: forwarded to ``synthetic_groups`` and stored under ``'anom'``.
        threshold: forwarded to ``synthetic_groups``.

    Returns:
        dict with the keys ``station``, ``num_k``, ``anom``, ``auc_train``,
        ``pr_train``, ``auc_train_grp`` and ``pr_train_grp``; when the test
        phase succeeds also ``auc_test``, ``pr_test``, ``auc_test_grp`` and
        ``pr_test_grp``.  If the test phase raises, the error is printed and
        the training-only dict is returned.
    """
    train_result = {
        'station': target_station,
        'num_k': k,
        'anom': alpha,
    }

    y_train, groups, lbl = synthetic_groups(
        train_data[target_station], plot=False, alpha=alpha,
        threshold=threshold)
    model, k_station = train(target_station=target_station, num_k=k,
                             train_data=train_data, pairwise=False)
    ll_score = test_evaluate_group(trained_model=model, k_station=k_station,
                                   test_data=train_data, y_inj=y_train)

    # Give every labelled member of an injected group the group's max score.
    mx_ll_score = ll_score.copy()
    for ig in groups["injected_group"]:
        members = [ix for ix in groups["group_events"][ig] if lbl[ix] == 1]
        _spread_group_max(mx_ll_score, members)

    train_result["auc_train"] = roc_metric(ll_score, lbl)
    train_result["pr_train"] = ap(ll_score, lbl)
    train_result["auc_train_grp"] = roc_metric(mx_ll_score, lbl)
    train_result["pr_train_grp"] = ap(mx_ll_score, lbl)

    try:
        y_test_inj, tgroups, lblt = synthetic_groups(
            test_data[target_station], plot=False, alpha=alpha,
            threshold=threshold)
        ll_score_test = test_evaluate_group(
            trained_model=model, k_station=k_station, test_data=test_data,
            y_inj=y_test_inj)
    except Exception as ex:
        # Best effort: a station without usable test data still reports its
        # training metrics.  print(ex) replaces the Python-2-only
        # `print ex.message`.
        print(ex)
        return train_result

    # NOTE(review): unlike the training pass, group members are not filtered
    # by label here -- confirm that the difference is intended.
    tmx_ll_score = ll_score_test.copy()
    for ig in tgroups["injected_group"]:
        _spread_group_max(tmx_ll_score, tgroups["group_events"][ig])

    train_result["auc_test"] = roc_metric(ll_score_test, lblt)
    train_result["pr_test"] = ap(ll_score_test, lblt)
    train_result["auc_test_grp"] = roc_metric(tmx_ll_score, lblt)
    train_result["pr_test_grp"] = ap(tmx_ll_score, lblt)

    test_plot(tmx_ll_score, lblt, asmatrix(y_test_inj))
    return train_result
dt = observations.copy() abnormal_report = range(200, 210) rainy_days = range(107, 117) dt[abnormal_report] = 20.0 dt[rainy_days] = 0.0 faulty_day = abnormal_report + rainy_days lbl = np.zeros([dt.shape[0]]) lbl[faulty_day] = 1.0 return dt, lbl #plt.subplot(321) print "injected faults" dt, lbl = synthetic_fault(y, True) ll_ob = model.predict(x, y=dt) print roc_metric(ll_ob, lbl, False) #src.residual_plot(np.log(observed_value+src.eps), np.log(y+src.eps), fitted_value) #print roc_metric() #yhat = -np.log(src.predict(x, y)) def plot_synthetic(dt, y): plt.plot(dt, '.r', label='inserted faults') plt.plot(y, '.b', label='ground truth') plt.xlabel('Days') plt.ylabel('Rainall mm') plt.legend(loc='best') plt.show() def evaluate_model(trained_model, x_test, y_test, lbl):