def is_anomaly(adl_dataset, adl_data_test):
    """Fit a least-squares anomaly detector on ADL training data and score
    the ADL test data.

    Returns a tuple of (predicted labels, class probabilities) for the
    test set.
    """
    # Fitting estimates the model under the fixed kernel-width (sigma)
    # and regularization (rho) hyper-parameters chosen here.
    detector = LSAnomaly(sigma=3, rho=0.1)
    train_column = np.reshape(adl_dataset, (-1, 1))
    detector.fit(train_column)
    test_column = np.reshape(adl_data_test, (-1, 1))
    labels = detector.predict(test_column)
    probabilities = detector.predict_proba(test_column)
    # predicted result and probability of the result
    return labels, probabilities
def testing_func(a):
    """Fit one LSAnomaly model for a single (sigma, rho) grid point.

    Parameters
    ----------
    a : tuple
        (sigma, rho) hyper-parameter pair, e.g. one element of
        product(sigmas, rhos).

    Returns
    -------
    list
        [sigma, rho, anomaly_count, anomaly_fraction] over X_test.

    Relies on module-level X_train / X_test arrays.
    """
    sigma, rho = a
    # print() calls replace the Python-2 `print` statements, which are
    # syntax errors on Python 3; the single-argument form behaves the
    # same on both versions.
    print("Training with rho: %s, sigma: %s" % (rho, sigma))
    clf = LSAnomaly(rho=rho, sigma=sigma)
    clf.fit(X_train)
    y_pred = np.asarray(clf.predict(X_test))
    anomalies = np.where(y_pred == 'anomaly')[0]
    print(anomalies)
    # float() keeps true division under Python 2 as well.
    return [sigma, rho, len(anomalies),
            len(anomalies) / float(X_test.shape[0])]
def test_example_doc(doc_arrays, check_ndarray):
    """Reproduce the documentation example: a seeded LSAnomaly model fitted
    on the doc training array must label the origin an inlier and return
    the documented class probabilities."""
    query = np.array([[0]])
    train_data, predict_prob = doc_arrays
    model = LSAnomaly(sigma=3, rho=0.1, seed=42)
    model.fit(train_data)
    # The origin lies inside the training distribution: label 0.0.
    assert model.predict(query) == [0.0]
    want = np.array([[0.7231233, 0.2768767]])
    got = model.predict_proba(query)
    logger.debug("p = {}".format(got))
    check_ndarray(want, got)
def testing_func(a):
    """Fit one LSAnomaly model for a single (sigma, rho) grid point.

    Parameters
    ----------
    a : tuple
        (sigma, rho) hyper-parameter pair, e.g. one element of
        product(sigmas, rhos).

    Returns
    -------
    list
        [sigma, rho, anomaly_count, anomaly_fraction] over X_test.

    Relies on module-level X_train / X_test arrays.
    """
    sigma, rho = a
    # print() calls replace the Python-2 `print` statements, which are
    # syntax errors on Python 3; the single-argument form behaves the
    # same on both versions.
    print("Training with rho: %s, sigma: %s" % (rho, sigma))
    clf = LSAnomaly(rho=rho, sigma=sigma)
    clf.fit(X_train)
    y_pred = np.asarray(clf.predict(X_test))
    anomalies = np.where(y_pred == 'anomaly')[0]
    print(anomalies)
    # float() keeps true division under Python 2 as well.
    return [sigma, rho, len(anomalies),
            len(anomalies) / float(X_test.shape[0])]
pool = Pool(processes=8) results = pool.map(testing_func, product(sigmas, rhos)) pool.close() pool.join() else: results = [] for rho in rhos: for sigma in sigmas: print "Now fitting: rho - %s; sigma - %s" % (rho, sigma) clf = LSAnomaly(rho=rho, sigma=sigma, verbose=False) clf.fit(X_train) y_pred = clf.predict(X_test) anomalies = np.where(y_pred == 'anomaly')[0] print np.unique(anomalies) results.append([nu, gamma, len(anomalies), len(anomalies)/float(X_test.shape[0])]) test_df = DataFrame(results, columns=["Sigma", "Rho", "Anomalies", "Percent"]) test_df.to_csv("lsq_anomaly_testing_fifth_betterparams.csv") if learn: all_anom = [] # Repeat the process many times # Record all anomalies and look for those which are consistently # labeled.
pool = Pool(processes=8) results = pool.map(testing_func, product(sigmas, rhos)) pool.close() pool.join() else: results = [] for rho in rhos: for sigma in sigmas: print "Now fitting: rho - %s; sigma - %s" % (rho, sigma) clf = LSAnomaly(rho=rho, sigma=sigma, verbose=False) clf.fit(X_train) y_pred = clf.predict(X_test) anomalies = np.where(y_pred == 'anomaly')[0] print np.unique(anomalies) results.append([ nu, gamma, len(anomalies), len(anomalies) / float(X_test.shape[0]) ]) test_df = DataFrame(results, columns=["Sigma", "Rho", "Anomalies", "Percent"]) test_df.to_csv("lsq_anomaly_testing_fifth_betterparams.csv") if learn: all_anom = []