def get_model(num_exm, num_train, lens, block_len, blocks=1, anomaly_prob=0.15):
    """Generate toy 2-state anomaly sequences and wrap them in SOHMM models.

    Draws `num_exm` sequences of length `lens` (anomalous blocks of length
    `block_len`, `blocks` per sequence, with probability `anomaly_prob`),
    normalizes the feature sequences, and returns
    (train-SOHMM, test-SOHMM, full-SOHMM, per-sequence anomaly markers).
    """
    print(
        'Generating {0} sequences, {1} for training, each with {2} anomaly probability.'
        .format(num_exm, num_train, anomaly_prob))
    total_symbols = 0
    seqs = []
    states = []
    markers = []
    # running per-position label counts over the training portion
    # (computed for inspection only; not used below)
    train_lbl_stats = co.matrix(0.0, (1, lens))
    for idx in range(num_exm):
        seq, state_seq, marker = ToyData.get_2state_anom_seq(
            lens, block_len, anom_prob=anomaly_prob, num_blocks=blocks)
        total_symbols += lens
        seqs.append(seq)
        states.append(state_seq)
        markers.append(marker)
        if idx < num_train:
            train_lbl_stats += state_seq
    seqs = normalize_sequence_data(seqs)
    train_model = SOHMM(seqs[0:num_train], states[0:num_train])
    test_model = SOHMM(seqs[num_train:], states[num_train:])
    full_model = SOHMM(seqs, states)
    return (train_model, test_model, full_model, markers)
def get_model(num_exm, num_train, lens, feats, anomaly_prob=0.15):
    """Generate toy 2-state Gaussian sequences, center them, and build SOHMMs.

    Draws `num_exm` sequences of length `lens` with `feats` feature
    dimensions, subtracts the global per-feature mean from every sequence,
    and returns (train-SOHMM, test-SOHMM, full-SOHMM, anomaly markers).
    """
    print(
        'Generating {0} sequences, {1} for training, each with {2} anomaly probability.'
        .format(num_exm, num_train, anomaly_prob))
    feat_sum = 0.0
    n_positions = 0
    seqs = []
    states = []
    markers = []
    for _ in range(num_exm):
        seq, state_seq, marker = ToyData.get_2state_gaussian_seq(
            lens, dims=feats, anom_prob=anomaly_prob)
        # accumulate per-feature sums: (1 x lens) * (lens x feats)
        feat_sum += co.matrix(1.0, (1, lens)) * seq.trans()
        n_positions += lens
        seqs.append(seq)
        states.append(state_seq)
        markers.append(marker)
    feat_mean = feat_sum / float(n_positions)
    # center each sequence feature-wise with the global mean
    for idx in range(num_exm):
        for d in range(feats):
            seqs[idx][d, :] = seqs[idx][d, :] - feat_mean[d]
    train_model = SOHMM(seqs[0:num_train], states[0:num_train])
    test_model = SOHMM(seqs[num_train:], states[num_train:])
    full_model = SOHMM(seqs, states)
    return (train_model, test_model, full_model, markers)
def build_fisher_kernel(data, labels, num_train, ord=2, param=2, set_rand=False):
    """Build a Fisher kernel from HMM sufficient statistics.

    Estimates transition counts and per-state emission means from the first
    `num_train` sequences (number of hidden states given by `param`), uses
    the stacked parameter vector to extract a joint feature map per sequence
    via SOHMM.argmax, L`ord`-normalizes each feature vector, and returns the
    resulting (kernel matrix, feature matrix).

    Parameters
    ----------
    data : list of (F x LEN) sequence matrices
    labels : list of (1 x LEN) state-label matrices
    num_train : number of leading sequences used for parameter estimation
    ord : norm order for feature normalization (NOTE: shadows the builtin
        `ord`; kept for interface compatibility)
    param : number of hidden states
    set_rand : if True, replace the estimated parameters with random ones
    """
    N = len(data)
    (F, LEN) = data[0].size
    A = np.zeros((param, param))
    E = np.zeros((param, F))
    phi = co.matrix(0.0, (param * param + F * param, N))
    cnt = 0
    cnt_states = np.zeros(param)
    # count state transitions and accumulate per-state emission sums
    # over the training sequences only
    for n in range(num_train):
        lbl = np.array(labels[n])[0, :]
        exm = np.array(data[n])
        for i in range(param):
            for j in range(param):
                A[i, j] += np.where((lbl[:-1] == i) & (lbl[1:] == j))[0].size
        for i in range(param):
            for f in range(F):
                inds = np.where(lbl == i)[0]
                E[i, f] += np.sum(exm[f, inds])
                cnt_states[i] += inds.size
        cnt += LEN
    # turn emission sums into per-state means
    for i in range(param):
        E[i, :] /= cnt_states[i]
    sol = co.matrix(
        np.vstack(
            (A.reshape(param * param, 1) / float(cnt), E.reshape(param * F, 1))))
    print(sol)
    if set_rand:
        print('Set random parameter vector for Fisher kernel.')
        sol = co.uniform(param * param + param * F)
    # feature map: argmax joint feature vector per sequence, L`ord`-normalized
    model = SOHMM(data, labels)
    for n in range(N):
        (val, latent, phi[:, n]) = model.argmax(sol, n)
        phi[:, n] /= np.linalg.norm(phi[:, n], ord=ord)
    kern = Kernel.get_kernel(phi, phi)
    return kern, phi
def get_model(num_exm, num_train, lens, feats, anomaly_prob=0.15):
    """Generate toy 2-state Gaussian sequences (NumPy variant) and build SOHMMs.

    Draws `num_exm` sequences of length `lens` with `feats` features,
    subtracts the mean of the per-sequence feature means from every
    sequence, and returns (train-SOHMM, test-SOHMM, full-SOHMM, markers).
    """
    print('Generating {0} sequences, {1} for training, each with {2} anomaly probability.'.format(num_exm, num_train, anomaly_prob))
    mean = np.zeros(feats)
    X = []
    Y = []
    label = []
    for i in range(num_exm):
        exm, lbl, marker = get_2state_gaussian_seq(lens, dims=feats, anom_prob=anomaly_prob)
        # accumulate this sequence's per-feature mean
        # (np.float was removed in NumPy 1.24; use the builtin float)
        mean += np.ones((1, lens)).dot(exm.T).reshape(feats) / float(lens)
        X.append(exm)
        Y.append(lbl)
        label.append(marker)
    # mean of per-sequence means (equals the global mean since all
    # sequences share the same length)
    mean = mean / float(num_exm)
    # center every sequence feature-wise
    for i in range(num_exm):
        X[i] = X[i] - mean.reshape((feats, 1)).repeat(lens, axis=1)
    return SOHMM(X[:num_train], Y[:num_train]), SOHMM(X[num_train:], Y[num_train:]), SOHMM(X, Y), label
def test_hmad(phi, kern, train, test, num_train, anom_prob, labels, zero_shot=False, param=2):
    """Train a latent one-class SVM on `train` and return the test-set AUC.

    Parameters
    ----------
    phi, kern, param : unused; kept for interface compatibility with the
        sibling test_* routines
    train, test : SOHMM models holding train/test sequences
    num_train : number of training sequences (also indexes `labels`)
    anom_prob : expected anomaly fraction, sets C = 1 / (num_train * anom_prob)
    labels : per-sequence anomaly ground truth; entries from `num_train`
        onward are scored against the predictions
    zero_shot : forwarded to LatentOCSVM.train_dc

    Returns
    -------
    ROC AUC of the anomaly scores on the test sequences.
    """
    # (removed: a dead `auc = 0.5` initializer and two SOHMM copies of
    # train/test that were constructed but never used)
    sad = LatentOCSVM(train, C=1.0 / (num_train * anom_prob))
    (lsol, lats, thres) = sad.train_dc(max_iter=60, zero_shot=zero_shot)
    (pred_vals, pred_lats) = sad.apply(test)
    (fpr, tpr, thres) = metric.roc_curve(labels[num_train:], pred_vals)
    auc = metric.auc(fpr, tpr)
    return auc
# Report total length of the combined sequences.
total_len = 0
for i in range(EXMS):
    total_len += len(combY[i])
print('---> Total length = {0}.'.format(total_len))

# Training set: anomalous prefix + nominal prefix.
trainX = combX[0:NUM_TRAIN_ANOM]
trainX.extend(X[0:NUM_TRAIN_NON])
trainY = combY[0:NUM_TRAIN_ANOM]
trainY.extend(Y[0:NUM_TRAIN_NON])

# Test set: remaining anomalous + remaining nominal sequences.
testX = combX[NUM_TRAIN_ANOM:NUM_COMB_ANOM]
testX.extend(X[NUM_TRAIN_NON:NUM_COMB_NON])
testY = combY[NUM_TRAIN_ANOM:NUM_COMB_ANOM]
testY.extend(Y[NUM_TRAIN_NON:NUM_COMB_NON])

train = SOHMM(trainX, trainY, num_states=2)
test = SOHMM(testX, testY, num_states=2)
comb = SOHMM(combX, combY, num_states=2)

# Index vectors into the combined ordering for the train/test split.
# list(range(...)) keeps the py2 list-concatenation semantics and is
# also valid on py3 (bare range objects cannot be added).
inds_train = co.matrix(
    list(range(NUM_TRAIN_ANOM))
    + list(range(NUM_COMB_ANOM, NUM_COMB_ANOM + NUM_TRAIN_NON)))
inds_test = co.matrix(
    list(range(NUM_TRAIN_ANOM, NUM_COMB_ANOM))
    + list(range(NUM_COMB_ANOM + NUM_TRAIN_NON, NUM_COMB_ANOM + NUM_COMB_NON)))

# init result cache
# (`dict.has_key` was removed in py3; the `in` operator is equivalent)
if 'SSVM' not in all_auc:
    # collect aucs
    all_auc['OcSvm (Hist 4)'] = []