Пример #1
0
def get_model(num_exm,
              num_train,
              lens,
              block_len,
              blocks=1,
              anomaly_prob=0.15):
    """Generate toy anomaly sequences and wrap them into SOHMM data sets.

    Draws ``num_exm`` sequences from ``ToyData.get_2state_anom_seq``, runs the
    collected observations through ``normalize_sequence_data`` and splits them
    at index ``num_train``.

    Args:
        num_exm: total number of sequences to generate.
        num_train: number of leading sequences that form the training split.
        lens: forwarded as first argument to the generator (presumably the
            sequence length -- confirm against ``ToyData``).
        block_len: forwarded as second argument to the generator.
        blocks: forwarded to the generator as ``num_blocks``.
        anomaly_prob: forwarded to the generator as ``anom_prob``.

    Returns:
        A 4-tuple ``(train, test, comb, label)``: SOHMM objects built from the
        first ``num_train`` sequences, from the remaining ones and from all of
        them, followed by the list of per-sequence markers returned by the
        generator.
    """
    print(
        'Generating {0} sequences, {1} for training, each with {2} anomaly probability.'
        .format(num_exm, num_train, anomaly_prob))
    X = []
    Y = []
    label = []
    for _ in range(num_exm):
        (exm, lbl,
         marker) = ToyData.get_2state_anom_seq(lens,
                                               block_len,
                                               anom_prob=anomaly_prob,
                                               num_blocks=blocks)
        X.append(exm)
        Y.append(lbl)
        label.append(marker)
    # Normalization is applied to all sequences at once, before the split.
    X = normalize_sequence_data(X)
    return (SOHMM(X[0:num_train],
                  Y[0:num_train]), SOHMM(X[num_train:],
                                         Y[num_train:]), SOHMM(X, Y), label)
Пример #2
0
def get_model(num_exm, num_train, lens, feats, anomaly_prob=0.15):
    """Generate mean-centered 2-state Gaussian toy sequences as SOHMM sets.

    Every sequence comes from ``ToyData.get_2state_gaussian_seq``. The
    per-feature mean over all positions of all sequences is subtracted from
    each sequence in place before the data is split at ``num_train``.

    Args:
        num_exm: total number of sequences to generate.
        num_train: number of leading sequences used for the training split.
        lens: forwarded to the generator; also the width of the ones-vector
            used to sum each sequence over its columns.
        feats: number of feature rows (forwarded as ``dims``).
        anomaly_prob: forwarded to the generator as ``anom_prob``.

    Returns:
        ``(train, test, comb, label)`` -- three SOHMM objects (first
        ``num_train`` sequences, the rest, all) and the list of markers.
    """
    print(
        'Generating {0} sequences, {1} for training, each with {2} anomaly probability.'
        .format(num_exm, num_train, anomaly_prob))
    sequences, states, label = [], [], []
    feature_sum = 0.0
    total_len = 0
    for _ in range(num_exm):
        seq, state, marker = ToyData.get_2state_gaussian_seq(
            lens, dims=feats, anom_prob=anomaly_prob)
        # (1 x lens) ones times (lens x feats) sums every feature over time.
        feature_sum += co.matrix(1.0, (1, lens)) * seq.trans()
        total_len += lens
        sequences.append(seq)
        states.append(state)
        label.append(marker)

    feature_mean = feature_sum / float(total_len)
    # Center each feature row in place using the global mean.
    for seq in sequences:
        for d in range(feats):
            seq[d, :] = seq[d, :] - feature_mean[d]

    train = SOHMM(sequences[0:num_train], states[0:num_train])
    test = SOHMM(sequences[num_train:], states[num_train:])
    comb = SOHMM(sequences, states)
    return (train, test, comb, label)
Пример #3
0
def build_fisher_kernel(data,
                        labels,
                        num_train,
                        ord=2,
                        param=2,
                        set_rand=False):
    """Build a kernel from SOHMM feature maps under estimated HMM parameters.

    Transition counts and per-state emission means are estimated from the
    first ``num_train`` labelled sequences only. The stacked parameter vector
    is handed to ``SOHMM.argmax`` for every sequence; the returned feature
    vectors are norm-scaled and fed to ``Kernel.get_kernel``.

    Args:
        data: list of observation matrices; ``data[0].size`` must unpack to
            ``(features, length)`` (as cvxopt matrices do).
        labels: list of state sequences; row 0 of each entry is used.
        num_train: number of leading sequences used for parameter estimation.
        ord: norm order passed to ``np.linalg.norm`` for the normalization.
        param: number of states.
        set_rand: if true, replace the estimated parameter vector by a
            random one drawn with ``co.uniform``.

    Returns:
        ``(kern, phi)`` -- the kernel over all sequences and the matrix whose
        columns are the normalized feature vectors.
    """
    N = len(data)
    (F, LEN) = data[0].size

    A = np.zeros((param, param))
    E = np.zeros((param, F))

    phi = co.matrix(0.0, (param * param + F * param, N))
    cnt = 0
    cnt_states = np.zeros(param)
    for n in range(num_train):
        lbl = np.array(labels[n])[0, :]
        exm = np.array(data[n])
        # Count transitions i -> j between consecutive positions.
        for i in range(param):
            for j in range(param):
                A[i, j] += np.where((lbl[:-1] == i) & (lbl[1:] == j))[0].size
        for i in range(param):
            inds = np.where(lbl == i)[0]
            E[i, :] += exm[:, inds].sum(axis=1)
            # Count each visited position once per state, not once per
            # feature; otherwise the emission means shrink by a factor of F.
            cnt_states[i] += inds.size
        cnt += LEN

    for i in range(param):
        # A state that never occurs keeps a zero mean instead of NaN.
        if cnt_states[i] > 0:
            E[i, :] /= cnt_states[i]
    sol = co.matrix(
        np.vstack(
            (A.reshape(param * param, 1) / float(cnt), E.reshape(param * F,
                                                                 1))))
    print(sol)

    if set_rand:
        print('Set random parameter vector for Fisher kernel.')
        sol = co.uniform(param * param + param * F)

    model = SOHMM(data, labels)
    for n in range(N):
        (val, latent, phi[:, n]) = model.argmax(sol, n)
        phi[:, n] /= np.linalg.norm(phi[:, n], ord=ord)

    kern = Kernel.get_kernel(phi, phi)
    return kern, phi
Пример #4
0
def get_model(num_exm, num_train, lens, feats, anomaly_prob=0.15):
    """Generate mean-centered 2-state Gaussian toy sequences as SOHMM sets.

    Each sequence is drawn with ``get_2state_gaussian_seq``. The average of
    the per-sequence feature means is subtracted from every sequence before
    the data is split at ``num_train``.

    Args:
        num_exm: total number of sequences to generate.
        num_train: number of leading sequences used for the training split.
        lens: forwarded to the generator; each sequence is expected to have
            this many columns.
        feats: number of feature rows (forwarded as ``dims``).
        anomaly_prob: forwarded to the generator as ``anom_prob``.

    Returns:
        ``(train, test, comb, label)`` -- three SOHMM objects (first
        ``num_train`` sequences, the rest, all) and the list of markers.
    """
    print('Generating {0} sequences, {1} for training, each with {2} anomaly probability.'.format(num_exm, num_train, anomaly_prob))
    mean = np.zeros(feats)
    X = []
    Y = []
    label = []
    for _ in range(num_exm):
        exm, lbl, marker = get_2state_gaussian_seq(lens, dims=feats, anom_prob=anomaly_prob)
        # Builtin float replaces the np.float alias, which no longer exists
        # in current NumPy releases.
        mean += np.ones((1, lens)).dot(exm.T).reshape(feats) / float(lens)
        X.append(exm)
        Y.append(lbl)
        label.append(marker)
    mean = mean / float(num_exm)
    # A (feats, 1) column broadcasts across all columns of each sequence.
    mean_col = mean.reshape((feats, 1))
    X = [exm - mean_col for exm in X]
    return SOHMM(X[:num_train], Y[:num_train]), SOHMM(X[num_train:], Y[num_train:]), SOHMM(X, Y), label
Пример #5
0
def test_hmad(phi,
              kern,
              train,
              test,
              num_train,
              anom_prob,
              labels,
              zero_shot=False,
              param=2):
    """Train a latent one-class SVM on ``train`` and return its test AUC.

    Args:
        phi: not used by this function.
        kern: not used by this function.
        train: training data object; must expose ``X`` and ``y``.
        test: test data object; must expose ``X`` and ``y``.
        num_train: number of training sequences; with ``anom_prob`` it sets
            the regularization constant ``C = 1 / (num_train * anom_prob)``.
        anom_prob: assumed anomaly fraction (see ``num_train``).
        labels: ground-truth markers for all sequences; entries from index
            ``num_train`` onwards are scored against the predictions.
        zero_shot: forwarded to ``LatentOCSVM.train_dc``.
        param: number of states for the two SOHMM copies built below.

    Returns:
        Area under the ROC curve of the predicted scores on the test split.
    """
    # NOTE(review): these copies are built with ``param`` states but never
    # used afterwards -- the detector is trained on ``train`` / applied to
    # ``test`` directly. Kept in case construction has side effects; confirm.
    ntrain = SOHMM(train.X, train.y, num_states=param)
    ntest = SOHMM(test.X, test.y, num_states=param)

    # Train the structured anomaly detector.
    regularizer = 1.0 / (num_train * anom_prob)
    detector = LatentOCSVM(train, C=regularizer)
    (solution, train_lats, threshold) = detector.train_dc(max_iter=60,
                                                          zero_shot=zero_shot)

    # Score the held-out sequences and evaluate against their markers.
    (scores, test_lats) = detector.apply(test)
    (fpr, tpr, roc_thresholds) = metric.roc_curve(labels[num_train:], scores)
    return metric.auc(fpr, tpr)
Пример #6
0
            total_len = 0
            for i in range(EXMS):
                total_len += len(combY[i])
            print('---> Total length = {0}.'.format(total_len))

            trainX = combX[0:NUM_TRAIN_ANOM]
            trainX.extend(X[0:NUM_TRAIN_NON])
            trainY = combY[0:NUM_TRAIN_ANOM]
            trainY.extend(Y[0:NUM_TRAIN_NON])

            testX = combX[NUM_TRAIN_ANOM:NUM_COMB_ANOM]
            testX.extend(X[NUM_TRAIN_NON:NUM_COMB_NON])
            testY = combY[NUM_TRAIN_ANOM:NUM_COMB_ANOM]
            testY.extend(Y[NUM_TRAIN_NON:NUM_COMB_NON])

            train = SOHMM(trainX, trainY, num_states=2)
            test = SOHMM(testX, testY, num_states=2)
            comb = SOHMM(combX, combY, num_states=2)

            inds_train = co.matrix(
                range(NUM_TRAIN_ANOM) +
                range(NUM_COMB_ANOM, NUM_COMB_ANOM + NUM_TRAIN_NON))
            inds_test = co.matrix(
                range(NUM_TRAIN_ANOM, NUM_COMB_ANOM) +
                range(NUM_COMB_ANOM + NUM_TRAIN_NON, NUM_COMB_ANOM +
                      NUM_COMB_NON))

            # init result cache
            if not all_auc.has_key('SSVM'):
                # collect aucs
                all_auc['OcSvm (Hist 4)'] = []