def __init__(self, nwords, nchars, ntags, pretrained_list): super().__init__() # Create word embeddings pretrained_tensor = torch.FloatTensor(pretrained_list) self.word_embedding = torch.nn.Embedding.from_pretrained( pretrained_tensor, freeze=False) # Create input dropout parameter self.word_dropout = torch.nn.Dropout(1 - KEEP_PROB) # Create LSTM parameters self.lstm = torch.nn.LSTM(DIM_EMBEDDING + CHAR_LSTM_HIDDEN, LSTM_HIDDEN, num_layers=LSTM_LAYER, batch_first=True, bidirectional=True) # Create output dropout parameter self.lstm_output_dropout = torch.nn.Dropout(1 - KEEP_PROB) # Character-level LSTMs self.char_embedding = torch.nn.Embedding(nchars, CHAR_DIM_EMBEDDING) self.char_lstm = torch.nn.LSTM(CHAR_DIM_EMBEDDING, CHAR_LSTM_HIDDEN, num_layers=1, batch_first=True, bidirectional=False) # Create final matrix multiply parameters self.hidden_to_tag = torch.nn.Linear(LSTM_HIDDEN * 2, ntags + 2) self.ssvm = SSVM(target_size=ntags)
def experiment_anomaly_segmentation(train, test, comb, num_train, anom_prob, labels):
    """Compare transductive structured anomaly segmentation with a
    supervised structured-SVM baseline.

    Returns:
        A ``(cont, base_cont)`` pair: the evaluation result for the
        StructuredOCSVM's inferred latent states on the test portion, and
        the evaluation result for the SSVM baseline's predictions.
    """
    # Transductive phase: fit the structured one-class SVM on the combined
    # (train + test) data, then evaluate only the latent states belonging
    # to the test portion.
    detector = StructuredOCSVM(comb, C=1.0 / (num_train * 0.5))
    _, latent_states, _ = detector.train_dc(max_iter=40)
    cont, _ = test.evaluate(latent_states[num_train:])

    # Supervised baseline: train a structured SVM on the labeled data and
    # score its predictions on the same test set.
    baseline = SSVM(train)
    baseline.train()
    _, predictions = baseline.apply(test)
    base_cont, _ = test.evaluate(predictions)

    return cont, base_cont
def experiment_anomaly_segmentation(train, test, comb, num_train, anom_prob, labels):
    """Run transductive structured anomaly segmentation alongside a
    supervised SSVM baseline and return their evaluation results.

    Returns:
        ``(cont, base_cont)`` -- test-set evaluation of the one-class
        model's latent states and of the SSVM's predictions, respectively.
    """
    # --- transductive train/predict with a structured one-class SVM ---
    reg = 1.0 / (num_train * 0.5)
    sad_model = StructuredOCSVM(comb, C=reg)
    dc_result = sad_model.train_dc(max_iter=40)
    # train_dc returns (solution, latent states, threshold); only the
    # latent states of the held-out examples are scored here.
    lats = dc_result[1]
    cont = test.evaluate(lats[num_train:])[0]

    # --- supervised structured-SVM baseline ---
    svm = SSVM(train)
    svm.train()
    preds = svm.apply(test)[1]
    base_cont = test.evaluate(preds)[0]

    return (cont, base_cont)
def perf_ssvm(test_inds, marker, train, test):
    """Train and evaluate a structured SVM for SAD annotation.

    Args:
        test_inds: indices selecting the test examples inside ``marker``.
        marker: ground-truth anomaly markers for all examples.
        train: structured training-data object passed to :class:`SSVM`.
        test: structured test-data object with an ``evaluate`` method.

    Returns:
        A ``(auc, res)`` pair where ``auc`` is the ROC-AUC of the SSVM
        scores and ``res`` is the tuple
        ``(fscore, precision, sensitivity, specificity)``.
    """
    # SAD annotation
    print('(a) Setup SSVM...')
    ssvm = SSVM(train, C=10.0)
    print('(b) Train SSVM...')
    (lsol, slacks) = ssvm.train()
    print('(c) Evaluate SSVM...')
    (scores, lats) = ssvm.apply(test)
    (err, err_exm) = test.evaluate(lats)
    res = (err['fscore'], err['precision'], err['sensitivity'], err['specificity'])
    # Scores are negated so that higher values mean "more anomalous" for
    # the ROC computation.
    (fpr, tpr, thres) = metric.roc_curve(co.matrix(marker)[test_inds], -scores)
    auc = metric.auc(fpr, tpr)
    print('(d) Return AUC={0}...'.format(auc))
    # BUG FIX: was the Python-2 statement `print res`, a SyntaxError under
    # Python 3 and inconsistent with the print() calls above.
    print(res)
    return auc, res
if len(dat_obj.traj_dict[tid]) >= 2 } good_partition = True for j in test_ix: if keys_cv[j][0] not in poi_set: good_partition = False break if good_partition is True: poi_list = sorted(poi_set) break # train ssvm = SSVM(inference_train=inference_method, inference_pred=inference_method, dat_obj=dat_obj, share_params=SSVM_SHARE_PARAMS, multi_label=SSVM_MULTI_LABEL, C=ssvm_C, poi_info=poi_info_i.loc[poi_list].copy()) if ssvm.train(sorted(trajid_set_train), n_jobs=N_JOBS) is True: for j in test_ix: # test ps_cv, L_cv = keys_cv[j] y_hat_list = ssvm.predict(ps_cv, L_cv) if y_hat_list is not None: F1, pF1, tau = evaluate(dat_obj, keys_cv[j], y_hat_list) F1_ssvm.append(F1) pF1_ssvm.append(pF1) Tau_ssvm.append(tau) else: for j in test_ix: F1_ssvm.append(0)
class TaggerModel(torch.nn.Module):
    """Neural sequence tagger with structured inference.

    Pretrained word embeddings are concatenated with a character-level
    LSTM summary per token, fed through a bidirectional word-level LSTM,
    projected to tag scores, and finally scored/decoded by an SSVM layer.
    """

    def __init__(self, nwords, nchars, ntags, pretrained_list):
        """Construct all layers.

        Args:
            nwords: word-vocabulary size (unused; the embedding size comes
                from ``pretrained_list`` -- NOTE(review): confirm they agree).
            nchars: number of distinct characters.
            ntags: number of output tags; the projection has ``ntags + 2``
                outputs (presumably extra start/stop tags -- TODO confirm).
            pretrained_list: pretrained word vectors, one row per word.
        """
        super().__init__()
        # Create word embeddings; freeze=False allows fine-tuning.
        pretrained_tensor = torch.FloatTensor(pretrained_list)
        self.word_embedding = torch.nn.Embedding.from_pretrained(
            pretrained_tensor, freeze=False)
        # Create input dropout parameter (KEEP_PROB is a keep probability,
        # torch Dropout wants a drop probability).
        self.word_dropout = torch.nn.Dropout(1 - KEEP_PROB)
        # Create LSTM parameters; input is word embedding + char summary.
        self.lstm = torch.nn.LSTM(DIM_EMBEDDING + CHAR_LSTM_HIDDEN,
                                  LSTM_HIDDEN, num_layers=LSTM_LAYER,
                                  batch_first=True, bidirectional=True)
        # Create output dropout parameter
        self.lstm_output_dropout = torch.nn.Dropout(1 - KEEP_PROB)
        # Character-level LSTM (single layer, unidirectional)
        self.char_embedding = torch.nn.Embedding(nchars, CHAR_DIM_EMBEDDING)
        self.char_lstm = torch.nn.LSTM(CHAR_DIM_EMBEDDING, CHAR_LSTM_HIDDEN,
                                       num_layers=1, batch_first=True,
                                       bidirectional=False)
        # Create final matrix multiply parameters; *2 for bidirectionality.
        self.hidden_to_tag = torch.nn.Linear(LSTM_HIDDEN * 2, ntags + 2)
        # Structured SVM layer used for hinge loss and decoding.
        self.ssvm = SSVM(target_size=ntags)

    def forward(self, sentences, mask, sent_tokens, labels, lengths,
                cur_batch_size):
        """Compute the SSVM hinge loss and the predicted tag sequences.

        Args:
            sentences: padded word-id tensor -- assumes shape
                (batch, max_len); TODO confirm.
            mask: padding mask forwarded to the SSVM layer.
            sent_tokens: per-word character ids, reshaped below to
                (batch * max_len, chars_per_word).
            labels: gold tag ids for the hinge loss.
            lengths: true sentence lengths for packing.
            cur_batch_size: number of sentences in this batch.

        Returns:
            ``(loss, predicted_tags)``.
        """
        max_length = sentences.size(1)
        # Look up word vectors
        word_vectors = self.word_embedding(sentences)
        # Apply dropout
        dropped_word_vectors = self.word_dropout(word_vectors)
        # Flatten words so each word's character sequence is one row for
        # the char LSTM.
        sent_tokens = sent_tokens.view(cur_batch_size * max_length, -1)
        token_vectors = self.char_embedding(sent_tokens)
        char_lstm_out, (hn, cn) = self.char_lstm(token_vectors, None)
        # Use the final hidden state of the last layer as the per-word
        # character summary, reshaped back to (batch, max_len, hidden).
        char_lstm_out = hn[-1].view(cur_batch_size, max_length,
                                    CHAR_LSTM_HIDDEN)
        concat_vectors = torch.cat((dropped_word_vectors, char_lstm_out),
                                   dim=2)
        # Run the LSTM over the input, reshaping data for efficiency
        packed_words = torch.nn.utils.rnn.pack_padded_sequence(
            concat_vectors, lengths, True)
        lstm_out, _ = self.lstm(packed_words, None)
        # total_length=max_length restores the original padded width.
        lstm_out, _ = torch.nn.utils.rnn.pad_packed_sequence(
            lstm_out, batch_first=True, total_length=max_length)
        # Apply dropout
        lstm_out_dropped = self.lstm_output_dropout(lstm_out)
        # Matrix multiply to get scores for each tag
        output_scores = self.hidden_to_tag(lstm_out_dropped)
        loss = self.ssvm.hinge_loss(output_scores, mask, labels)
        predicted_tags = self.ssvm(output_scores, mask)
        return loss, predicted_tags
vars=[0.3, 0.3]) Dtrain4 = ToyData.get_gaussian(50, dims=2, means=[6.0, -3.0], vars=[0.2, 0.1]) Dtrain = co.matrix([[Dtrain1], [Dtrain2], [Dtrain3], [Dtrain4]]) Dtrain = co.matrix([[Dtrain.trans()], [co.matrix(1.0, (1250, 1))]]).trans() Dy = co.matrix([[co.matrix(0, (1, 1000))], [co.matrix(1, (1, 100))], [co.matrix(2, (1, 100))], [co.matrix(3, (1, 50))]]) # generate structured object sobj = SOMultiClass(Dtrain, NUM_CLASSES, Dy) # train svdd ssvm = SSVM(sobj, 1.0) (ws, slacks) = ssvm.train() print(ws) # print(slacks) # generate test data grid delta = 0.1 x = np.arange(-4.0, 8.0, delta) y = np.arange(-4.0, 8.0, delta) X, Y = np.meshgrid(x, y) (sx, sy) = X.shape Xf = np.reshape(X, (1, sx * sy)) Yf = np.reshape(Y, (1, sx * sy)) Dtest = np.append(Xf, Yf, axis=0) Dtest = np.append(Dtest, np.reshape([1.0] * (sx * sy), (1, sx * sy)),
mean, 1. * np.random.rand() * np.eye(2), size=NUM_DATA) Dy[i * NUM_DATA:(i + 1) * NUM_DATA] = i # generate structured object sobj = SOMultiClass(Dtrain.T, y=Dy, classes=NUM_CLASSES) # unsupervised methods lsvdd = LatentSVDD(sobj, 0.9) lsvdd.fit() spca = LatentPCA(sobj) spca.fit() socsvm = LatentOCSVM(sobj, .2) socsvm.fit() # supervised methods ssvm = SSVM(sobj) ssvm.train() # generate test data grid delta = 0.2 x = np.arange(-8.0, 8.0, delta) y = np.arange(-8.0, 8.0, delta) X, Y = np.meshgrid(x, y) (sx, sy) = X.shape Xf = np.reshape(X, (1, sx * sy)) Yf = np.reshape(Y, (1, sx * sy)) Dtest = np.append(Xf, Yf, axis=0) Dtest = np.append(Dtest, np.ones((1, sx * sy)), axis=0) print(Dtest.shape) # generate structured object
# NOTE(review): Dtrain1 and Dtrain2 are defined before this span
# (presumably 100 and 10 Gaussian samples, given the 125-row total and the
# label counts below -- confirm upstream).
Dtrain3 = ToyData.get_gaussian(10, dims=2, means=[3.0, -3.0], vars=[0.3, 0.3])
Dtrain4 = ToyData.get_gaussian(5, dims=2, means=[6.0, -3.0], vars=[0.2, 0.1])
# Stack the four clusters and append a constant bias feature of 1.0 to
# each of the 125 examples.
Dtrain = co.matrix([[Dtrain1], [Dtrain2], [Dtrain3], [Dtrain4]])
Dtrain = co.matrix([[Dtrain.trans()], [co.matrix(1.0, (125, 1))]]).trans()
# Class labels: 100 / 10 / 10 / 5 examples for classes 0..3.
Dy = co.matrix([co.matrix([0] * 100), co.matrix([1] * 10),
                co.matrix([2] * 10), co.matrix([3] * 5)])

# generate structured object
sobj = SOMultiClass(Dtrain, y=Dy, classes=NUM_CLASSES)

# unsupervised methods (C = 1 / (n * nu) with n=125, nu=1.0)
lsvdd = LatentSVDD(sobj, 1.0 / (125.0 * 1.0))
spca = StructuredPCA(sobj)
socsvm = StructuredOCSVM(sobj, 1.0 / (125.0 * 1.0))

# supervised methods
ssvm = SSVM(sobj)

# generate test data grid over [-8, 8] x [-8, 8]
delta = 0.2
x = np.arange(-8.0, 8.0, delta)
y = np.arange(-8.0, 8.0, delta)
X, Y = np.meshgrid(x, y)
(sx, sy) = X.shape
# Flatten the grid into 2 x (sx*sy) feature columns and append the same
# constant bias feature used for training.
Xf = np.reshape(X, (1, sx * sy))
Yf = np.reshape(Y, (1, sx * sy))
Dtest = np.append(Xf, Yf, axis=0)
Dtest = np.append(Dtest, np.reshape([1.0] * (sx * sy), (1, sx * sy)), axis=0)
print(Dtest.shape)

# generate structured object for prediction on the grid (no labels)
predsobj = SOMultiClass(co.matrix(Dtest), NUM_CLASSES)
# generate raw training data Dtrain1 = ToyData.get_gaussian(1000,dims=2,means=[4.0,2.0],vars=[1.0,0.3]) Dtrain2 = ToyData.get_gaussian(100,dims=2,means=[-2.0,1.0],vars=[0.3,1.3]) Dtrain3 = ToyData.get_gaussian(100,dims=2,means=[3.0,-1.0],vars=[0.3,0.3]) Dtrain4 = ToyData.get_gaussian(50,dims=2,means=[6.0,-3.0],vars=[0.2,0.1]) Dtrain = co.matrix([[Dtrain1], [Dtrain2], [Dtrain3], [Dtrain4]]) Dtrain = co.matrix([[Dtrain.trans()],[co.matrix(1.0,(1250,1))]]).trans() Dy = co.matrix([[co.matrix(0,(1,1000))], [co.matrix(1,(1,100))], [co.matrix(2,(1,100))], [co.matrix(3,(1,50))]]) # generate structured object sobj = SOMultiClass(Dtrain,NUM_CLASSES,Dy) # train svdd ssvm = SSVM(sobj,1.0) (ws,slacks) = ssvm.train() print(ws) # print(slacks) # generate test data grid delta = 0.1 x = np.arange(-4.0, 8.0, delta) y = np.arange(-4.0, 8.0, delta) X, Y = np.meshgrid(x, y) (sx,sy) = X.shape Xf = np.reshape(X,(1,sx*sy)) Yf = np.reshape(Y,(1,sx*sy)) Dtest = np.append(Xf,Yf,axis=0) Dtest = np.append(Dtest,np.reshape([1.0]*(sx*sy),(1,sx*sy)),axis=0) print(Dtest.shape)