Example #1
File: tree-lstm.py  Project: happywwy/EEA
#
# The figure displays one sample of the SST dataset: a constituency
# parse tree whose nodes are labeled with sentiment. To speed things up,
# let's build a tiny set with 5 sentences and take a look at the first
# one:
#

import dgl
from dgl.data.tree import SST
from dgl.data import SSTBatch

# Each sample in the dataset is a constituency tree. The leaf nodes
# represent words; each word is an int id stored in the "x" feature field.
# The non-leaf nodes carry a special PAD_WORD id. The sentiment
# label is stored in the "y" feature field.
trainset = SST(mode='tiny')  # the "tiny" set has only 5 trees
tiny_sst = trainset.trees
num_vocabs = trainset.num_vocabs
num_classes = trainset.num_classes

vocab = trainset.vocab  # vocabulary dict: key -> id
inv_vocab = {v: k
             for k, v in vocab.items()}  # inverted vocabulary dict: id -> word

a_tree = tiny_sst[0]
a_tree.draw()

for token in a_tree.ndata['x'].tolist():
    if token != trainset.PAD_WORD:
        print(inv_vocab[token], end=" ")
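
# In these trees the edges point from child to parent, so the root is the
# unique node with out-degree 0. A minimal sketch (not part of the original
# snippet) that reads off the root's sentiment label from the "y" field:
root_ids = [i for i in range(a_tree.number_of_nodes())
            if a_tree.out_degree(i) == 0]
print('root sentiment label:', a_tree.ndata['y'][root_ids[0]].item())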
Example #2
def main(args):
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    th.cuda.manual_seed(args.seed)

    best_epoch = -1
    best_dev_acc = 0

    cuda = args.gpu >= 0
    device = th.device('cuda:{}'.format(args.gpu)) if cuda else th.device('cpu')
    if cuda:
        th.cuda.set_device(args.gpu)

    trainset = SST()
    train_loader = DataLoader(dataset=trainset,
                              batch_size=args.batch_size,
                              collate_fn=batcher(device),
                              shuffle=True,
                              num_workers=0)
    devset = SST(mode='dev')
    dev_loader = DataLoader(dataset=devset,
                            batch_size=100,
                            collate_fn=batcher(device),
                            shuffle=False,
                            num_workers=0)

    testset = SST(mode='test')
    test_loader = DataLoader(dataset=testset,
                             batch_size=100, collate_fn=batcher(device), shuffle=False, num_workers=0)
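
    # `batcher(device)` above is this example's collate function (defined
    # elsewhere in the file); a minimal sketch following the DGL Tree-LSTM
    # tutorial, assuming the SSTBatch fields (graph, mask, wordid, label):
    #
    # def batcher(dev):
    #     def batcher_dev(batch):
    #         batch_trees = dgl.batch(batch)  # merge the trees into one graph
    #         return SSTBatch(graph=batch_trees,
    #                         mask=batch_trees.ndata['mask'].to(dev),
    #                         wordid=batch_trees.ndata['x'].to(dev),
    #                         label=batch_trees.ndata['y'].to(dev))
    #     return batcher_dev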

    model = TreeLSTM(trainset.num_vocabs,
                     args.x_size,
                     args.h_size,
                     trainset.num_classes,
                     args.dropout,
                     cell_type='childsum' if args.child_sum else 'nary',
                     pretrained_emb=trainset.pretrained_emb).to(device)
    print(model)
    params_ex_emb = [x for x in list(model.parameters())
                     if x.requires_grad and x.size(0) != trainset.num_vocabs]
    params_emb = list(model.embedding.parameters())

    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)

    optimizer = optim.Adagrad([
        {'params': params_ex_emb, 'lr': args.lr, 'weight_decay': args.weight_decay},
        {'params': params_emb, 'lr': 0.1 * args.lr}])

    dur = []
    for epoch in range(args.epochs):
        t_epoch = time.time()
        model.train()
        for step, batch in enumerate(train_loader):
            g = batch.graph
            n = g.number_of_nodes()
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            if step >= 3:
                t0 = time.time() # tik

            logits = model(batch, h, c)
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp, batch.label, reduction='sum')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step >= 3:
                dur.append(time.time() - t0) # tok

            if step > 0 and step % args.log_every == 0:
                pred = th.argmax(logits, 1)
                acc = th.sum(th.eq(batch.label, pred))
                root_ids = [i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degree(i)==0]
                root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] == pred.cpu().data.numpy()[root_ids])

                print("Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}".format(
                    epoch, step, loss.item(), 1.0*acc.item()/len(batch.label), 1.0*root_acc/len(root_ids), np.mean(dur)))
        print('Epoch {:05d} training time {:.4f}s'.format(epoch, time.time() - t_epoch))

        # eval on dev set
        accs = []
        root_accs = []
        model.eval()
        for step, batch in enumerate(dev_loader):
            g = batch.graph
            n = g.number_of_nodes()
            with th.no_grad():
                h = th.zeros((n, args.h_size)).to(device)
                c = th.zeros((n, args.h_size)).to(device)
                logits = model(batch, h, c)

            pred = th.argmax(logits, 1)
            acc = th.sum(th.eq(batch.label, pred)).item()
            accs.append([acc, len(batch.label)])
            root_ids = [i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degree(i)==0]
            root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] == pred.cpu().data.numpy()[root_ids])
            root_accs.append([root_acc, len(root_ids)])

        dev_acc = 1.0*np.sum([x[0] for x in accs])/np.sum([x[1] for x in accs])
        dev_root_acc = 1.0*np.sum([x[0] for x in root_accs])/np.sum([x[1] for x in root_accs])
        print("Epoch {:05d} | Dev Acc {:.4f} | Root Acc {:.4f}".format(
            epoch, dev_acc, dev_root_acc))

        if dev_root_acc > best_dev_acc:
            best_dev_acc = dev_root_acc
            best_epoch = epoch
            th.save(model.state_dict(), 'best_{}.pkl'.format(args.seed))
        else:
            if best_epoch <= epoch - 10:
                break
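
        # A node is a root iff its out-degree is 0 (edges point from child to
        # parent), so the `root_ids` scans above can be vectorized, assuming a
        # DGL version with `out_degrees()`:
        # root_ids = (g.out_degrees() == 0).nonzero().squeeze(1).tolist()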

        # lr decay
        for param_group in optimizer.param_groups:
            param_group['lr'] = max(1e-5, param_group['lr'] * 0.99)  # 1% decay per epoch, floored at 1e-5
            print(param_group['lr'])

    # test
    model.load_state_dict(th.load('best_{}.pkl'.format(args.seed)))
    accs = []
    root_accs = []
    model.eval()
    for step, batch in enumerate(test_loader):
        g = batch.graph
        n = g.number_of_nodes()
        with th.no_grad():
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            logits = model(batch, h, c)

        pred = th.argmax(logits, 1)
        acc = th.sum(th.eq(batch.label, pred)).item()
        accs.append([acc, len(batch.label)])
        root_ids = [i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degree(i)==0]
        root_acc = np.sum(batch.label.cpu().data.numpy()[root_ids] == pred.cpu().data.numpy()[root_ids])
        root_accs.append([root_acc, len(root_ids)])

    test_acc = 1.0*np.sum([x[0] for x in accs])/np.sum([x[1] for x in accs])
    test_root_acc = 1.0*np.sum([x[0] for x in root_accs])/np.sum([x[1] for x in root_accs])
    print('------------------------------------------------------------------------------------')
    print("Epoch {:05d} | Test Acc {:.4f} | Root Acc {:.4f}".format(
        best_epoch, test_acc, test_root_acc))
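
# A minimal invocation sketch (the argument names are inferred from the usage
# above; the values here are illustrative assumptions, not the project's
# defaults):
#
# from argparse import Namespace
# main(Namespace(seed=41, gpu=0, batch_size=25, x_size=300, h_size=150,
#                dropout=0.5, lr=0.05, weight_decay=1e-4, epochs=30,
#                log_every=5, child_sum=False))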
Example #3
# The figure displays one sample of the SST dataset: a constituency
# parse tree whose nodes are labeled with sentiment. To speed things up,
# let's build a tiny set with 5 sentences and take a look at the first
# one:
#

import dgl
import pysnooper
from dgl.data.tree import SST
from dgl.data import SSTBatch
import sys

# Each sample in the dataset is a constituency tree. The leaf nodes
# represent words; each word is an int id stored in the "x" feature field.
# The non-leaf nodes carry a special PAD_WORD id. The sentiment
# label is stored in the "y" feature field.
trainset = SST()  # default mode='train'; use mode='tiny' for the 5-tree toy set
tiny_sst = trainset.trees
num_vocabs = trainset.num_vocabs
num_classes = trainset.num_classes

vocab = trainset.vocab  # vocabulary dict: key -> id
inv_vocab = {v: k
             for k, v in vocab.items()}  # inverted vocabulary dict: id -> word

a_tree = tiny_sst[0]
for token in a_tree.ndata['x'].tolist():
    if token != trainset.PAD_WORD:
        print(inv_vocab[token], end=" ")

##############################################################################
# Step 1: batching
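#
# A minimal sketch of the batching step, assuming the trees loaded above:
# dgl.batch merges a list of trees into a single graph whose connected
# components are the individual trees.

graph = dgl.batch(tiny_sst[:2])  # batch the first two trees
print(graph.number_of_nodes(), graph.batch_size)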
Example #4
def main():
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    bestAll_epoch = -1
    bestRoot_epoch = -1
    bestRoot_acc = 0
    bestAll_acc = 0


    trainset = SST()  # default mode='train'
    vocab = trainset.vocab  # vocabulary covering train, dev, and test
    word_to_index = {word: id for word, id in vocab.items()}  # vocabulary dict: word -> id (a copy, not inverted)

    train_loader = DataLoader(dataset=trainset,
                              batch_size=batch_size, 
                              collate_fn=batcher(),
                              shuffle=True,
                              num_workers=0)

    testset = SST(mode='test')
    test_loader = DataLoader(dataset=testset,
                             batch_size=100, collate_fn=batcher(), shuffle=False, num_workers=0)

    model = RvNN(word_to_index,
                 trainset.num_vocabs,
                 emb_dim,
                 trainset.num_classes,
                 dropout).cuda()
    print(model)
    # Keep the embedding parameters separate from the others so they can use different learning rates
    params_ex_emb = [x for x in list(model.parameters())
                     if x.requires_grad and x.size(0) != trainset.num_vocabs]

    params_emb = list(model.embedding.parameters())

    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)  # Xavier uniform initialization

    optimizer = optim.Adagrad([
        {'params': params_ex_emb, 'lr': LR, 'weight_decay': L2_reg},
        {'params': params_emb, 'lr': emb_LR}])

    pt = table(["epoch", "Test Acc", "Root Acc", "Epoch Time"])
    t_epoch = time.time()  # start time
    start = t_epoch
    for epoch in range(Epoch):   # epochs
        model.train()
        for step, batch in enumerate(train_loader):
            g = batch.graph
            # g.set_n_initializer(dgl.init.zero_initializer)
            n = g.number_of_nodes()
            logits = model(batch)
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp, batch.label, reduction='sum')

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        end = time.time()
        # test
        accs = []
        root_accs = []
        model.eval()

        for step, batch in enumerate(test_loader):
            g = batch.graph
            g.set_n_initializer(dgl.init.zero_initializer)
            n = g.number_of_nodes()
            # disable gradient computation during evaluation
            with torch.no_grad():
                logits = model(batch)  # (n, 5)

            pred = torch.argmax(logits, 1)  # (n,)
            acc = torch.sum(torch.eq(batch.label, pred)).item()
            accs.append([acc, len(batch.label)])
            root_ids = [i for i in range(n) if batch.graph.out_degree(i) == 0]
            # root_acc = torch.sum(batch.label.cpu().data.numpy()[root_ids] == pred.cpu().data.numpy()[root_ids])
            root_acc = torch.sum(batch.label.data[root_ids] == pred.data[root_ids]).item()
            root_accs.append([root_acc, len(root_ids)])

        acc = 1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs])
        root_acc = 1.0 * np.sum([x[0] for x in root_accs]) / np.sum([x[1] for x in root_accs])

        if acc > bestAll_acc:
            bestAll_acc = acc
            bestAll_epoch = epoch
        if root_acc > bestRoot_acc:
            bestRoot_acc = root_acc
            bestRoot_epoch = epoch

        pt.row([epoch, acc, root_acc, end - start])
        start = end

    # summary, including total time of training
    print("BestAll_epoch_test: {}   BestAll_acc_test: {:.4f}".format(bestAll_epoch, bestAll_acc))
    print("BestRoot_epoch_test: {}   BestRoot_acc_test: {:.4f}".format(bestRoot_epoch, bestRoot_acc))
    print("Total time:", time.time() - t_epoch)
Example #5
def batcher(dev):
    # Collate a list of trees into one batched graph; a minimal sketch assuming
    # the SSTBatch fields (graph, mask, wordid, label) from dgl.data.
    def batcher_dev(batch):
        batch_trees = dgl.batch(batch)
        return SSTBatch(graph=batch_trees,
                        mask=batch_trees.ndata['mask'].to(dev),
                        wordid=batch_trees.ndata['x'].to(dev),
                        label=batch_trees.ndata['y'].to(dev))

    return batcher_dev


if __name__ == '__main__':
    device = th.device('cpu')
    x_size = 256
    h_size = 256
    dropout = 0.5
    lr = 0.05
    weight_decay = 1e-4
    epochs = 10
    # 'x' holds the word ids (ints); 'y' holds the sentiment labels
    trainset = SST(mode='tiny')  # the 'tiny' set has only 5 trees
    tiny_sst = trainset.trees  # list of the 5 DGLGraph trees
    num_vocabs = trainset.num_vocabs  # 19536
    num_classes = trainset.num_classes  # 5 classes

    vocab = trainset.vocab  # OrderedDict mapping word -> id (19536 entries)
    print(len(vocab))
    inv_vocab = {v: k for k, v in vocab.items()}

    a_tree = tiny_sst[0]
    # print(a_tree.ndata['x'])
    # print(a_tree.ndata['x'].tolist())
    res = []
    for token in a_tree.ndata['x'].tolist():
        if token != trainset.PAD_WORD:
            res.append(inv_vocab[token])  # collect the sentence's words
    print(' '.join(res))
Example #6
def main(args):
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    th.cuda.manual_seed(args.seed)

    best_epoch = -1
    cuda = args.gpu >= 0
    device = th.device('cuda:{}'.format(
        args.gpu)) if cuda else th.device('cpu')
    if cuda:
        th.cuda.set_device(args.gpu)
    trainset = SST()
    graphset, train_graphset, node_attrs, G, A, G0, g_wwl, rootid = Datagenerator()
    model = TreeLSTM(
        trainset.num_vocabs,
        args.x_size,
        args.h_size,
        trainset.num_classes,
        args.dropout,
        # cell_type='childsum' if args.child_sum else 'nary',
        cell_type='childsum',
        pretrained_emb=trainset.pretrained_emb).to(device)
    params_ex_emb = [
        x for x in list(model.parameters())
        if x.requires_grad and x.size(0) != trainset.num_vocabs
    ]
    params_emb = list(model.embedding.parameters())

    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)
    optimizer = optim.Adam([{
        'params': params_ex_emb,
        'lr': args.lr,
        'weight_decay': args.weight_decay
    }, {
        'params': params_emb,
        'lr': 0.1 * args.lr
    }])
    # optimizer = optim.Adam(model.parameters(), lr=0.01)
    dur = []
    # Reorganize the node attributes read above into flat per-field lists
    label_duration = []
    feature_name = []
    feature_name_word = []
    Roleinstance_name = []
    ActivityStart = []
    NodeID = []
    RootActivityId = []
    ParentActivityId = []
    ActivityId = []
    labelclass = []
    Tid = []
    for k, v in node_attrs.items():
        count = 0
        vec = []
        for k1, v1 in v.items():
            # print("")
            if len(v) == 2:
                if count == 0:
                    label_duration.append(v1)
                if count == 1:
                    doc = nlp(v1)
                    vec = doc.vector

                    feature_name.append(vec.tolist())
                    feature_name_word.append(v1)
                    vec = vec[0:25].tolist()

                if count == 2:
                    ActivityStart.append(v1)
                if count == 3:
                    NodeID.append(v1)
                if count == 4:
                    RootActivityId.append(v1)
                if count == 5:
                    ParentActivityId.append(v1)
                if count == 6:
                    ActivityId.append(v1)
                if count == 7:
                    Tid.append(v1)
                count = count + 1
            else:
                if count == 1:
                    label_duration.append(v1)
                if count == 2:
                    # print("2 v1", v1)
                    doc = nlp(v1)
                    vec1 = doc.vector
                    vec = vec1[0:20].tolist()
                    feature_name_word.append(v1)
                if count == 3:
                    # print("3 v1",v1)
                    doc = nlp(v1)
                    vec1 = doc.vector
                    vec.extend(vec1[0:5].tolist())
                    # ActivityStart.append(v1)
                if count == 4:
                    labelclass.append(int(v1))
                if count == 6:
                    ##cluster

                    doc = nlp(v1)
                    vec1 = doc.vector
                    Roleinstance_name.append(v1)
                    vec.extend(vec1[0:5].tolist())
                if count == 7:
                    ##cluster
                    doc = nlp(v1)
                    ActivityId.append(v1)
                if count == 8:
                    labelclass.append(int(v1))
                count = count + 1
        feature_name.append(vec)
    feature_name_np = np.array(feature_name)
    kernel_matrix, node_representations = wwl(g_wwl,
                                              node_features=feature_name_np,
                                              num_iterations=1)

    feature_name_np2 = np.column_stack((
        node_representations[0][0:feature_name_np.shape[0]],
        feature_name_np,
    ))
    feature_name_np_tensor = th.tensor(feature_name_np2, dtype=th.float32)
    g = graphset[0]
    n = g.number_of_nodes()
    feature_name_np_tensor1 = feature_name_np_tensor
    label_duration_tensor = th.tensor(label_duration, dtype=th.float32)
    labelclass = th.tensor(labelclass, dtype=th.float32)
    """
    train part
    """

    label_duration_tensor1 = label_duration_tensor.type(th.FloatTensor)
    label_duration_tensor1 = label_duration_tensor1.reshape(
        label_duration_tensor1.shape[0], 1)

    feature_name_np_tensor_aggragte = np.zeros([feature_name_np.shape[0], 32])
    feature_name_np_tensor_aggragte_2np = np.zeros(
        [feature_name_np.shape[0], 50])

    for i in range(feature_name_np.shape[1] - 2):

        path_all = networkx.shortest_path(G0, source=(i + 1))
        pathlist = list(path_all.values())[-1]
        for k in range(len(pathlist)):

            feature_name_np_tensor_aggragte[i] = feature_name_np_tensor1[
                pathlist[k]] + feature_name_np_tensor_aggragte[i]

        feature_name_np_tensor_aggragte_2np[i][0:32] = feature_name_np_tensor1[
            i]
        feature_name_np_tensor_aggragte_2np[i][32:50] = (
            feature_name_np_tensor_aggragte[i][0:18])
    feature_name_np_tensor_aggragte_2 = torch.from_numpy(
        feature_name_np_tensor_aggragte_2np).type(torch.FloatTensor)
    import pickle
    picklefile1 = open("feature_name_np_tensor_aggragte_2np.pkl", "wb")
    pickle.dump(feature_name_np_tensor_aggragte_2np, picklefile1)
    picklefile1.close()
    ####################################################################

    labelclass_session = labelclass[rootid]

    # for epoch in range(1000):
    #     t_epoch = time.time()
    #     model.train()
    #
    #     t0 = time.time() # tik
    #
    #     h = th.zeros((feature_name_np_tensor1.shape[0], feature_name_np_tensor1.shape[1]))
    #     c = th.zeros((feature_name_np_tensor1.shape[0], feature_name_np_tensor1.shape[1]))
    #     # logits ,classlogits= model(g,G, h, c,feature_name_np_tensor1)
    #     logits, classlogits = model(g, G, h, c, feature_name_np_tensor_aggragte_2,rootid,epoch)
    #     logp=logits.type(th.FloatTensor)
    #
    #
    #     labelclass=  labelclass_session.type(th.LongTensor)
    #     # logp=logp.reshape(k,1)
    #     labelclass = labelclass.reshape(len(rootid))
    #
    #     loss = F.mse_loss(logp, labelclass, size_average=False)
    #
    #     logp_class=F.log_softmax(classlogits, dim=1)
    #
    #     logp_class=logp_class.type(th.FloatTensor)
    #
    #     logp_class = logp_class.reshape([ len(rootid), 2])
    #
    #     loss1 = F.nll_loss(logp_class, labelclass)
    #
    #     labelclass =np.array(labelclass)
    #     labelclass=torch.from_numpy(labelclass).type(torch.LongTensor)
    #
    #     optimizer.zero_grad()
    #     loss1.backward()
    #     optimizer.step()
    #     dur.append(time.time() - t0) # tok
    #     pred = logp_class.data.max(1, keepdim=True)[1]
    #     acc = pred.eq(labelclass.data.view_as(pred)).cpu().sum().item() / float(labelclass.size()[0])
    #
    #     print("Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}",
    #                 epoch, loss1.item(),acc)
    #     file_handle1 = open(
    #         '1029_loss_sumVMOnCreate611_nodenumtrain_bin1.txt',
    #         mode='a')
    #     print(str(epoch), file=file_handle1)
    #     print(str(loss.item()), file=file_handle1)
    #     file_handle1.close()
    #
    # th.save(model.state_dict(), 'train.pkl'.format(args.seed))
    ###############################################################################################
    """
        test part
        """
    model.load_state_dict(th.load('train.pkl'))
    accs = []
    model.eval()
    # label_duration_tensor_test = label_duration_tensor.type(th.FloatTensor)
    label_duration_tensor_test = labelclass.type(th.FloatTensor)
    feature_name_np_tensor_test = feature_name_np_tensor
    feature_name_word_test = feature_name_word
    epoch = 0  # placeholder epoch: the training loop above is commented out
    for step in range(500):
        g = graphset[0]
        n = g.number_of_nodes()
        with th.no_grad():
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)

            logits, classlogits = model(g, G, h, c,
                                        feature_name_np_tensor_aggragte_2,
                                        rootid, epoch)

            # logp_class=classlogits
            logp_class = F.log_softmax(classlogits, dim=1)

            file_handle3 = open('logp_class.txt', mode='a')
            logp_class.numpy()
            import pickle
            picklefile = open("logp_class_abnormal_normal.pkl", "wb")
            pickle.dump(logp_class, picklefile)
            picklefile.close()

            print("logp_class", logp_class.numpy().tolist(), file=file_handle3)
            file_handle3.close()
            logp_class = logp_class.type(th.FloatTensor)

            logp = logits.type(th.FloatTensor)
            # pred = logp_class.data.max(1, keepdim=True)[1]

    import pandas as pd

    logpnp = np.array(logp)

    test_acc = 91  # hard-coded placeholder accuracy
    label_duration_tensor_test = th.tensor(label_duration_tensor_test,
                                           dtype=th.int)

    label_duration_tensor_test = label_duration_tensor_test.reshape(
        len(rootid), 1)
    """
        caculate mape
        """

    loss_test = mape(logp, label_duration_tensor_test)

    logp = logp.reshape([1, len(rootid)])

    label_duration_tensor_test = label_duration_tensor_test.reshape(
        [1, len(rootid)])
    # label_duration_tensor_test = label_duration_tensor_test.reshape([1, 200])
    print("label_duration_tensor_test", label_duration_tensor_test.shape)
    print("logp", logp.shape)

    # logp1.dtype='float32'
    # print("logp", logp1.dtype)

    label_duration_tensor_test1 = np.array(label_duration_tensor_test,
                                           dtype=np.int32)
    # label_duration_tensor_test.dtype='float32'
    print("label_duration_tensor_test", label_duration_tensor_test.dtype)
    label_duration_tensor_test1 = label_duration_tensor_test1.tolist()[0]

    print("label_duration_tensor_test1", len(label_duration_tensor_test1))
    print("label_duration_tensor_test1", label_duration_tensor_test1)

    distribution = torch.argmax(logp_class, dim=1)
    print("distribution", distribution)

    # logp1= distribution.reshape([4, 261])
    logp1 = np.array(distribution, dtype=np.int32)
    selector = SelectKBest(chi2, k=2)
    input = []

    for i in range(len(feature_name_np_tensor_aggragte_2.numpy().tolist())):

        input.append(
            list(
                map(abs,
                    feature_name_np_tensor_aggragte_2.numpy().tolist()[i])))

    X = feature_name_np_tensor_aggragte_2np
    # print("X_new.scores_", selector.transform(X))
    logp1 = logp1.tolist()

    listlog = distribution.numpy().tolist()
    label_duration_tensor_test1_np = np.array(label_duration_tensor_test1)
    Abnormlist_np = np.where((distribution == 2) | (distribution == 1))
    # K = cos(logp_class, logp_class.t())
    K = getdistances(logp_class)
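
    # `getdistances` is assumed to return a pairwise similarity matrix over
    # the class log-probabilities (cf. the commented cosine line above);
    # a minimal cosine-similarity sketch:
    # def getdistances(x):
    #     xn = x / x.norm(dim=1, keepdim=True)
    #     return (xn @ xn.t()).tolist()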
    for i in range(Abnormlist_np[0].shape[0]):
        causeroot = []
        similarity = []
        if i != 0:

            path = networkx.shortest_path(G0, source=Abnormlist_np[0][i])
            print("path", path)
            list(path.values())
            list_path = list(path.values())
            print("list_path", list_path)
            rootcausedep = []
            for iii in range(len(list_path)):
                for jjj in range(len(list_path[iii])):
                    if list_path[iii][jjj] not in rootcausedep and (
                            list_path[iii][jjj] != Abnormlist_np[0][i]):
                        rootcausedep.append(list_path[iii][jjj])
                        # similarity.append(K[Abnormlist_np[0][i]][list_path[iii][jjj]])
            print("rootcausedep", rootcausedep)
            # similarity

            for j in range(len(rootcausedep)):
                KJ = 0
                for jk in range(len(rootcausedep)):
                    if jk != j:
                        KJ = K[rootcausedep[j]][rootcausedep[jk]] + KJ
                KJ = KJ + K[rootcausedep[j]][Abnormlist_np[0][i]]
                if KJ != 0:
                    similarity.append(KJ)

            print("similarity", similarity)
            if len(similarity) > 0:
                max_index = similarity.index(max(similarity, key=abs))
                print("rootcausedep", rootcausedep, rootcausedep[max_index])

    print("test 0", sum(distribution == 0))
    print("test 1", sum(distribution == 1))
    print("test 2", sum(distribution == 2))
    print("test 3", sum(distribution == 3))
    print("label 0", label_duration_tensor_test1.count(0))
    print("label 1", label_duration_tensor_test1.count(1))
    print("label 2", label_duration_tensor_test1.count(2))
    print("label 3", label_duration_tensor_test1.count(3))
    # logp1
    print("logp1", len(logp1))
    print("label_duration_tensor_test1", len(label_duration_tensor_test1))
    f1score = sk.metrics.f1_score(logp1,
                                  label_duration_tensor_test1,
                                  average='micro')
    print("f1score", f1score)
    print("Epoch {:05d} | Test Acc {:.4f} | MAPE Loss {:.4f},f1score",
          best_epoch, test_acc, loss_test, f1score)
    # loss_test = mape(logp, label_duration_tensor_test[0:522])

    abs_duration = abs(label_duration_tensor_test - logp)
    # abs_duration = abs(label_duration_tensor_test[0:522] - logp)
    id = th.where(abs_duration > 0.05)
    id1 = th.where(abs_duration > 0.1)
    id11 = th.where(abs_duration >= 1)
    id4 = th.where(abs_duration > 0.4)
    id44 = np.array(id[0])
    id44list = id44.tolist()
    feature_name_wordkk = []
    ActivityStartkk = []
    ActivityIdkk = []
    label_durationkk = []
    logpkk = []
    abs_duration = abs_duration.numpy()
    idk = heapq.nlargest(3000, range(len(abs_duration)),
                         abs_duration.__getitem__)
    idklist = idk
    id44list = idklist
    logpk = []
    print("len(idklist)", len(idklist))
    print("len(feature_name_word_test)", len(feature_name_word_test))
    for i in range(len(id44list)):
        print("i", i)
        feature_name_wordkk.append(feature_name_word_test[id44list[i]])

        label_durationkk.append(label_duration[id44list[i]])
        logpkk.append(abs_duration[id44list[i]])
        logpk.append(logp[id44list[i]])
    print("id0.05", id)
    print("id0.05", len(id[0]))
    print("id0.1", id1)
    print("id0.1", len(id1[0]))
    print("id0.01", id11)
    print("id0.01", len(id11[0]))
    print("id0.01", len(id11[0]) / 100)
    print("AnomalyID>0.01", len(id44list))
    """
        save result txt
        """
    file_handle2 = open('1029sum_fristVMOnCreate611_nodenum_bin1.txt',
                        mode='a')
    from collections import Counter
    import operator
    # count how often each word appears
    a = dict(Counter(feature_name_wordkk))
    # sort the words by count, descending
    b = sorted(a.items(), key=operator.itemgetter(1), reverse=True)

    for i in range(len(id44list)):
        print("index", str(i), file=file_handle2)
        print("indexcsv", str(id44list[i]), file=file_handle2)
        print("activity name", str(feature_name_wordkk[i]), file=file_handle2)
        # print("ActivityId",str(ActivityIdkk[i]), file=file_handle2)
        print("label duration", str(label_durationkk[i]), file=file_handle2)
        print("abs_duration", logpkk[i], file=file_handle2)
        print("predict duration", logpk[i], file=file_handle2)
    file_handle2.close()
    file_handle3 = open('0127sumaccVMOnCreate_nodenum_bin1.txt', mode='a')
    print("ActivityId", str(b), file=file_handle3)
    file_handle3.close()
    print(
        '------------------------------------------------------------------------------------'
    )
    print("Epoch {:05d} | Test Acc {:.4f} | MAPE Loss {:.4f},f1score",
          best_epoch, test_acc, loss_test, f1score)
    file_handle4 = open('0127mean_mapeVMOnCreate611_nodenum_bin1.txt',
                        mode='a')
    print("mape", file=file_handle4)
    print(str(loss_test), file=file_handle4)
    file_handle4.close()
    file_handle1 = open('0127_loss_sumVMOnCreate611_nodenumtest.txt', mode='a')
    # print(str(epoch), file=file_handle1)
    print(str(test_acc), file=file_handle1)
    # print(str(loss.item()), file=file_handle1)
    file_handle1.close()
    # print(str(), file=file_handle1)
    print("node_representations", node_representations)
    print("rootid", rootid)
    label_session = []
    for i in range(len(rootid)):
        label_session.append(label_duration_tensor_test1[i])

    print("sessionlabel", label_session)