示例#1
0
def load_dataset(f):
    """
    Read a graph dataset (.gsp file) from disk.

    This is a thin convenience wrapper that hands the path straight to
    ``load_graphs``.

    :param f: path of the dataset file
    :return: the list of graphs produced by ``load_graphs``
    """
    dataset = load_graphs(f)
    return dataset
示例#2
0
def main(filename='gd10.txt', min_sup=8):
    """
    Report, for every graph in a dataset, which stored patterns occur in it.

    The graphs are loaded from ``filename`` (resolved against the module-level
    ``filepath``).  The patterns are read from ``Malware_sub_Goodware.txt`` in
    the current working directory: every line other than ``Pattern`` is parsed
    as an edge tuple ``(u, v, L_u, L_v, L_uv)``, and each ``Pattern`` line
    closes the group of edges collected so far.  Edges that follow the last
    ``Pattern`` line are therefore never added to a pattern.

    :param filename: dataset file name, relative to ``filepath``
    :param min_sup: accepted for interface compatibility; not used here
    :return: None, one "Matched" / "Not Matched" line is printed per
             (graph, pattern) pair, both numbered from 1
    """
    graphs = load_graphs(os.path.join(filepath, filename))

    patterns = []
    with open("Malware_sub_Goodware.txt", "r") as pattern_file:
        current = []
        for raw in pattern_file.read().splitlines():
            if raw == 'Pattern':
                # A marker line flushes the edges gathered so far.
                patterns.append(current)
                current = []
                continue
            # Drop the surrounding parentheses, then split the five fields.
            fields = re.sub(r'[\(\)]', '', raw).split(", ")
            u, v, L_u, L_v, L_uv = fields
            edge = (int(u), int(v),
                    L_u.strip("'"), L_v.strip("'"), L_uv.strip("'"))
            current.append(edge)

    for graph_no, graph in enumerate(graphs, start=1):
        for pattern_no, pattern in enumerate(patterns, start=1):
            embeddings = subgraph_isomorphisms(pattern, graph)
            if len(embeddings) != 0:
                print("Graph: {} , Pattern: {} Matched".format(
                    graph_no, pattern_no))
            else:
                print("Graph: {} , Pattern: {} Not Matched".format(
                    graph_no, pattern_no))
示例#3
0
文件: test.py 项目: yxtj/gSpan
def main(filename='data/exampleG.txt', min_sup=2):
    """
    Mine frequent patterns from a graph file with gSpan and print them.

    :param filename: dataset file name, relative to the module-level
                     ``filepath``
    :param min_sup: minimum support forwarded to ``gSpan``
    :return: None, each pattern is printed under a ``Pattern <k>`` header
             (numbered from 1), one entry per line, followed by a blank line
    """
    graphs = load_graphs(os.path.join(filepath, filename))
    graph_count = len(graphs)  # computed as in the original; not used further

    mined = []
    # gSpan fills the list passed as ``extensions`` in place.
    gSpan([], graphs, min_sup=min_sup, extensions=mined)

    for number, pattern in enumerate(mined, start=1):
        print('Pattern %d' % number)
        for code in pattern:
            print(code)
        print('')
示例#4
0
def main(filename=fname, min_sup=msup):
    """
    Run the weighted gSpan variant on a dataset and write a report file.

    The dataset is loaded with the module-level settings ``rough_min``,
    ``span``, ``db_size`` and ``bin_count``; every entry of the module-level
    ``WDT`` is printed after a separator line.  The mined patterns, the
    elapsed time (measured up to the end of mining, before the report is
    written) and the three counters returned by ``gSpan`` are written to
    ``output.txt`` in the current working directory.

    :param filename: dataset file name, relative to the module-level
                     ``filepath``
    :param min_sup: minimum support forwarded to ``gSpan``
    :return: None
    """
    start_time = time.time()
    filename = os.path.join(filepath, filename)
    graphs, maxW = load_graphs(filename, rough_min, span, db_size, bin_count)
    for wdt in WDT:
        print("*************")
        print(wdt)

    extensions = []
    # All three counters start at zero; gSpan returns their final values.
    fwsCount, canCount, gCount = gSpan([],
                                       graphs,
                                       min_sup=min_sup,
                                       extensions=extensions,
                                       maxW=maxW,
                                       fwsCount=0,
                                       canCount=0,
                                       gCount=0,
                                       oc=range(len(graphs)))
    end_time = time.time()

    # The context manager guarantees the report is flushed and closed even
    # if one of the writes raises.
    with open("output.txt", "w") as report:
        for i, ext in enumerate(extensions, start=1):
            report.write('Pattern %d\n' % i)
            for _c in ext:
                report.write(str(_c) + '\n')

        report.write("--- %s seconds ---\n" % (end_time - start_time))
        for counter in (fwsCount, canCount, gCount):
            report.write(str(counter) + '\n')
def _save_subgraph_lists(split_name, graphs, subgraphs_list):
    """Write one ``.npy`` and one ``.txt`` file per graph of a split.

    For graph ``i`` the entry ``subgraphs_list[i]`` is saved to
    ``id/<split_name>_np/g_<i>`` (NumPy appends ``.npy``) and, as
    space-separated items, to ``id/<split_name>/g_<i>.txt``.  The target
    directories are not created here and must already exist.
    """
    for i in range(len(graphs)):
        items = subgraphs_list[i]
        np.save("id/%s_np/g_%d" % (split_name, i), np.array(items))
        with open("id/%s/g_%d.txt" % (split_name, i), "w") as out:
            out.writelines(["%s " % item for item in items])


def gspan(train_test_split, path_to_graph_dataset, min_sup):
    """
    Mine frequent subgraphs on a random training split and dump the results.

    Steps:
      1. load the graphs and shuffle them (the permutation is saved to
         ``shuffled_graph_idxs.npy``);
      2. split them into train / test at ``int(train_test_split * n)``;
      3. run ``gSpan`` on the training graphs and print every mined pattern;
      4. for both splits, compute the subgraph list of every graph and save
         it under ``id/train*`` and ``id/test*``;
      5. save the mined patterns (``extensions.npy``) and the support values
         computed on the training split (``support.npy``).

    :param train_test_split: fraction of the graphs used for training
    :param path_to_graph_dataset: path handed to ``load_graphs``
    :param min_sup: minimum support forwarded to ``gSpan``
    :return: None
    """
    all_graphs = load_graphs(path_to_graph_dataset)
    n = len(all_graphs)

    # Shuffle an index array rather than the graphs themselves so that the
    # permutation can be stored and the split reproduced later.
    idxs = np.arange(0, n, 1)
    random.shuffle(idxs)
    graphs = [all_graphs[e] for e in idxs]

    # NOTE(review): the class labels are read from a machine-specific path but
    # nothing below uses them; confirm whether this read can be dropped.
    with open("/home/N1801734D/grid/graph_2D_CNN/datasets/classes/test/test_classes.txt", 'r') as f:
        y = [int(elt) for elt in f.read().splitlines()]

    np.save("shuffled_graph_idxs", idxs)

    split_at = int(train_test_split * n)
    train_graphs = graphs[:split_at]
    test_graphs = graphs[split_at:]

    extensions = []
    # Honour the caller's threshold instead of a fixed value.
    gSpan([], train_graphs, min_sup=min_sup, extensions=extensions)

    for ext in extensions:
        for _c in ext:
            print(_c)
        print('')

    support, train_graphs_list = get_graphs_for_each_subgraph(
        extensions=extensions, graphs=train_graphs)
    train_subgraphs_list = get_subgraphs_for_each_graph(
        graphs=train_graphs, graphs_list=train_graphs_list)
    _save_subgraph_lists("train", train_graphs, train_subgraphs_list)

    # The test split reuses the patterns mined on the training split; its
    # support values are discarded.
    _, test_graphs_list = get_graphs_for_each_subgraph(
        extensions=extensions, graphs=test_graphs)
    test_subgraphs_list = get_subgraphs_for_each_graph(
        graphs=test_graphs, graphs_list=test_graphs_list)
    _save_subgraph_lists("test", test_graphs, test_subgraphs_list)

    # NOTE(review): if the patterns have different lengths, recent NumPy
    # versions may refuse to build an array from them without dtype=object;
    # confirm.
    np.save("extensions", extensions)
    np.save("support", support)
示例#6
0
def main(filename='data/Compound_422.txt', min_sup=300):
    """
    Compare patterns mined on a random sample with those of the full dataset.

    gSpan is first run on every graph with ``min_sup``; the resulting patterns
    are the reference set.  The user is then asked for a percentage, that
    share of the graphs is sampled without replacement, and gSpan is run again
    on the sample with a fixed minimum support of 175.  Finally the overlap of
    the two pattern lists is summarised:

    * ``tp``: sampled patterns that are also reference patterns
    * ``fp``: sampled patterns that are not reference patterns
    * ``fn``: reference patterns missing from the sample result
    * accuracy (``tp / reference size``, in percent), precision, recall,
      Jaccard coefficient (``tp / size of the union``, in percent), F1 score

    Every ratio is reported as ``0.0`` when its denominator is zero.

    :param filename: dataset file name, relative to the module-level
                     ``filepath``
    :param min_sup: minimum support for the run on the full dataset
    :return: None, everything is printed
    """
    filename = os.path.join(filepath, filename)
    graphs1 = load_graphs(filename)

    extensions = []
    print("original frequent pattern:")
    gSpan([], graphs1, min_sup=min_sup, extensions=extensions)
    for i, ext in enumerate(extensions):
        print('Pattern %d' % (i + 1))
        for _c in ext:
            print(_c)
        print('')
        # ``start_time`` is not set in this function; it is expected to be
        # defined at module level.
        print("--- %s seconds ---" % (time.time() - start_time))
    extensions1 = extensions
    print("original frequent pattern list:")
    print(extensions1)

    n = len(graphs1)
    print("total number of graph:")
    print(n)
    x = int(input('what percentage want to take:'))
    p = round((x / 100) * n)
    print("graph size now is :")
    print(p)
    # random.sample raises ValueError if p exceeds n (percentage above 100).
    graphs = random.sample(graphs1, p)

    extensions = []
    print("percentwise frequent pattern:")
    gSpan([], graphs, min_sup=175, extensions=extensions)
    for i, ext in enumerate(extensions):
        print('Pattern %d' % (i + 1))
        for _c in ext:
            print(_c)
        print('')
    print("percentwise frequent pattern list:")
    print(extensions)

    l1 = len(extensions1)
    l2 = len(extensions)
    # Plain list membership is used on purpose: no assumption is made that
    # the patterns are hashable.
    tp = sum(1 for pattern in extensions if pattern in extensions1)
    fp = l2 - tp
    fn = l1 - tp

    accuracy = (tp / l1) * 100 if l1 else 0.0
    print("accuracy:", accuracy)
    prcn = tp / (fp + tp) if (fp + tp) else 0.0
    print("precission", prcn)
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    print("recall", recall)
    # Jaccard = |intersection| / |union|; the union counts the shared
    # patterns only once, hence the "- tp".
    union = l1 + l2 - tp
    jc = (tp / union) * 100 if union else 0.0
    print("jaccard coefficient", jc)
    if prcn + recall:
        f1score = 2 * ((prcn * recall) / (prcn + recall))
    else:
        f1score = 0.0
    print("F1Score:", f1score)
def main(min_sup):
    """
    Mine frequent patterns with gSpan and summarise them in a DataFrame.

    The input graphs come from ``generateInput()``; each one is passed through
    ``numberingIndex`` before being handed to ``load_graphs``.  ``gSpan``
    fills two parallel lists: the patterns and their supports.

    :param min_sup: minimum support forwarded to ``gSpan``
    :return: a ``pandas.DataFrame`` with one row per pattern, indexed by the
             pattern's position, with columns

             * ``support``: the support reported by gSpan,
             * ``vertex``: the distinct ``(entry[0], entry[2])`` and
               ``(entry[1], entry[3])`` pairs of the pattern's entries
               (order not guaranteed, since they are deduplicated via a set),
             * ``link``: ``[entry[2], entry[3], entry[4]]`` per entry,
             * ``num_vert``: the number of distinct vertex pairs.

             An empty ``DataFrame`` is returned when no pattern was found.
    """
    allGraph2Dic = generateInput()
    gspanInput = [list(numberingIndex(graph2Dic)) for graph2Dic in allGraph2Dic]

    graphs = load_graphs(gspanInput)
    extensions = []
    extensions_sups = []
    gSpan([], graphs, min_sup, extensions, extensions_sups)

    # One single-row frame per pattern, joined once at the end.
    # DataFrame.append is gone in pandas 2.0 and copied the whole frame on
    # every call.
    row_frames = []
    for i, ext in enumerate(extensions):
        nodelist = []
        edgelist = []
        # Each entry is presumably (u, v, L_u, L_v, L_uv); confirm against
        # the gSpan implementation.
        for _c in ext:
            nodelist.append([_c[0], _c[2]])
            nodelist.append([_c[1], _c[3]])
            edgelist.append([_c[2], _c[3], _c[4]])

        # A vertex shared by several entries must be counted only once.
        nodelist = list(set(map(tuple, nodelist)))

        row_frames.append(
            pd.DataFrame(
                {
                    'support': [extensions_sups[i]],
                    'vertex': [nodelist],
                    'link': [edgelist],
                    'num_vert': [len(nodelist)]
                },
                index=[i]))

    if not row_frames:
        # pd.concat rejects an empty sequence.
        return pd.DataFrame()
    return pd.concat(row_frames)