def main():
	# arrange and display the dataSet
	ds = dataSet()
	ds.readInput('input.csv')
	ds.readOutput('learnOutput.csv')
	ds.displayInput(49, True)
	ds.runPca()
	ds.displayPca([])
	# run neural network
	nn = neuralNetwork(25)
	nn.learnNetwork(ds)
	nn.nnTest(ds)
	plt.show()  
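
For context, this snippet assumes a plotting import and locally defined dataSet and neuralNetwork classes; a minimal, hypothetical scaffold (module names assumed, not taken from the project) would be:

import matplotlib.pyplot as plt       # needed by plt.show() in main()
from data_set import dataSet          # hypothetical module name
from neural_net import neuralNetwork  # hypothetical module name

if __name__ == '__main__':
	main()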
Example #2
def getTrainData(embedding):
    print("prepare Data")
    # load data
    graph_path = os.path.join('temp/graph.txt')  # edge list file
    text_path = os.path.join("..", "datasets", 'cora', 'data.txt')  # node description file
    data = dataSet(text_path, graph_path)
    # Open the description files in the dataset folder: the graph file stores the edge (link) descriptions, the data file stores the node descriptions (the zhihu variant is in Chinese)

    edgesEmbed = []
    for i, j in edge_Train:
        if i in embedding.keys() and j in embedding.keys():
            lsnodes = data.negNnodes(i, j, 10)

            dot2 = np.dot(embedding[i], embedding[j])
            tempEmbed = []
            for m in range(len(lsnodes)):
                if lsnodes[m] in embedding.keys():
                    dot1 = np.dot(embedding[i], embedding[lsnodes[m]])
                    tempVal = dot2 - dot1
                    tempEmbed.append(tempVal)
                else:
                    tempVal = dot2
                    tempEmbed.append(tempVal)
            edgesEmbed.append(tempEmbed)
    edgesEmbedarray = np.array(edgesEmbed)
    X_train = edgesEmbedarray

    edgesEmbed = []
    for i, j in edge_Test:
        if i in embedding.keys() and j in embedding.keys():
            lsnodes = data.negNnodes(i, j, 10)

            dot2 = np.dot(embedding[i], embedding[j])
            tempEmbed = []
            for m in range(len(lsnodes)):
                if lsnodes[m] in embedding.keys():
                    dot1 = np.dot(embedding[i], embedding[lsnodes[m]])
                    tempVal = dot2 - dot1
                    tempEmbed.append(tempVal)
                else:
                    tempVal = dot2
                    tempEmbed.append(tempVal)

            edgesEmbed.append(tempEmbed)
    edgesEmbedarray = np.array(edgesEmbed)
    X_test = edgesEmbedarray

    edgeAnomaly = []
    with open('temp/anomalyedgecora.txt', 'rb') as fanomaly:
        edgesAnomaly = [list(map(int, i.strip().decode().split(' '))) for i in fanomaly]
    print(len(edgesAnomaly))
    edgestr=[]
    for i, j in edgesAnomaly:
        if i in embedding.keys() and j in embedding.keys():
            # distance = euclideann2(node2vec[i], node2vec[j])
            # disTrain.append(list(node2vec[i])+list(node2vec[j]))
            lsnodes = data.negNnodes(i, j, 10)
            dot2 = np.dot(embedding[i], embedding[j])
            tempEmbed = []
            for m in range(len(lsnodes)):
                if lsnodes[m] in embedding.keys():
                    dot1 = np.dot(embedding[i], embedding[lsnodes[m]])
                    tempVal = dot2 - dot1
                    tempEmbed.append(tempVal)
                else:
                    tempVal = dot2
                    tempEmbed.append(tempVal)
            edgeAnomaly.append(tempEmbed)
            edgestr.append("{},{}".format(i,j))

            # edge = np.append(node2vec[i], node2vec[j])  # probability of being a positive sample
            # edgeAnomaly.append(edge)
    print('edgestr', edgestr)
    edgeAnomalyarray = np.array(edgeAnomaly)
    print(edgesEmbedarray.shape)
    print(X_train.shape)
    print(X_test.shape)
    print(edgeAnomalyarray.shape)

    print("prepare Data ended")
    return X_train, X_test, edgeAnomalyarray
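
The three loops above repeat the same negative-sampling computation. A sketch of a shared helper (assuming the same data.negNnodes and embedding interfaces as above) could replace all of them:

def edge_scores(embedding, data, edge_list, num_neg=10):
    # For each edge (i, j) with embedded endpoints, score num_neg negative
    # nodes k as dot(e_i, e_j) - dot(e_i, e_k), falling back to dot(e_i, e_j)
    # when k has no embedding.
    rows = []
    for i, j in edge_list:
        if i in embedding and j in embedding:
            dot2 = np.dot(embedding[i], embedding[j])
            rows.append([dot2 - np.dot(embedding[i], embedding[k])
                         if k in embedding else dot2
                         for k in data.negNnodes(i, j, num_neg)])
    return np.array(rows)

# usage: X_train = edge_scores(embedding, data, edge_Train)
#        X_test  = edge_scores(embedding, data, edge_Test)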
Example #3
File: train.py  Project: zwytop/CANE
import numpy as np
import tensorflow as tf
from DataSet import dataSet
import config
import cane
import random

# load data
graph_path = 'graph.txt'
text_path = 'data.txt'

data = dataSet(text_path, graph_path)

# start session

with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        model = cane.Model(data.num_vocab, data.num_nodes)
        opt = tf.train.AdamOptimizer(config.lr)
        train_op = opt.minimize(model.loss)
        sess.run(tf.global_variables_initializer())

        #training
        print('start training.......')

        for epoch in range(config.num_epoch):
            loss_epoch = 0
            batches = data.generate_batches()
            h1 = 0
            num_batch = len(batches)
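            # The listing breaks off here. A generic continuation (a sketch;
            # `make_feed` is a hypothetical helper that maps a batch onto the
            # model's placeholders, which this excerpt does not show) would be:
            for batch in batches:
                _, loss_batch = sess.run([train_op, model.loss],
                                         feed_dict=make_feed(model, batch))
                loss_epoch += loss_batch
            print('epoch %d: avg loss %.4f' % (epoch, loss_epoch / num_batch))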
Example #4
File: train.py  Project: MirkoPenn/came
import config
import came
import os
import logging
import psutil
import random
import tensorflow as tf

FORMAT = "%(asctime)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)

# load data
all_graph_path = 'all_graph.txt'
song_graph_path = 'song_graph.txt'
text_path = 'data_all.txt'

logging.info('start reading data.......')
data = dataSet(text_path, all_graph_path, song_graph_path)
logging.info('end reading data.......')

# assign GPU
os.environ['CUDA_VISIBLE_DEVICES'] = config.CUDA_VISIBLE_DEVICES

# GPU usage amount
gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True
with tf.Graph().as_default():
    sess = tf.Session(config=gpu_config)
    with sess.as_default():
        model = came.Model(data.num_vocab, data.num_all_nodes)
        opt = tf.train.AdamOptimizer(config.lr)
        train_op = opt.minimize(model.loss)
        sess.run(tf.global_variables_initializer())
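
Design note on the GPU settings above: allow_growth lets TensorFlow claim memory on demand instead of reserving the whole device up front. A hard cap is the common alternative (a sketch, not from this project):

capped_config = tf.ConfigProto()
capped_config.gpu_options.per_process_gpu_memory_fraction = 0.5  # at most 50% of the GPU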
Example #5
def getTrainData():
    print("prepare Data")

    # load data

    graph_path = os.path.join('temp/graph.txt')  # edge list file
    text_path = os.path.join("..", "datasets", 'zhihu',
                             'data.txt')  # node description file

    data = dataSet(text_path, graph_path)
    # Open the description files in the dataset folder: the graph file stores the edge (link) descriptions, the data file stores the node descriptions; the zhihu variant is in Chinese
    # f = open('../datasets/zhihu/graph.txt' , 'rb')
    f = open('temp/graph.txt', 'rb')
    edges = [list(map(int, i.strip().decode().split('\t'))) for i in f]
    print(len(edges))
    nodesTrain = list(set([i for j in edges for i in j]))
    # node representation vectors
    node2vec = {}
    # use the structural embedding information
    fline = open('temp/vec_all.txt', 'rb')
    dvec = {}
    for i, j in enumerate(fline):
        if j.decode() != '\n':
            tempvec = list(map(float, j.strip().decode().split(' ')))
            dvec[tempvec[0]] = list(tempvec[1:])
    # print(dvec)
    f = open('temp/embed.txt', 'rb')
    for i, j in enumerate(f):
        if j.decode() != '\n':
            a = list(map(float, j.strip().decode().split(' ')))
            # node2vec[i] = list(dvec[i]) +[i *0.3 for  i in a]
            node2vec[i] = list(dvec[i]) + a
    edgesEmbed = []
    for i, j in edges:
        if i in node2vec.keys() and j in node2vec.keys():
            # distance = euclideann2(node2vec[i], node2vec[j])
            # disTrain.append(list(node2vec[i])+list(node2vec[j]))
            # edge = np.append(node2vec[i], node2vec[j])  # probability of being a positive sample
            lsnodes = data.negNnodes(i, j, 10)

            dot2 = np.dot(node2vec[i], node2vec[j])
            tempEmbed = []
            for m in range(len(lsnodes)):
                if lsnodes[m] in node2vec.keys():
                    dot1 = np.dot(node2vec[i], node2vec[lsnodes[m]])
                    tempVal = dot2 - dot1
                    tempEmbed.append(tempVal)
                else:
                    tempVal = dot2
                    tempEmbed.append(tempVal)
            edgesEmbed.append(tempEmbed)

    edgesEmbedarray = np.array(edgesEmbed)
    X_train, X_test = train_test_split(edgesEmbedarray,
                                       test_size=0.2,
                                       random_state=42)

    edgeAnomaly = []
    with open(
            'C:/Users/Administrator/Desktop/CANE-master/code/temp/anomalyedge3.txt',
            'rb') as fanomaly:
        edgesAnomaly = [
            list(map(int,
                     i.strip().decode().split(' '))) for i in fanomaly
        ]
    print(len(edgesAnomaly))
    for i, j in edgesAnomaly:
        if i in node2vec.keys() and j in node2vec.keys():
            # distance = euclideann2(node2vec[i], node2vec[j])
            # disTrain.append(list(node2vec[i])+list(node2vec[j]))
            lsnodes = data.negNnodes(i, j, 10)
            dot2 = np.dot(node2vec[i], node2vec[j])
            tempEmbed = []
            for m in range(len(lsnodes)):
                if lsnodes[m] in node2vec.keys():
                    dot1 = np.dot(node2vec[i], node2vec[lsnodes[m]])
                    tempVal = dot2 - dot1
                    tempEmbed.append(tempVal)

                else:
                    tempVal = dot2
                    tempEmbed.append(tempVal)

            edgeAnomaly.append(tempEmbed)
            # edge = np.append(node2vec[i], node2vec[j])  # probability of being a positive sample
            # edgeAnomaly.append(edge)

    edgeAnomalyarray = np.array(edgeAnomaly)
    print(edgesEmbedarray.shape)
    print(X_train.shape)
    print(X_test.shape)
    print(edgeAnomalyarray.shape)

    print("prepare Data ended")
    return X_train, X_test, edgeAnomalyarray
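
A hypothetical downstream use of the returned arrays (not part of this project) is to fit a one-class model on the normal edges and compare scores on the anomalous ones:

from sklearn.ensemble import IsolationForest

X_train, X_test, X_anomaly = getTrainData()
clf = IsolationForest(random_state=42).fit(X_train)
print(clf.score_samples(X_test).mean())     # held-out normal edges: higher scores
print(clf.score_samples(X_anomaly).mean())  # anomalous edges: expected lower scores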
Example #6
import numpy as np
import tensorflow as tf
from DataSet import dataSet
import config
import sacqa
import random

# load data
train_graph_path = 'datasets/train_graph.txt'
text_path = 'datasets/data.txt'
val_graph_path = 'datasets/test_graph.txt'
train_y = 'datasets/train_y.txt'
val_y = 'datasets/test_y.txt'
val_q = 'datasets/test_q.txt'

data = dataSet(text_path, train_graph_path, val_graph_path, train_y, val_y,
               val_q)

# start session

# with tf.Graph().as_default() as  g:
sess = tf.Session()

with sess.as_default():
    model = sacqa.Model(data.num_vocab)
    # for var in model.collection:
    #     print(var.get_shape())
    print(tf.trainable_variables())
    # assert False
    opt = tf.train.AdamOptimizer(config.lr)
    # opt = tf.train.GradientDescentOptimizer(config.lr)
    train_op = opt.minimize(model.loss)
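    # The listing is truncated here; as in the other examples, the variables
    # must be initialized before any training step can run:
    sess.run(tf.global_variables_initializer())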