import matplotlib.pyplot as plt

from DataSet import dataSet              # assumed project module providing dataSet
from NeuralNetwork import neuralNetwork  # assumed project module providing neuralNetwork


def main():
    # Arrange and display the data set.
    ds = dataSet()
    ds.readInput('input.csv')
    ds.readOutput('learnOutput.csv')
    ds.displayInput(49, True)
    ds.runPca()
    ds.displayPca([])

    # Run the neural network on the prepared data.
    nn = neuralNetwork(25)
    nn.learnNetwork(ds)
    nn.nnTest(ds)
    plt.show()
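# A standard entry-point guard (assuming the code above lives in a runnable
# script) keeps the pipeline from firing on import:
if __name__ == '__main__':
    main()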
import os

import numpy as np

from DataSet import dataSet


def getTrainData(embedding):
    """Turn train/test/anomaly edges into negative-sampling score vectors.

    Expects edge_Train and edge_Test to exist at module level as lists of
    (i, j) node-id pairs, and embedding to map node ids to vectors.
    """
    print("prepare Data")
    # graph.txt stores the edge links; data.txt stores the node descriptions.
    graph_path = os.path.join('temp/graph.txt')
    text_path = os.path.join("..", "datasets", 'cora', 'data.txt')
    data = dataSet(text_path, graph_path)

    def edge_scores(i, j):
        # For each of 10 negative-sampled nodes, record
        # dot(i, j) - dot(i, negative); fall back to dot(i, j) when the
        # negative node has no embedding.
        lsnodes = data.negNnodes(i, j, 10)
        dot2 = np.dot(embedding[i], embedding[j])
        scores = []
        for node in lsnodes:
            if node in embedding:
                scores.append(dot2 - np.dot(embedding[i], embedding[node]))
            else:
                scores.append(dot2)
        return scores

    # Training edges.
    X_train = np.array([edge_scores(i, j) for i, j in edge_Train
                        if i in embedding and j in embedding])

    # Test edges.
    X_test = np.array([edge_scores(i, j) for i, j in edge_Test
                       if i in embedding and j in embedding])

    # Anomalous edges: one space-separated node-id pair per line.
    with open('temp/anomalyedgecora.txt', 'rb') as fanomaly:
        edgesAnomaly = [list(map(int, line.strip().decode().split(' ')))
                        for line in fanomaly]
    print(len(edgesAnomaly))

    edgeAnomaly = []
    edgestr = []
    for i, j in edgesAnomaly:
        if i in embedding and j in embedding:
            edgeAnomaly.append(edge_scores(i, j))
            edgestr.append("{},{}".format(i, j))
    print('edgestr', edgestr)
    edgeAnomalyarray = np.array(edgeAnomaly)

    print(X_train.shape)
    print(X_test.shape)
    print(edgeAnomalyarray.shape)
    print("prepare Data ended")
    return X_train, X_test, edgeAnomalyarray
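# Each row returned above is a fixed-length score vector (10 negatives per
# edge), so a generic outlier detector can consume it directly. A minimal
# evaluation sketch, assuming scikit-learn is available and an embedding dict
# is in scope; OneClassSVM is an illustrative choice, not the project's own
# classifier.
from sklearn.svm import OneClassSVM

X_train, X_test, X_anomaly = getTrainData(embedding)

# Fit on normal training edges only, then compare how often normal test edges
# vs. injected anomaly edges are flagged as outliers (prediction == -1).
detector = OneClassSVM(nu=0.1, gamma='scale')
detector.fit(X_train)
print('normal test edges flagged:', (detector.predict(X_test) == -1).mean())
print('anomaly edges flagged:    ', (detector.predict(X_anomaly) == -1).mean())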
import random

import numpy as np
import tensorflow as tf

import cane
import config
from DataSet import dataSet

# Load data: graph.txt holds the edge list, data.txt the node texts.
graph_path = 'graph.txt'
text_path = 'data.txt'
data = dataSet(text_path, graph_path)

# Start session.
with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        model = cane.Model(data.num_vocab, data.num_nodes)
        opt = tf.train.AdamOptimizer(config.lr)
        train_op = opt.minimize(model.loss)
        sess.run(tf.global_variables_initializer())

        # Training.
        print('start training.......')
        for epoch in range(config.num_epoch):
            loss_epoch = 0
            batches = data.generate_batches()
            h1 = 0
            num_batch = len(batches)
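            # The loop body was cut off above. A plausible continuation under
            # the usual CANE setup; the feed placeholder names (model.Text_a,
            # model.Node_a, ...) and data.text being an ndarray are assumptions
            # about the model/data classes, not their confirmed API.
            for i in range(num_batch):
                # Each batch is assumed to hold (node, neighbor, negative) triples.
                node1, node2, node3 = zip(*batches[i])
                feed_dict = {
                    model.Text_a: data.text[np.array(node1)],
                    model.Text_b: data.text[np.array(node2)],
                    model.Text_neg: data.text[np.array(node3)],
                    model.Node_a: np.array(node1),
                    model.Node_b: np.array(node2),
                    model.Node_neg: np.array(node3),
                }
                _, loss_batch = sess.run([train_op, model.loss], feed_dict=feed_dict)
                loss_epoch += loss_batch
            print('epoch:', epoch + 1, 'loss:', loss_epoch)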
import logging
import os
import random

import psutil
import tensorflow as tf

import came
import config
from DataSet import dataSet

FORMAT = "%(asctime)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)

# Load data.
all_graph_path = 'all_graph.txt'
song_graph_path = 'song_graph.txt'
text_path = 'data_all.txt'
logging.info('start reading data.......')
data = dataSet(text_path, all_graph_path, song_graph_path)
logging.info('end reading data.......')

# Assign GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = config.CUDA_VISIBLE_DEVICES

# GPU usage amount: claim memory on demand rather than all at once.
gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True

with tf.Graph().as_default():
    sess = tf.Session(config=gpu_config)
    with sess.as_default():
        model = came.Model(data.num_vocab, data.num_all_nodes)
        opt = tf.train.AdamOptimizer(config.lr)
        train_op = opt.minimize(model.loss)
        sess.run(tf.global_variables_initializer())
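# allow_growth lets the process claim GPU memory on demand. If a hard cap is
# preferred instead, TF1's ConfigProto supports a fixed fraction:
gpu_config = tf.ConfigProto()
gpu_config.gpu_options.per_process_gpu_memory_fraction = 0.5  # at most half the GPU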
import os

import numpy as np
from sklearn.model_selection import train_test_split

from DataSet import dataSet


def getTrainData():
    print("prepare Data")
    # graph.txt stores the edge links; data.txt stores the node descriptions
    # (the zhihu corpus is Chinese-language text).
    graph_path = os.path.join('temp/graph.txt')
    text_path = os.path.join("..", "datasets", 'zhihu', 'data.txt')
    data = dataSet(text_path, graph_path)

    # Read the tab-separated edge list.
    f = open('temp/graph.txt', 'rb')
    edges = [list(map(int, i.strip().decode().split('\t'))) for i in f]
    print(len(edges))
    nodesTrain = list(set([i for j in edges for i in j]))

    # Structural embeddings: vec_all.txt maps a node id (first field) to its
    # vector (remaining fields).
    dvec = {}
    fline = open('temp/vec_all.txt', 'rb')
    for i, j in enumerate(fline):
        if j.decode() != '\n':
            tempvec = list(map(float, j.strip().decode().split(' ')))
            dvec[int(tempvec[0])] = list(tempvec[1:])

    # Text embeddings: embed.txt has one vector per node in node-id order.
    # Each node's final vector is the structural vector concatenated with the
    # text vector (a down-weighted variant, 0.3 * text, was tried previously).
    node2vec = {}
    f = open('temp/embed.txt', 'rb')
    for i, j in enumerate(f):
        if j.decode() != '\n':
            a = list(map(float, j.strip().decode().split(' ')))
            node2vec[i] = list(dvec[i]) + a

    def edge_scores(i, j):
        # For each of 10 negative-sampled nodes, record
        # dot(i, j) - dot(i, negative); fall back to dot(i, j) when the
        # negative node has no embedding.
        lsnodes = data.negNnodes(i, j, 10)
        dot2 = np.dot(node2vec[i], node2vec[j])
        scores = []
        for node in lsnodes:
            if node in node2vec:
                scores.append(dot2 - np.dot(node2vec[i], node2vec[node]))
            else:
                scores.append(dot2)
        return scores

    edgesEmbed = [edge_scores(i, j) for i, j in edges
                  if i in node2vec and j in node2vec]
    edgesEmbedarray = np.array(edgesEmbed)
    X_train, X_test = train_test_split(edgesEmbedarray, test_size=0.2,
                                       random_state=42)

    # Anomalous edges: one space-separated node-id pair per line.
    # NOTE: absolute path from the original development machine.
    fanomaly = open('C:/Users/Administrator/Desktop/CANE-master/code/temp/anomalyedge3.txt', 'rb')
    edgesAnomaly = [list(map(int, i.strip().decode().split(' '))) for i in fanomaly]
    print(len(edgesAnomaly))
    edgeAnomaly = [edge_scores(i, j) for i, j in edgesAnomaly
                   if i in node2vec and j in node2vec]
    edgeAnomalyarray = np.array(edgeAnomaly)

    print(edgesEmbedarray.shape)
    print(X_train.shape)
    print(X_test.shape)
    print(edgeAnomalyarray.shape)
    print("prepare Data ended")
    return X_train, X_test, edgeAnomalyarray
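# The original code carried a commented-out variant that scaled the text
# embedding by 0.3 before concatenation. If weighting the two views matters,
# it can be made explicit; combine() and alpha are hypothetical helpers, not
# part of the project.
def combine(structural, textual, alpha=1.0):
    # Concatenate a structural vector with an optionally down-weighted text
    # vector; alpha=0.3 reproduces the commented-out variant.
    return list(structural) + [alpha * t for t in textual]

print(combine([0.1, 0.2], [0.5, 0.5], alpha=0.3))  # [0.1, 0.2, 0.15, 0.15]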
import random

import numpy as np
import tensorflow as tf

import config
import sacqa
from DataSet import dataSet

# Load data.
train_graph_path = 'datasets/train_graph.txt'
text_path = 'datasets/data.txt'
val_graph_path = 'datasets/test_graph.txt'
train_y = 'datasets/train_y.txt'
val_y = 'datasets/test_y.txt'
val_q = 'datasets/test_q.txt'
data = dataSet(text_path, train_graph_path, val_graph_path,
               train_y, val_y, val_q)

# Start session.
sess = tf.Session()
with sess.as_default():
    model = sacqa.Model(data.num_vocab)
    print(tf.trainable_variables())
    opt = tf.train.AdamOptimizer(config.lr)
    # opt = tf.train.GradientDescentOptimizer(config.lr)  # alternative optimizer
    train_op = opt.minimize(model.loss)
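    # Unlike the two training scripts above, this snippet stops before the
    # variables are initialized; under TF1 the session needs this call before
    # any training step can run.
    sess.run(tf.global_variables_initializer())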