def run(args):
    """Train an RCAE model on the dataset found at ``args.input``.

    Settings come from ``Config(args)``. The data is read with the
    categorical or the numeric loader depending on ``args.categorical``;
    only the training instances are used here. Training runs for
    ``config.epoch`` passes over full mini-batches and prints the average
    loss on every 100th batch.
    """
    settings = Config(args)

    # NOTE(review): `load_numberic` is spelled exactly as found in this
    # function; confirm it matches the loader module before renaming.
    loader = ld.load_categorical if args.categorical else ld.load_numberic
    training_data, _, _, _ = loader(args.input)
    instance_dim = len(training_data[0])

    with tf.Graph().as_default(), tf.Session() as sess:
        model = RCAE(instance_dim, settings.hidden_dim)
        sess.run(tf.global_variables_initializer())

        for epoch in range(settings.epoch):
            # Trailing instances that do not fill a whole batch are skipped.
            num_batches = len(training_data) // settings.batch_size
            for step in range(num_batches):
                first = step * settings.batch_size
                last = (step + 1) * settings.batch_size
                outputs = sess.run(
                    (model.D_train, model.G_train, model.average_loss),
                    feed_dict={model.data: training_data[first:last]},
                )
                average_loss = outputs[2]
                if step % 100 != 0:
                    continue
                print("In epoch %d and batch %d, average loss: %.4f "%(epoch, step, average_loss))
def run(args):
    """Train an ALOCC model and evaluate it on the test split after each epoch.

    After every training epoch the last batch's average loss, scaled by
    ``config.threshold_scale``, is used as a threshold: a test instance is
    predicted ``1`` when its loss is below the threshold and ``0``
    otherwise. Predictions are scored per instance and per block of
    ``config.block_size`` consecutive instances (a block is ``1`` when the
    sum of its values reaches ``block_size * block_ratio``). Per-instance
    losses and per-block mean losses are written to ``args.instance_output``
    and ``args.block_output``; both files are rewritten on every epoch.

    Args:
        args: parsed command-line namespace. ``input``, ``categorical``,
            ``instance_output`` and ``block_output`` are read here, and the
            whole object is handed to ``Config``.

    Raises:
        ValueError: if the training set holds fewer instances than one
            batch, so no loss threshold can be derived.
    """
    config = Config(args)

    path = args.input
    if args.categorical:
        training_data, training_label, testing_data, testing_label = ld.load_categorical(path)
    else:
        training_data, training_label, testing_data, testing_label = ld.load_numeric(path)
    instance_dim = len(training_data[0])

    # Shuffle test instances and labels with one shared permutation so the
    # pairs stay aligned. This avoids re-seeding the global RNG and works
    # for plain lists as well as array-like containers.
    order = list(range(len(testing_data)))
    random.shuffle(order)
    testing_data = [testing_data[j] for j in order]
    testing_label = [testing_label[j] for j in order]

    with tf.Graph().as_default(), tf.Session() as sess:
        model = ALOCC(instance_dim, config.hidden_dim, config.alpha,
                      config.beta, config.gamma, config.learning_rate)
        sess.run(tf.global_variables_initializer())

        for epoch in range(config.epoch):
            # ---- training ----
            batch_num = len(training_data) // config.batch_size
            if batch_num == 0:
                # Without at least one batch there is no loss to threshold on.
                raise ValueError(
                    "training set has %d instances, fewer than batch size %d"
                    % (len(training_data), config.batch_size))
            for i in range(batch_num):
                curr_batch = training_data[i * config.batch_size:(i + 1) * config.batch_size]
                result = sess.run(
                    (model.D_train, model.G_train, model.average_loss),
                    feed_dict={model.data: curr_batch})
                # The threshold tracks the most recent batch's average loss.
                loss_threshold = result[2]
                if i % 100 == 0:
                    print("In epoch %d and batch %d, average loss: %.4f "%(epoch, i, loss_threshold))

            # ---- testing after each epoch ----
            # Instance-level evaluation.
            loss_threshold = loss_threshold * config.threshold_scale
            testing_data_loss = sess.run(model.total_loss,
                                         feed_dict={model.data: testing_data})
            individual_pred = [1 if loss < loss_threshold else 0
                               for loss in testing_data_loss]
            instance_eval = eval(testing_label, individual_pred)

            # Write the true label and the raw loss of every test instance.
            with open(args.instance_output, 'w') as bw:
                bw.write("true pred\n")
                for label, loss in zip(testing_label, testing_data_loss):
                    bw.write(str(label) + " " + str(loss) + "\n")

            # Block-level evaluation over consecutive, non-overlapping blocks;
            # trailing instances that do not fill a block are ignored.
            block_size = config.block_size
            block_cutoff = block_size * config.block_ratio
            testing_block_num = len(testing_data) // block_size
            pred_block = []
            true_block = []
            block_rows = []
            for b in range(testing_block_num):
                window = slice(b * block_size, (b + 1) * block_size)
                pred_sum = np.sum(individual_pred[window])
                true_sum = np.sum(testing_label[window])
                true_flag = 0 if true_sum < block_cutoff else 1
                pred_block.append(0 if pred_sum < block_cutoff else 1)
                true_block.append(true_flag)
                block_rows.append((true_flag, np.mean(testing_data_loss[window])))
            block_eval = eval(true_block, pred_block)

            # Write the true block label and the mean loss of every block.
            with open(args.block_output, 'w') as bw:
                bw.write("true pred\n")
                for true_flag, mean_loss in block_rows:
                    bw.write(str(true_flag) + " " + str(mean_loss) + "\n")

            print("instance level evaluation: ", instance_eval)
            print("block level evaluation: ", block_eval)
def IsoForest(args):
    """Fit an Isolation Forest baseline and evaluate it on the test split.

    The forest is fitted on the training instances only. Its ``-1``
    predictions are mapped to ``0`` and all other predictions are kept, then
    scored per instance and per block of ``args.block_size`` consecutive
    instances (a block is ``1`` when the sum of its values reaches
    ``block_size * block_ratio``). The ``score_samples`` value of each
    instance and the mean score of each block are written to
    ``args.instance_output`` and ``args.block_output``.

    Args:
        args: parsed command-line namespace providing ``input``,
            ``categorical``, ``instance_output``, ``block_output``,
            ``block_size`` and ``block_ratio``.
    """
    path = args.input
    if args.categorical:
        training_data, training_label, testing_data, testing_label = ld.load_categorical(path)
    else:
        training_data, training_label, testing_data, testing_label = ld.load_numeric(path)

    # Shuffle test instances and labels with one shared permutation so the
    # pairs stay aligned. This avoids re-seeding the global RNG and works
    # for plain lists as well as array-like containers.
    order = list(range(len(testing_data)))
    random.shuffle(order)
    testing_data = [testing_data[j] for j in order]
    testing_label = [testing_label[j] for j in order]

    clf = IsolationForest(max_samples=100)
    clf.fit(training_data)
    label_pred = clf.predict(testing_data)

    # Map the forest's -1 output to 0; every other value is kept unchanged.
    individual_pred = [0 if pred == -1 else pred for pred in label_pred]
    instance_eval = eval(testing_label, individual_pred)

    score = clf.score_samples(testing_data)
    print("data", testing_label, label_pred, score)

    # Write the true label and the raw score of every test instance.
    with open(args.instance_output, 'w') as bw:
        bw.write("true pred\n")
        for label, value in zip(testing_label, score):
            bw.write(str(label) + " " + str(value) + "\n")

    # Block-level evaluation over consecutive, non-overlapping blocks;
    # trailing instances that do not fill a block are ignored.
    block_size = args.block_size
    block_cutoff = block_size * args.block_ratio
    testing_block_num = len(testing_data) // block_size
    pred_block = []
    true_block = []
    block_rows = []
    for b in range(testing_block_num):
        window = slice(b * block_size, (b + 1) * block_size)
        pred_sum = np.sum(individual_pred[window])
        true_sum = np.sum(testing_label[window])
        true_flag = 0 if true_sum < block_cutoff else 1
        pred_block.append(0 if pred_sum < block_cutoff else 1)
        true_block.append(true_flag)
        block_rows.append((true_flag, np.mean(score[window])))
    block_eval = eval(true_block, pred_block)

    # Write the true block label and the mean score of every block.
    with open(args.block_output, 'w') as bw:
        bw.write("true pred\n")
        for true_flag, mean_score in block_rows:
            bw.write(str(true_flag) + " " + str(mean_score) + "\n")

    print("instance level evaluation: ", instance_eval)
    print("block level evaluation: ", block_eval)