def BPNet(file_name):
    rengong_filename = r'C:\Users\Administrator\Desktop\yanshan_rengong1.tif'
    P = []
    T = []
    # impervious-surface samples (label 1); the sampled pixels are then
    # replaced by the pre-saved .mat sample
    butoushui_P = LoadData(1, file_name, rengong_filename)
    butoushui_P = RD.sample(butoushui_P, 2000)
    butoushui_P = sio.loadmat('../JadeLibSVM/' + 'butoushui_P.mat')['butoushui_P']
    M = len(butoushui_P)
    P = butoushui_P
    P = butoushui_P.tolist()
    T = [1] * M
    print M
    # pervious-surface samples (label 0)
    toushui_P = LoadData(0, file_name, rengong_filename)
    toushui_P = RD.sample(toushui_P, 2000)
    toushui_P = sio.loadmat('../JadeLibSVM/' + 'toushui_P.mat')['toushui_P']
    M = len(toushui_P)
    P.extend(toushui_P)
    toushui_P = [0] * M
    T.extend(toushui_P)
    print M
    nn = NeuralNetwork([3, 2, 1], 'tanh')
    nn.fit(P, T, 0.01, 5000)
    print('************** Training finished ****************')
    p_test = extract_Yanshan('')
    predict_label = []
    for i in p_test:
        predict_label.append(nn.predict(i)[0])
    pic = array(Image.open(file_name))
    X = pic.shape[0]
    Y = pic.shape[1]
    P = pic.shape[2]
    Test_data = np.zeros((X * Y, 3), dtype='double')
    k = 0
    for i in range(X):
        for j in range(Y):
            Test_data[k, 0] = pic[i, j, 0]
            Test_data[k, 1] = pic[i, j, 1]
            Test_data[k, 2] = pic[i, j, 2]
            k = k + 1
    result = np.zeros((X, Y, 3))  # RGB color image
    for k in range(X * Y):  # R, G, B components
        if (predict_label[k] >= 0.5):
            Test_data[k, 0] = 1
            Test_data[k, 1] = 1
            Test_data[k, 2] = 1  # white
        elif (predict_label[k] < 0.5):
            Test_data[k, 0] = 0
            Test_data[k, 1] = 0
            Test_data[k, 2] = 0  # black
    k = 0
    for i in range(X):
        for j in range(Y):
            result[i, j, 0] = Test_data[k, 0]
            result[i, j, 1] = Test_data[k, 1]
            result[i, j, 2] = Test_data[k, 2]
            k = k + 1
    return result
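# Hedged usage sketch (not part of the original code): BPNet returns an (X, Y, 3)
# array of 0/1 values, so one plausible way to inspect it is to scale it to 8-bit
# and save it with PIL. The input and output file names below are placeholders.
import numpy as np
from PIL import Image

binary_map = BPNet(r'C:\Users\Administrator\Desktop\yanshan.tif')
Image.fromarray((np.asarray(binary_map) * 255).astype('uint8')).save('yanshan_classified.png')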
def main():
    global domain
    global domain_distance
    global distance_file
    if not len(sys.argv) == 4:
        print("python testBetaIC.py <k> <beta> <domain number> \n Domain num: \n 0 : accident, 1: sanitation, 2: crime, 3: adult")
        return
    k = int(sys.argv[1])  # 50
    beta = float(sys.argv[2])
    domain_num = int(sys.argv[3])
    domain = domain_arr[domain_num]
    domain_distance = distance_arr[domain_num]
    print(domain + " " + domain_distance)
    Ld = LoadData(domain)
    G = Ld.readFile()
    distance_file = ""
    if (os.path.isfile(domain + "_distance.txt")):
        distance_file = domain + "_distance.txt"
    print("Dataset:", domain, "K = ", k, "Distance:", domain_distance, "beta=", beta)
    aff_array = test_Kcenter(G, k, domain, domain_distance)
    print("\n")
    print("#######################################################################\n")
    bs = betaStrong(domain, G, aff_array, k, beta, domain_distance, distance_file)
    aff_array = bs.beta_IC()
    calculate_composition(G, k, aff_array, domain)
    del Ld
def train(FLAGS):
    # Data loading
    import pickle as pk
    data = DATA.LoadData(FLAGS.path, FLAGS.dataset)
    if FLAGS.verbose > 0:
        print("FM: dataset=%s, embedding_size=%d, #epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, keep=%s, metric=%s, optimizer=%s, batch_norm=%d"
              % (FLAGS.dataset, FLAGS.embedding_size, FLAGS.epoch, FLAGS.batch_size, FLAGS.lr,
                 FLAGS.lamda, FLAGS.keep, FLAGS.metric, FLAGS.optimizer, FLAGS.batch_norm))

    # Training
    t1 = time()
    model = FM(data.features_M, FLAGS.pretrain, make_save_file(FLAGS), FLAGS.embedding_size,
               FLAGS.valid_dimen, FLAGS.epoch, FLAGS.metric, FLAGS.batch_size, FLAGS.lr,
               FLAGS.lamda, FLAGS.keep, FLAGS.optimizer, FLAGS.batch_norm, FLAGS.verbose)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    # Find the best validation result across iterations
    best_valid_score = 0
    best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s]"
          % (best_epoch + 1, model.train_rmse[best_epoch], model.valid_rmse[best_epoch], time() - t1))
def train(args):
    # Data loading
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("AFM: dataset=%s, factors=%s, attention=%d, freeze_fm=%d, #epoch=%d, batch=%d, lr=%.4f, lambda_attention=%.1e, keep=%s, optimizer=%s, batch_norm=%d, decay=%f, activation=%s"
              % (args.dataset, args.hidden_factor, args.attention, args.freeze_fm, args.epoch,
                 args.batch_size, args.lr, args.lamda_attention, args.keep, args.optimizer,
                 args.batch_norm, args.decay, args.activation))
    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity

    save_file = make_save_file(args)
    # Training
    t1 = time()
    num_variable = data.truncate_features()
    if args.mla:
        args.freeze_fm = 1
    model = AFM(data.features_M, args.pretrain, save_file, args.attention, eval(args.hidden_factor),
                args.valid_dimen, activation_function, num_variable, args.freeze_fm, args.epoch,
                args.batch_size, args.lr, args.lamda_attention, eval(args.keep), args.optimizer,
                args.batch_norm, args.decay, args.verbose, args.mla)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    # Find the best validation result across iterations
    best_valid_score = 0
    best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s]"
          % (best_epoch + 1, model.train_rmse[best_epoch], model.valid_rmse[best_epoch], time() - t1))
def train(args):
    # Data loading
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("FM: dataset=%s, factors=%d, #epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, keep=%.2f, optimizer=%s, batch_norm=%d"
              % (args.dataset, args.hidden_factor, args.epoch, args.batch_size, args.lr,
                 args.lamda, args.keep, args.optimizer, args.batch_norm))

    # Training
    t1 = time()
    model = FM(data.features_M, args.pretrain, make_save_file(args), args.hidden_factor,
               args.epoch, args.batch_size, args.lr, args.lamda, args.keep, args.optimizer,
               args.batch_norm, args.verbose, args.mla)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    # Find the best validation result across iterations
    best_valid_score = 0
    best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)= %d\t train = %.4f, valid = %.4f, test = %.4f [%.1f s]"
          % (best_epoch + 1, model.train_rmse[best_epoch], model.valid_rmse[best_epoch],
             model.test_rmse[best_epoch], time() - t1))
def getLogisticRegression(self):
    """Fit the logistic regression model to the training data."""
    ld = LoadData(self.argumentsDict)
    X, y = ld.loadTrainingDataSet()
    tm = TrainModels(X, y)
    model = tm.getModelLogistic()
    return model, ld
def getDummy(self):
    """Fit the dummy (baseline) model to the training data."""
    ld = LoadData(self.argumentsDict)
    X, y = ld.loadTrainingDataSet()
    tm = TrainModels(X, y)
    model = tm.getDummy()
    return model, ld
def test(self):
    load_test = LoadData()
    self.folder_test = load_test.data_test
    self.train_generator, self.x_train, self.x_valid, self.y_train, self.y_valid = load_test.loadDataTrain()
    self.test_generator, self.x_test = load_test.loadDataTest(self.folder_test)
    model = load_model('./model.h5')
    self.test_generator.reset()
    pred = model.predict_generator(self.test_generator, verbose=1, steps=600 / 1)
    predicted_class_indices = np.argmax(pred, axis=1)
    labels = self.train_generator.class_indices
    labels = dict((v, k) for k, v in labels.items())
    prediksi = [labels[k] for k in predicted_class_indices]
    path = self.test_generator.filenames
    filenames = []
    for x in range(len(path)):
        filenames.append(path[x][12:len(path[x]) - 8])
    true_pred = 0
    compare = []
    for x in range(len(filenames)):
        # count a prediction as correct when the label extracted from the
        # filename matches the predicted class
        if filenames[x] == prediksi[x]:
            true_pred = true_pred + 1
            compare.append("True")
        else:
            compare.append("False")
    row = len(self.test_generator)
    list_prediksi = []
    for i in range(row):
        list_prediksi.append([filenames[i], prediksi[i], compare[i]])
    # print result to console
    # s = ""
    # for i in range(row):
    #     print(i, list_prediksi[i])
    s = ''.join(prediksi[0:row])
    # print(s)
    self.progressBar.setValue(100)
    self.txtLR.setText(s)
    persentase = (true_pred / len(filenames)) * 100
    # print(persentase)
    self.lblHasil.setText("Accuracy: %.2f%%" % (persentase))
def processTestSVM(self):
    print('Loading data ...')
    # load the data
    self.dL = Ld.LoadData()
    # self.trainData = self.dL.loadCsvData('tmp/trainDataSetByT.csv')
    self.testData = self.dL.loadCsvData('tmp/testDataSetByT.csv')
    print('testing ...')
    self.testBySvm(self.testData)
def processTrainBayes(self):
    print('Loading data ...')
    # load the data
    self.dL = Ld.LoadData()
    self.trainData = self.dL.loadCsvData('tmp/trainDataSetByT.csv')
    # self.testData = self.dL.loadCsvData('tmp/testDataSetByT.csv')
    print('training ...')
    self.trainByBayes(self.trainData)
def evaluate(args):
    # load test data
    data = DATA.LoadData(args.path).Test_data
    save_file = make_save_file(args)

    # load the graph
    weight_saver = tf.train.import_meta_graph(save_file + '.meta')
    pretrain_graph = tf.get_default_graph()

    # load tensors
    feature_embeddings = pretrain_graph.get_tensor_by_name('feature_embeddings:0')
    nonzero_embeddings = pretrain_graph.get_tensor_by_name('nonzero_embeddings:0')
    feature_bias = pretrain_graph.get_tensor_by_name('feature_bias:0')
    bias = pretrain_graph.get_tensor_by_name('bias:0')
    fm = pretrain_graph.get_tensor_by_name('fm:0')
    fm_out = pretrain_graph.get_tensor_by_name('fm_out:0')
    out = pretrain_graph.get_tensor_by_name('out:0')
    train_features = pretrain_graph.get_tensor_by_name('train_features_fm:0')
    train_labels = pretrain_graph.get_tensor_by_name('train_labels_fm:0')
    dropout_keep = pretrain_graph.get_tensor_by_name('dropout_keep_fm:0')
    train_phase = pretrain_graph.get_tensor_by_name('train_phase_fm:0')

    # restore session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    weight_saver.restore(sess, save_file)

    # start evaluation
    num_example = len(data['Y'])
    feed_dict = {train_features: data['X'],
                 train_labels: [[y] for y in data['Y']],
                 dropout_keep: 1.0,
                 train_phase: False}
    ne, fe = sess.run((nonzero_embeddings, feature_embeddings), feed_dict=feed_dict)
    _fm, _fm_out, predictions = sess.run((fm, fm_out, out), feed_dict=feed_dict)

    # calculate rmse
    y_pred = np.reshape(predictions, (num_example,))
    y_true = np.reshape(data['Y'], (num_example,))
    predictions_bounded = np.maximum(y_pred, np.ones(num_example) * min(y_true))  # bound the lower values
    predictions_bounded = np.minimum(predictions_bounded, np.ones(num_example) * max(y_true))  # bound the higher values
    RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))
    print("Test RMSE: %.4f" % (RMSE))
    logging.info("Test RMSE: %.4f" % (RMSE))
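# Hedged standalone sketch (not from the original project): the same
# "bound the predictions, then compute RMSE" step on toy arrays, to make the
# clipping of predictions into [min(y_true), max(y_true)] explicit.
import math
import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([1.0, 5.0, 3.0])
y_pred = np.array([0.2, 6.3, 2.9])
# np.clip is equivalent to the np.maximum / np.minimum pair used above
bounded = np.clip(y_pred, y_true.min(), y_true.max())
print("Toy RMSE: %.4f" % math.sqrt(mean_squared_error(y_true, bounded)))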
def train(args):
    # Data loading
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("DeepAFM: dataset=%s, factors=%s, valid_dim=%d, #epoch=%d, batch=%d, lr=%.4f, lambda_attention=%.1e, keep=%s, optimizer=%s, batch_norm=%d, decay=%f, activation=%s, field_size=%d, dropout_deep=%s, deep_layers=%s"
              % (args.dataset, args.hidden_factor, args.valid_dimen, args.epoch, args.batch_size,
                 args.lr, args.lamda_attention, args.keep, args.optimizer,
                 args.batch_norm, args.decay, args.activation, args.field_size,
                 args.dropout_deep, args.deep_layers))
    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity

    save_file = make_save_file(args)

    # Training
    t1 = time()
    model = DeepAFM(data.features_M, args.pretrain, save_file, eval(args.hidden_factor),
                    args.valid_dimen, activation_function, args.epoch, args.batch_size, args.lr,
                    args.lamda_attention, eval(args.keep), args.optimizer, args.batch_norm,
                    args.decay, args.verbose, args.field_size, random_seed=2016, i_gpu=args.gpu,
                    dropout_deep=eval(args.dropout_deep), deep_layers=eval(args.deep_layers))
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    # Find the best validation result across iterations
    best_valid_score = 0
    best_valid_score = max(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)= %d\t train = %.4f, valid = %.4f [%.1f s]"
          % (best_epoch + 1, model.train_rmse[best_epoch], model.valid_rmse[best_epoch], time() - t1))
def getDataSet(pDict, resdir):
    """Load the data set"""
    ld = LoadData(pDict)
    X_train, y_train = ld.loadTrainingDataSet()
    X_test, dbkeys = ld.loadTestDataSet()
    dictML = {
        "X_test": X_test,
        "X_train": X_train,
        "y_train": y_train,
        "resultsDirectory": resdir
    }
    return dictML, dbkeys
def train(args):
    # Data loading
    load = DATA.LoadData("data")
    data = load.getdata()
    cdata = load.cdata()
    cnn_label = load.getcnn()
    if args.verbose > 0:
        print("FM: #epoch=%d, batch=%d, lr=%.4f, optimizer=%s, batch_norm=%d"
              % (args.epoch, args.batch_size, args.lr, args.optimizer, args.batch_norm))
    sjnum = len(data[20])
    sdnum = len(data[21])
    model = SLR(sjnum, sdnum, args)
    model.train(cnn_label, data, cdata)
def evaluate(args):
    # load test data
    data = DATA.LoadData(args.path, args.dataset).Test_data
    save_file = make_save_file(args)

    # load the graph
    weight_saver = tf.train.import_meta_graph(save_file + '.meta')
    pretrain_graph = tf.get_default_graph()

    # load tensors
    # feature_embeddings = pretrain_graph.get_tensor_by_name('feature_embeddings:0')
    # feature_bias = pretrain_graph.get_tensor_by_name('feature_bias:0')
    # bias = pretrain_graph.get_tensor_by_name('bias:0')
    # afm = pretrain_graph.get_tensor_by_name('afm:0')
    out_of_afm = pretrain_graph.get_tensor_by_name('DeepAFM_out:0')

    # placeholders for afm
    train_features_afm = pretrain_graph.get_tensor_by_name('train_features_afm:0')
    train_labels_afm = pretrain_graph.get_tensor_by_name('train_labels_afm:0')
    dropout_keep_afm = pretrain_graph.get_tensor_by_name('dropout_keep_afm:0')
    train_phase_afm = pretrain_graph.get_tensor_by_name('train_phase_afm:0')
    dropout_keep_deep = pretrain_graph.get_tensor_by_name('dropout_keep_deep:0')

    # restore session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    weight_saver.restore(sess, save_file)

    # start evaluation
    num_example = len(data['Y'])
    feed_dict = {train_features_afm: data['X'],
                 train_labels_afm: [[y] for y in data['Y']],
                 dropout_keep_afm: [1.0, 1.0],
                 dropout_keep_deep: [1.0] * 3,
                 train_phase_afm: False}
    predictions = sess.run((out_of_afm), feed_dict=feed_dict)

    # calculate AUC
    y_pred_afm = np.reshape(predictions, (num_example,))
    y_true = np.reshape(data['Y'], (num_example,))
    auc_score = roc_auc_score(y_true, y_pred_afm)
    print("Test AUC: {:.6f}".format(auc_score))
def train(args):
    # Data loading
    load = DATA.LoadData(args.dataset)
    data = load.getdata()
    data_for_validation = load.data_for_validation()
    sentence_label = load.getsentence()
    if args.verbose > 0:
        print("args: #batch_size=%d, epoch=%d, batch=%d, lr=%.4f, optimizer=%s, batch_norm=%d, L2_regularization=%f"
              % (args.batch_size, args.epoch, args.batch_size, args.lr, args.optimizer,
                 args.batch_norm, args.L2_regularization))
        logging.info("args: #batch_size=%d, epoch=%d, batch=%d, lr=%.4f, optimizer=%s, batch_norm=%d, L2_regularization=%f"
                     % (args.batch_size, args.epoch, args.batch_size, args.lr, args.optimizer,
                        args.batch_norm, args.L2_regularization))
    train_data_length = len(data[20])
    test_data_length = len(data[21])
    model = SLR(train_data_length, test_data_length, args)
    model.train(sentence_label, data, data_for_validation)
def main():
    global domain
    global distance_file
    global domain_distance
    if not len(sys.argv) == 4:
        print("Input format:")
        print("python testCluster.py <k> <domain number> <approach> \n Domain num: \n 0 : accident, 1: sanitation, 2: crime, 3: adult")
        print("Approach: \n 0 : strong-interpretability (IKC), 1: k-center, 2: Partition, 3: KC_F")
        return
    k = int(sys.argv[1])  # 50
    domain_num = int(sys.argv[2])
    approach = int(sys.argv[3])
    domain = domain_arr[domain_num]
    domain_distance = distance_arr[domain_num]
    Ld = LoadData(domain)
    G = Ld.readFile()
    distance_file = ""
    if (os.path.isfile(domain + "_distance.txt")):
        distance_file = domain + "_distance.txt"
    print("Dataset:", domain, "K = ", k, "Distance:", domain_distance, "distance file = ", distance_file)
    if (approach == 0):
        test_IKC1(G, k, domain, distance_file)
    elif approach == 1:
        test_Kcenter(G, k, domain)
    elif approach == 2:
        baseline_partition(G, k, domain, distance_file)
    else:
        raw_Interpretability(G, k, domain)
    del Ld
def run():
    time.clock()
    t0 = float(time.clock())

    # load data from file, and do normalization on X.
    [trainX, trainY, testX, testY] = ld.LoadData()
    t1 = float(time.clock())
    print 'Loading data from File. using time %.4f s, \n' % (t1 - t0)

    [trainX, testX] = nor.Normalization(trainX, testX)
    t2 = float(time.clock())
    print 'Normalization on train & test X. using time %.4f s, \n' % (t2 - t1)

    # implementation assignments
    lr_reg = [0.001, 0.01, 0.1, 1, 10, 100]  # learning rate
    max_iter = 1000000  # max iteration
    eps = 0.001  # gradient comparing epsilon
    lmd_reg = [0, 0.0001, 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 100]  # regularization lambda

    # part 1: lambda = 0, different learning rates
    best_lr = run_part1(trainX, trainY)  # default lr, grad_epsilon and max_iterations
    # [lr, bestloss, weight, lossCont] = HW1_part_1(trainX, trainY)  # default lr, grad_epsilon and max_iterations
    t3 = float(time.clock())
    print 'Part 1, lambda = 0, changing lr, using time %.4f s, \n' % (t3 - t2)

    # part 2: fixed learning rate, different lambda
    max_iter = 10000
    run_part2(trainX, trainY, testX, testY, lmd_reg, best_lr, eps, max_iter)
    t4 = float(time.clock())
    print 'Part 2, lr = 0.05, changing lmd, using time %.4f s, \n' % (t4 - t3)

    # part 3: fixed lr, using 10-fold cross-validation
    # split training data into k parts
    max_iter = 1000
    k = 10
    run_part3(trainX, trainY, testX, testY, best_lr, eps, max_iter, lmd_reg, k)
    t5 = float(time.clock())
    print 'Part 3, lr = 0.05, finding the best lmd, using time %.4f s, \n' % (t5 - t4)
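# Hedged sketch (not original code): one simple way the "split training data into
# k parts" step for 10-fold cross-validation could be done with numpy; run_part3
# presumably implements its own version. The helper name below is hypothetical.
import numpy as np

def split_into_folds(n_samples, k=10, seed=0):
    """Return a list of k index arrays that together cover 0..n_samples-1."""
    rng = np.random.RandomState(seed)
    indices = rng.permutation(n_samples)
    return np.array_split(indices, k)

folds = split_into_folds(100, k=10)
print([len(f) for f in folds])  # ten folds of ten indices each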
def train(args):
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("PNN: dataset=%s, factors=%s, epoch=%d, batch_size=%d, lr=%.4f, keep=%s, optimizer=%s, batch_norm=%s, decay=%f, activation=%s, use_inner=%d, D1=%d"
              % (args.dataset, args.hidden_factor, args.epoch, args.batch_size, args.lr,
                 eval(args.keep), args.optimizer, args.batch_norm, args.decay,
                 args.activation, args.use_inner, args.D1))
    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity

    save_file = make_save_file(args)

    ## training
    t1 = time()
    model = PNN(data.features_M, data.field, args.hidden_factor, args.pretrain, save_file,
                activation_function, args.epoch, args.batch_size, args.lr, args.optimizer,
                args.batch_norm, args.decay, eval(args.keep), args.use_inner, args.D1)
    model.train(data.Train_data, data.Validation_data, data.Test_data)

    ## find the best validation result across iterations
    best_valid_score = 0
    if model.greater_is_better:
        best_valid_score = max(model.valid_rmse)
    else:
        best_valid_score = min(model.valid_rmse)
    best_epoch = model.valid_rmse.index(best_valid_score)
    print("Best Iter(validation)=%d\t train = %.4f, valid = %.4f [%.1f s]"
          % (best_epoch + 1, model.train_rmse[best_epoch], model.valid_rmse[best_epoch], time() - t1))
def __init__(self, name, train_dataset, val_dataset, w2v_model_number):
    np.random.seed(7)
    self.name = name
    timestamp = time.time()
    date_time = str(datetime.datetime.fromtimestamp(timestamp).strftime('%d%m%Y-%H%M'))
    self.result_folder = r'./ExperimentResults/' + date_time + '_' + self.name
    os.makedirs(self.result_folder)
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=self.result_folder + '/info.log',
                        filemode='w')
    sys.excepthook = log_exceptions
    self.loaded_data = LoadData.LoadData(w2v_model_number, train_dataset, val_dataset, self.result_folder)

    # get size of state and action, and inputs
    self.state_space = [SENTENCE_LENGTH, EMBEDDING_SIZE]
    self.action_size = self.loaded_data.action_size
    self.image_vector_size = self.loaded_data.image_vector_size

    # create model for actor network
    self.model = Models.build_actor_model(self.state_space, self.action_size, self.image_vector_size)
    with open(self.result_folder + '/model_architecture.txt', 'w') as fh:
        self.model.summary(print_fn=lambda x: fh.write(x + '\n'))
    with open(self.result_folder + '/model_config.bin', 'wb') as fh:
        pickle.dump(self.model.get_config(), fh, protocol=2)
def main():
    args = Parser.parse_command_line()
    samples, outdir = ld.LoadData(**args)
    dataset = ld.GetData(samples, outdir, **args)

    # Creation of a directory for trained pickle file
    pkldir = join(outdir, 'trained_pickle')
    if not os.path.isdir(pkldir):
        os.system('mkdir -p %s' % pkldir)
    else:
        pass

    if args['pkl_file'] is not None:
        tag = args['pkl_file'].replace('%s/trained' % pkldir, '').replace('.pkl', '')
        ml = MLA.MLA(tag, dataset, **args)
        clf = joblib.load(args['pkl_file'])
        print "\nTrained forest is loaded from %s" % (args['pkl_file'])
        print clf
    else:
        ml, clf = training_go_or_stop(dataset, pkldir, **args)

    evaluation_go_or_stop(ml, clf, samples, outdir, **args)
'''
This main function is specific to the experiments performed in the paper.
'''
args = parse_args()

if os.path.isdir('logDir'):
    shutil.rmtree('logDir/')

# ordered list of image tweets is created
items = build_itemlist('training/ratings.txt', 'testing/ratings.txt', 'error_imgs.txt', args.path)

# for these users the tweet history was not long enough for accurate personality extraction
users_discard = [u.lower() for u in ['ConnorRyan90', 'grierrxnash', 'austio311', 'Nodays_off__',
                                     'cutiestylespie', 'eazzz_e', 'eminemlights', 'Im_Wierdd1027',
                                     'shay1498', 'urchkin', 'miley23isHot']]

users_filtered = build_userlist('users/traits.csv', users_discard, args.path)  # the 862 users in the dataset
users_all = build_userlist('users/traits.csv', [], args.path)  # all the users in the csv

# instance of class LoadData
data = DATA.LoadData(args.path, args.dataset, items, users_filtered, args.item_ft, args.user_ft, users_all)

if args.verbose > 0:
    print("FM: dataset=%s, factors=%d, num_epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, optimizer=%s"
          % (args.dataset, args.hidden_factor, args.epoch, args.batch_size, args.lr, args.lamda, args.optimizer))

model = FM(data.features_M, args.hidden_factor, args.epoch, args.batch_size, args.lr, args.lamda,
           args.optimizer, args.verbose, args.path, len(users_filtered), len(items), args.item_ft,
           args.user_ft, args.keep_prob, args.batch_norm)
model.train(data.Train_data, data.Test_data)
def evaluate(args):
    # load test data
    data = DATA.LoadData(args.path, args.dataset).Test_data
    save_file = make_save_file(args)

    # load the graph
    weight_saver = tf.train.import_meta_graph(save_file + '.meta')
    pretrain_graph = tf.get_default_graph()

    # load tensors
    # feature_embeddings = pretrain_graph.get_tensor_by_name('feature_embeddings:0')
    feature_bias = pretrain_graph.get_tensor_by_name('feature_bias:0')
    bias = pretrain_graph.get_tensor_by_name('bias:0')
    afm = pretrain_graph.get_tensor_by_name('afm:0')
    out_of_afm = pretrain_graph.get_tensor_by_name('out_afm:0')
    interactions = pretrain_graph.get_tensor_by_name('interactions:0')
    attention_out = pretrain_graph.get_tensor_by_name('attention_out:0')

    # placeholders for afm
    train_features_afm = pretrain_graph.get_tensor_by_name('train_features_afm:0')
    train_labels_afm = pretrain_graph.get_tensor_by_name('train_labels_afm:0')
    dropout_keep_afm = pretrain_graph.get_tensor_by_name('dropout_keep_afm:0')
    train_phase_afm = pretrain_graph.get_tensor_by_name('train_phase_afm:0')

    # tensors and placeholders for fm
    if args.mla:
        out_of_fm = pretrain_graph.get_tensor_by_name('out_fm:0')
        element_wise_product = pretrain_graph.get_tensor_by_name('element_wise_product:0')
        train_features_fm = pretrain_graph.get_tensor_by_name('train_features_fm:0')
        train_labels_fm = pretrain_graph.get_tensor_by_name('train_labels_fm:0')
        dropout_keep_fm = pretrain_graph.get_tensor_by_name('dropout_keep_fm:0')
        train_phase_fm = pretrain_graph.get_tensor_by_name('train_phase_fm:0')

    # restore session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    weight_saver.restore(sess, save_file)

    # start evaluation
    num_example = len(data['Y'])
    if args.mla:
        feed_dict = {train_features_afm: data['X'], train_labels_afm: [[y] for y in data['Y']],
                     dropout_keep_afm: [1.0, 1.0], train_phase_afm: False,
                     train_features_fm: data['X'], train_labels_fm: [[y] for y in data['Y']],
                     dropout_keep_fm: 1.0, train_phase_fm: False}
        ao, inter, out_fm, predictions = sess.run(
            (attention_out, interactions, out_of_fm, out_of_afm), feed_dict=feed_dict)
    else:
        feed_dict = {train_features_afm: data['X'], train_labels_afm: [[y] for y in data['Y']],
                     dropout_keep_afm: [1.0, 1.0], train_phase_afm: False}
        predictions = sess.run((out_of_afm), feed_dict=feed_dict)

    # calculate rmse
    y_pred_afm = np.reshape(predictions, (num_example,))
    y_true = np.reshape(data['Y'], (num_example,))
    predictions_bounded = np.maximum(y_pred_afm, np.ones(num_example) * min(y_true))  # bound the lower values
    predictions_bounded = np.minimum(predictions_bounded, np.ones(num_example) * max(y_true))  # bound the higher values
    RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))
    print("Test RMSE: %.4f" % (RMSE))

    if args.mla:
        # select significant cases
        ao = np.reshape(ao, (num_example, 3))
        y_pred_fm = np.reshape(out_fm, (num_example,))
        pred_abs_fm = abs(y_pred_fm - y_true)
        pred_abs_afm = abs(y_pred_afm - y_true)
        pred_abs = pred_abs_afm - pred_abs_fm
        ids = np.arange(0, num_example, 1)
        sorted_ids = sorted(ids, key=lambda k: pred_abs_afm[k] + abs(ao[k][0] * ao[k][1] * ao[k][2]))
        # sorted_ids = sorted(ids, key=lambda k: abs(ao[k][0]*ao[k][1]*ao[k][2]))
        for i in range(3):
            _id = sorted_ids[i]
            print('## %d: %d' % (i + 1, y_true[_id]))
            print('0.33*%.2f + 0.33*%.2f + 0.33*%.2f = %.2f'
                  % (inter[_id][0], inter[_id][1], inter[_id][2], y_pred_fm[_id]))
            print('%.2f*%.2f + %.2f*%.2f + %.2f*%.2f = %.2f\n'
                  % (ao[_id][0], inter[_id][0],
                     ao[_id][1], inter[_id][1],
                     ao[_id][2], inter[_id][2],
                     y_pred_afm[_id]))
np.random.seed(2019)
random_seed = 2019
args = parse_args()

if args.dataset == 'lastfm':
    print('load lastfm data')
    DATA_ROOT = '../data/lastfm'
if args.dataset == 'frappe':
    print('load frappe data')
    DATA_ROOT = '../data/frappe'
if args.dataset == 'ml-1m':
    print('load ml-1m data')
    DATA_ROOT = '../data/ml-1m'

f1 = open(os.path.join(DATA_ROOT, 'ENSFM.txt'), 'w')
data = DATA.LoadData(DATA_ROOT)

with tf.Graph().as_default():
    tf.set_random_seed(random_seed)
    session_conf = tf.ConfigProto()
    session_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        deep = ENSFM(data.item_map_list, data.user_field_M, data.item_field_M,
                     args.embed_size, data.max_positive_len, args)
        deep._build_graph()
        train_op1 = tf.train.AdagradOptimizer(learning_rate=args.lr,
                                              initial_accumulator_value=1e-8).minimize(deep.loss)
        sess.run(tf.global_variables_initializer())
        batch_size = args.batch_size
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc


def verif(dtest_y, predictions):
    print("\naccuracy_score :", accuracy_score(dtest_y, predictions))
    a = classification_report(dtest_y, predictions)
    print("\nclassification report :\n", (a))
    return a


# plt.figure(figsize=(13, 10))
# plt.subplot(221)
# sns.heatmap(confusion_matrix(dtest_y, predictions), annot=True, fmt="d", linecolor="k", linewidths=3)
# plt.title("CONFUSION MATRIX", fontsize=20)

experiments = LoadData.LoadData()
data = experiments.build_data_frame()

k = 0.9
lag_size = 3000
window_size = 3000
baffle_alg = BAFFLE(lag_size, window_size, k=k, alpha=0.3, explained_variance_ratio=0.95)

with open('time_intervals.json', 'r') as fp:
    time_intervals_dict = json.load(fp)

features = ['h1', 'h2', 'h3']
column_names = list(set([column[0] for column in data]))
column_names.sort()
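# Hedged usage sketch (not part of the original script): verif simply prints and
# returns sklearn's classification report for a pair of label sequences, so it can
# be exercised on toy data like this.
example_report = verif([0, 1, 1, 0, 1], [0, 1, 0, 0, 1])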
        # Y = [[1]]
        all_items = data.binded_items.values()
        # true_item_id = data.binded_items[item]
        # user_feature_embeddings = tf.nn.embedding_lookup(self.weights['user_feature_embeddings'], X_user)
        for itemID in xrange(len(all_items)):
            X_user.append(user_feature)
            item_feature = [int(feature) for feature in data.item_map[itemID].strip().split('-')[0:]]
            X_item.append(item_feature)
        feed_dict = {self.user_features: X_user, self.positive_features: X_item,
                     self.train_phase: False, self.dropout_keep: 1.0}
        scores = self.sess.run((self.positive), feed_dict=feed_dict)
        scores = scores.reshape(len(all_items))
        return scores


if __name__ == '__main__':
    # Data loading
    args = parse_args()
    data = DATA.LoadData(args.path, args.dataset)
    if args.verbose > 0:
        print("FM: dataset=%s, factors=%d, #epoch=%d, batch=%d, lr=%.4f, lambda=%.1e, optimizer=%s, batch_norm=%d, keep=%.2f"
              % (args.dataset, args.hidden_factor, args.epoch, args.batch_size, args.lr,
                 args.lamda, args.optimizer, args.batch_norm, args.keep_prob))
    save_file = '../pretrain/%s_%d' % (args.dataset, args.hidden_factor)

    # Training
    t1 = time()
    model = FM(data.user_field_M, data.item_field_M, args.pretrain, save_file, args.hidden_factor,
               args.loss_type, args.epoch, args.batch_size, args.lr, args.lamda, args.keep_prob,
               args.optimizer, args.batch_norm, args.verbose)
    # model.test()
    model.train(data.Train_data)
    # model.test()
    model.evaluate()
from MATRIX import *

# time units in seconds (3600 seconds per hour)
day = 24 * 60 * 60.
year = 3600. * 24. * 365.25
week = 3600. * 24. * 7.
month = week * 4.
microarcsecond = np.pi / (180 * 3600 * 1e6)

GW_parameters = namedtuple(
    "GW_parameters",
    "logGWfrequency logAmplus logAmcross cosTheta Phi DeltaPhiPlus DeltaPhiCross")

GW_par = gen_rand_GW()
# GW_par = GW_parameters(logGWfrequency=np.log(2*np.pi/(3*month)), logAmplus=-12*np.log(10), logAmcross=-12*np.log(10), cosTheta=0.5, Phi=1.0, DeltaPhiPlus=1*np.pi, DeltaPhiCross=np.pi)

star_positions_times_angles = LoadData(
    "MockAstrometricTimingData/gwastrometry-gaiasimu-1000-randomSphere-v2.dat")
number_of_stars = len(star_positions_times_angles)

sigma = 100 * microarcsecond / np.sqrt(1.0e9 / number_of_stars)
sigma_t = 1.667 * 1.0e-6 / np.sqrt(1.0e9 / number_of_stars)
distances = np.random.normal(3.086e16, 1.0e13, len(star_positions_times_angles))


def WapperFunction_FisherMatrix(args):
    sigma = args[0]
    sigma_t = args[1]
    distances = args[2]
    d = args[3]
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

BATCH_SIZE = 4
IMAGE_SIZE = 256
BUFFER_SIZE = 15000
AUTOTUNE = tf.data.experimental.AUTOTUNE
SEED = 25
N_CHANNELS = 3
N_CLASSES = 2
EPOCHS = 3

dataset = LoadData.LoadData(
    "/home/hossein/synthesisData/training/images/*.png",
    "/home/hossein/synthesisData/validation/images/*.png",
    IMAGE_SIZE,
    BATCH_SIZE,
    shuffle_buffer_size=5000,
    seed=123).get_dataset()

print(dataset['train'])
print(dataset['val'])


def display_sample(display_list):
    """Show side-by-side an input image, the ground truth and the prediction."""
    plt.figure(figsize=(18, 18))
    title = ['Input Image', 'True Mask', 'Predicted Mask']
                                y_pred)  # I haven't checked the log_loss
            return logloss
        '''
        # for testing the classification accuracy
        predictions_binary = []
        for item in y_pred:
            if item > 0.5:
                predictions_binary.append(1.0)
            else:
                predictions_binary.append(0.0)
        Accuracy = accuracy_score(y_true, predictions_binary)
        return Accuracy
        '''


if __name__ == '__main__':
    # Data loading
    args = parse_args()
    data = DATA.LoadData(args.path, args.dataset, args.loss_type)
    if args.verbose > 0:
        print("Neural FM: dataset=%s, hidden_factor=%d, dropout_keep=%s, layers=%s, loss_type=%s, pretrain=%d, #epoch=%d, batch=%d, lr=%.4f, lambda=%.4f, optimizer=%s, batch_norm=%d, activation=%s, early_stop=%d"
              % (args.dataset, args.hidden_factor, args.keep_prob, args.layers, args.loss_type,
                 args.pretrain, args.epoch, args.batch_size, args.lr, args.lamda, args.optimizer,
                 args.batch_norm, args.activation, args.early_stop))
    activation_function = tf.nn.relu
    if args.activation == 'sigmoid':
        activation_function = tf.sigmoid
    elif args.activation == 'tanh':
        activation_function = tf.tanh
    elif args.activation == 'identity':
        activation_function = tf.identity
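# Hedged alternative sketch (not in the original code): the if/elif activation
# chain above could also be written as a lookup table, assuming the same tf and
# args objects that are in scope there; 'relu' remains the default.
ACTIVATIONS = {'sigmoid': tf.sigmoid, 'tanh': tf.tanh, 'identity': tf.identity}
activation_function = ACTIVATIONS.get(args.activation, tf.nn.relu)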
        if type == 'test':
            self.resultfile.write('\n')
        print type, ': ', themap, thendcg[0], thendcg[2], thendcg[5], thendcg[9]


if __name__ == '__main__':
    dataset = 'MSLR-WEB10K'
    fold = 'Fold1'
    datafile = '/home/zengwei/data/' + dataset + '/' + fold + '/'
    train_data = LoadData(datafile + 'train.txt', dataset)
    vali_data = LoadData(datafile + 'vali.txt', dataset)
    test_data = LoadData(datafile + 'test.txt', dataset)
    nquery = len(train_data.keys())
    Nfeature = 136
    Learningrate = 0.01
    Nepisode = 100
    Lenepisode = 10
    Resultfile = 'ApprenticeRank/Result_' + dataset + '_' + fold + '_' + time.strftime("%m%d", time.localtime())
    learner = RL(Nfeature, Learningrate, Lenepisode, Resultfile)