def gpn_analysis():
    """ Check each component of the GPN """
    g = utils.GraphGenerator.get_gene_proximity_network(
        '../data/architecture/genome.txt', 50000)

    for i, sg in enumerate(g.get_components()):
        sg.io.dump()
        conc_vec, pf_vec, pr_vec = utils.get_all_data(g)

        present(
            'Real-Life data PF (all) on GPN component #%d' % i,
            plotter.Plotter.loglog,
            'gene concentration', conc_vec,
            'perron-frobenius eigenvector', pf_vec,
            plt_args={'alpha': 0.02}
        )
        present(
            'Real-Life data pagerank (all) on GPN component #%d' % i,
            plotter.Plotter.loglog,
            'gene concentration', conc_vec,
            'pagerank', pr_vec,
            plt_args={'alpha': 0.02}
        )
        present(
            'Histogram of Real-Life Data (all) on GPN component #%d' % i,
            plotter.Plotter.plot_histogram,
            'gene concentration', 'count',
            conc_vec
        )
def cross_validation(k, methods, Cs_NLK, Cs_SVM, degrees, lambdas):
    """
    Apply cross-validation to the NLCK algorithm.

    A first cross-validation is run over the values of C, d and lambda in NLCK,
    in order to find the optimal non-linear combination of kernels along with
    C, d and lambda. Then, for each triplet (and hence the corresponding weight
    vector), cross-validation is run on the regularization constant C of C_SVM.

    :param k: int, which dataset to use (k=1, 2 or 3)
    :param methods: list of strings, kernel methods
    :param Cs_NLK: np.array, regularization constants for the NLCK algorithm
    :param Cs_SVM: np.array, regularization constants for the C_SVM algorithm
    :param degrees: np.array, degrees to explore (usually np.arange(1, 5))
    :param lambdas: np.array, lambdas (parameter 'fnorm' in NLCK) to explore
    :return: pd.DataFrame with the following columns:
        - 'methods': kernel methods used
        - 'C NLCK': regularization constant in the NLCK algorithm
        - 'd': degree in the NLCK algorithm
        - 'lambda': normalization parameter in the NLCK algorithm
        - 'Best C CSVM': best regularization constant in C_SVM after cross-validation
        - 'val acc': accuracy obtained on the validation set
    """
    # Load data
    data, data1, data2, data3, kernels, ID = utils.get_all_data(methods)
    data_k = [data1, data2, data3]

    # Initialize results DataFrame
    p = len(kernels)
    n_param = len(Cs_NLK) * len(degrees) * len(lambdas)
    init = np.zeros(n_param)
    results = pd.DataFrame({
        'methods': [methods] * len(init),
        'C NLCK': init,
        'd': init,
        'lambda': init,
        'Best C CSVM': init,
        'val acc': init
    })

    # Reformat
    X_train, y_train, X_val, y_val, X_test, kernels, ID = utils.reformat_data(
        data_k[k - 1], kernels, ID)

    # Cross-validate over triplets (C, d, lambda)
    for i, param in tqdm(enumerate(product(Cs_NLK, degrees, lambdas)), total=n_param):
        C, d, lbda = param
        print('NLCK C={}, degree={}, lambda={}'.format(C, d, lbda))

        # Compute kernel
        Km = NLCK(X_train, y_train, ID, kernels, C=C, eps=1e-9, degree=d).get_K(fnorm=lbda)

        # Cross-validation on the constant C of C-SVM
        C_opt, scores_tr, scores_te, mean_scores_tr, mean_scores_te = \
            utils.cross_validation(Ps=Cs_SVM,
                                   data=[X_train, y_train, X_val, y_val, X_test],
                                   algo='CSVM', kfolds=3, K=Km, ID=ID,
                                   pickleName='cv_C_SVM_NLCK_C{}_d{}_l{}_p{}_k{}.pkl'.format(C, d, lbda, p, k))

        # Save results
        results.iloc[i, 1:6] = C, d, lbda, C_opt, np.max(mean_scores_te)

    return results
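
# Illustrative call of the cross-validation wrapper above. The grids below are
# example values chosen for this sketch, not the settings used in the report;
# the kernel method names are taken from elsewhere in this repository.
import numpy as np

example_methods = ['SP_k5', 'MM_k5_m1', 'WD_d5']     # assumed kernel selection
example_Cs_NLK = np.array([0.1, 1.0, 10.0])          # NLCK regularization grid
example_Cs_SVM = np.array([0.5, 1.0, 2.0, 5.0])      # C-SVM regularization grid
example_degrees = np.arange(1, 5)                    # degrees d = 1..4
example_lambdas = np.array([1.0, 5.0, 10.0])         # 'fnorm' values

results_df = cross_validation(k=1, methods=example_methods,
                              Cs_NLK=example_Cs_NLK, Cs_SVM=example_Cs_SVM,
                              degrees=example_degrees, lambdas=example_lambdas)
print(results_df.sort_values('val acc', ascending=False).head())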
def get(self):
    login_name, authority = get_login_name(self)
    author_secure = get_login_author_secure(self)
    res = get_all_data()
    self.render('tasks.html',
                author_secure=author_secure,
                loginname=login_name,
                query_id='',
                all_tasks=res['all_data'],
                totalpage=res['totalpage'],
                c_page=0)
def aligned_kernels(methods):
    """
    Apply the ALIGNF algorithm to each data set.

    :param methods: list of strings, kernel methods
    :return:
        - data: X_train, y_train, X_val, y_val, X_test
        - data1: X_train_1, y_train_1, X_val_1, y_val_1, X_test_1
        - data2: X_train_2, y_train_2, X_val_2, y_val_2, X_test_2
        - data3: X_train_3, y_train_3, X_val_3, y_val_3, X_test_3
        - aligned_k: list of aligned kernels
        - ID: np.array, IDs
    """
    data, data1, data2, data3, kernels, ID = utils.get_all_data(methods)
    aligned_k = []
    for d in [data1, data2, data3]:
        X, y, _, _, _ = d
        aligned_k.append(ALIGNF(X, y, ID, kernels).get_K())
    return data, data1, data2, data3, aligned_k, ID
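
# For reference, ALIGNF selects kernel weights by maximizing centered kernel
# alignment with the target kernel y y^T (Cortes et al.). Below is a minimal
# numpy sketch of that alignment score; it is illustrative only and is
# independent of this project's ALIGNF class.
import numpy as np

def centered_alignment(K1, K2):
    n = K1.shape[0]
    H = np.eye(n) - np.ones((n, n)) / n          # centering matrix
    K1c, K2c = H @ K1 @ H, H @ K2 @ H            # centered Gram matrices
    return np.sum(K1c * K2c) / (np.linalg.norm(K1c) * np.linalg.norm(K2c))

# Example check on one training set (names are illustrative):
#   K = kernels[0]; score = centered_alignment(K, np.outer(y_train, y_train))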
def timeseries_plot(y_value, companies, start, end):
    data = get_all_data()
    traces = []
    for c in companies:
        trace = go.Scatter(
            name=c,
            x=data[data["company_name"] == c][start:end].index,
            y=data[data["company_name"] == c][y_value][start:end],
        )
        traces.append(trace)
    layout = go.Layout(
        title=f"Timeseries analysis of {capitalize(y_value)}",
        xaxis={"title": "Date"},
        yaxis={"title": capitalize(y_value)},
    )
    output_plot = go.Figure(data=traces, layout=layout)
    return output_plot
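
# A possible way to render the figure returned above when running as a plain
# script. The column name, company names and dates here are illustrative, not
# taken from the original data set; inside a Dash app the Figure would instead
# be returned from a callback.
fig = timeseries_plot("closing_price", ["CompanyA", "CompanyB"],
                      "2020-01-01", "2020-12-31")
fig.show()   # standard plotly.graph_objects rendering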
def post(self):
    login_name, authority = get_login_name(self)
    author_secure = get_login_author_secure(self)
    query = {}
    query_id = self.get_argument('query_id', '')
    if query_id:
        try:
            query = {'_id': ObjectId(query_id)}
        except Exception:
            # query_id is not a valid ObjectId; fall back to a username lookup
            query = {'username': query_id}
            # query = {'$or': [{'username': query_id}, {'_id': query_id}]}
    c_page = int(self.get_argument('c_page', 0))
    res = get_all_data(c_page, query, preload_b_e)
    self.render('everyday_tasks.html',
                author_secure=author_secure,
                loginname=login_name,
                query_id=query_id,
                all_tasks=res['all_data'],
                totalpage=res['totalpage'],
                c_page=c_page)
def real_life_all():
    g = utils.GraphGenerator.get_regulatory_graph(
        '../data/architecture/network_tf_gene.txt',
        '../data/architecture/genome.txt', 50000)
    conc_vec, pf_vec, pr_vec = utils.get_all_data(g)

    present(
        'Real-Life data PF (all)',
        plotter.Plotter.loglog,
        'gene concentration', conc_vec,
        'perron-frobenius eigenvector', pf_vec,
        plt_args={'alpha': 0.02}
    )
    present(
        'Real-Life data pagerank (all)',
        plotter.Plotter.loglog,
        'gene concentration', conc_vec,
        'pagerank', pr_vec,
        plt_args={'alpha': 0.02}
    )
    present(
        'Histogram of Real-Life Data (all)',
        plotter.Plotter.plot_histogram,
        'gene concentration', 'count',
        conc_vec
    )
# First step: spawn one worker thread per audio segment
task0 = [None] * len(audio_infor_0)
for i, val in enumerate(audio_infor_0):
    task0[i] = threading.Thread(target=profane_recognizer.get_profane_time,
                                args=(0, i, val[0], val[1], val[2]))
    task0[i].start()
for i, _ in enumerate(audio_infor_0):
    print(str(i + 1) + ' / ' + str(len(audio_infor_0)))
    task0[i].join()

print('Second Step')
task1 = [None] * len(audio_infor_1)
for i, val in enumerate(audio_infor_1):
    task1[i] = threading.Thread(target=profane_recognizer.get_profane_time,
                                args=(1, i, val[0], val[1], val[2]))
    task1[i].start()
for i, _ in enumerate(audio_infor_1):
    print(str(i + 1) + ' / ' + str(len(audio_infor_1)))
    task1[i].join()

# Get results from the txt files in the result_detector folder
print('Load Result')
result = utils.get_all_data()
print(result)

print('Generate sound')
utils.generate_sound(result)

print('Combine Video and Audio')
utils.combine_audio_and_video(input_video)
print('Complete!')
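
# A more idiomatic alternative to the manual Thread/join bookkeeping above,
# using concurrent.futures. This is a sketch of a possible refactor, not part
# of the original pipeline; it reuses the existing profane_recognizer and
# audio_infor_1 names.
from concurrent.futures import ThreadPoolExecutor

with ThreadPoolExecutor(max_workers=8) as pool:
    futures = [pool.submit(profane_recognizer.get_profane_time,
                           1, i, v[0], v[1], v[2])
               for i, v in enumerate(audio_infor_1)]
    for i, fut in enumerate(futures, start=1):
        fut.result()                          # block until this worker finishes, like join()
        print(f'{i} / {len(futures)}')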
import os
import random
import sys
import time

import numpy as np
import tensorflow as tf

from input import DataInput
from model import Model
from utils import _eval, get_all_data

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
random.seed(1234)
np.random.seed(1234)
tf.set_random_seed(1234)

train_batch_size = 32
test_batch_size = 512
best_auc = 0.0

train_set, test_set, cate_list, user_count, item_count, cate_count = get_all_data()
print("train_set 0", train_set[0])
print("test_set 0", test_set[0])

gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    model = Model(user_count, item_count, cate_count, cate_list)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    print('test_gauc: %.4f\t test_auc: %.4f\t best_auc: %.4f' %
          _eval(sess, model, test_set, test_batch_size, best_auc))
    sys.stdout.flush()

    lr = 0.001
    start_time = time.time()
def main():
    # set_rnd_seed(31)  # reproducibility
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--data_dir_train', type=str,
                        default='./data/brats_19/Train', metavar='DATA_TRAIN',
                        help="data train directory")
    parser.add_argument('--data_dir_val', type=str,
                        default='./data/brats_19/Validation', metavar='DATA_VAL',
                        help="data validation directory")
    parser.add_argument('--log_dir', type=str, default='logs/',
                        metavar='LOGS', help="logs directory")
    parser.add_argument('--models_dir', type=str, default='models/',
                        metavar='MODELS', help="models directory")
    parser.add_argument('--batch_size', type=int, default=16,
                        metavar='BATCH', help="batch size")
    parser.add_argument('--learning_rate', type=float, default=2.0e-5,
                        metavar='LR', help="learning rate")
    parser.add_argument('--epochs', type=int, default=1e6,
                        metavar='EPOCHS', help="number of epochs")
    parser.add_argument('--zdim', type=int, default=16, metavar='ZDIM',
                        help="Number of dimensions in latent space")
    parser.add_argument('--load', type=str, default='', metavar='LOADDIR',
                        help="time string of previous run to load from")
    parser.add_argument('--binary_input', type=bool, default=False,
                        metavar='BINARYINPUT',
                        help="True=one input channel for each tumor structure")
    parser.add_argument('--use_age', type=bool, default=False,
                        metavar='AGE', help="use age in prediction")
    parser.add_argument('--use_rs', type=bool, default=False,
                        metavar='RESECTIONSTATUS',
                        help="use resection status in prediction")
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device in use: {}".format(device))
    torch.set_default_tensor_type(torch.cuda.FloatTensor
                                  if torch.cuda.is_available()
                                  else torch.FloatTensor)

    logdir_suffix = '-%s-zdim=%d-beta=5000-alpha=%.5f-lr=%.5f-gamma=%d-batch=%d' % (
        args.data_dir_train.replace("Train", "").replace(".", "").replace("/", ""),
        args.zdim, alpha, args.learning_rate, gamma, args.batch_size)
    if args.use_age:
        logdir_suffix += "-age"
    if args.use_rs:
        logdir_suffix += "-rs"
    if args.binary_input:
        logdir_suffix += "-binary_input"
    if args.load == "":
        date_str = str(dt.now())[:-7].replace(":", "-").replace(" ", "-") + logdir_suffix
    else:
        date_str = args.load
    args.models_dir = join(args.models_dir, date_str)
    args.log_dir = join(args.log_dir, date_str)
    os.makedirs(args.log_dir, exist_ok=True)
    os.makedirs(args.models_dir, exist_ok=True)
    check_args(args)
    writer = SummaryWriter(args.log_dir + '-train')

    ## Get dataset
    data = get_all_data(args.data_dir_train, args.data_dir_val,
                        orig_data_shape, binary_input=args.binary_input)
    x_data_train_labeled, x_data_train_unlabeled, x_data_val, \
        y_data_train_labeled, y_data_val, y_dim = data
    if args.binary_input:
        n_labels = x_data_train_labeled.shape[1]
    else:
        n_labels = len(
            np.bincount(x_data_train_labeled[:10].astype(np.int8).flatten()))
    x_data_train_labeled = x_data_train_labeled.astype(np.int8)
    x_data_train_unlabeled = x_data_train_unlabeled.astype(np.int8)
    x_data_val = x_data_val.astype(np.int8)

    if args.use_age:
        age_std = 12.36
        age_mean = 62.2
        age_l = np.expand_dims(np.load(join(args.data_dir_train, "age_l.npy")), 1)
        age_u = np.expand_dims(np.load(join(args.data_dir_train, "age_u.npy")), 1)
        age_v = np.expand_dims(np.load(join(args.data_dir_val, "age.npy")), 1)
        age_l = (age_l - age_mean) / age_std
        age_u = (age_u - age_mean) / age_std
        age_v = (age_v - age_mean) / age_std
    else:
        age_l, age_u, age_v = [], [], []

    if args.use_rs:
        rs_l = one_hot(np.load(join(args.data_dir_train, "rs_l.npy")), 2)
        rs_u = one_hot(np.load(join(args.data_dir_train, "rs_u.npy")), 2)
        rs_v = one_hot(np.load(join(args.data_dir_val, "rs.npy")), 2)
    else:
        rs_l, rs_u, rs_v = [], [], []

    if args.use_rs and args.use_age:
        c_l = np.concatenate([age_l, rs_l], axis=1)
        c_u = np.concatenate([age_u, rs_u], axis=1)
        c_v = np.concatenate([age_v, rs_v], axis=1)
        c_dim = c_l.shape[1]
    elif args.use_rs:
        c_l, c_u, c_v = rs_l, rs_u, rs_v
        c_dim = c_l.shape[1]
    elif args.use_age:
        c_l, c_u, c_v = age_l, age_u, age_v
        c_dim = c_l.shape[1]
    else:
        c_l, c_u, c_v = np.array([]), np.array([]), np.array([])
        c_dim = 0

    y_data_val = y_data_val[:len(x_data_val)]
    print('x unlabeled data shape:', x_data_train_unlabeled.shape)
    print('x labeled data shape:', x_data_train_labeled.shape)
    print('x val data shape:', x_data_val.shape)
    assert data_shape == tuple(x_data_val.shape[2:])
    print('input labels: %d' % n_labels)

    model = SemiVAE(args.zdim, y_dim, c_dim, n_labels=n_labels,
                    binary_input=args.binary_input).to(device)
    print_num_params(model)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    start_epoch = 0
    if args.load != "":
        print("Loading model from %s" % args.models_dir)
        nums = [int(i.split("_")[-1]) for i in os.listdir(args.models_dir)]
        start_epoch = max(nums)
        model_path = join(args.models_dir, "model_epoch_%d" % start_epoch)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if 'model_global_step' in checkpoint.keys():
            model.global_step = checkpoint['model_global_step']
        start_epoch = checkpoint['epoch']
        print("Loaded model at epoch %d, total steps: %d" %
              (start_epoch, model.global_step))

    t_start = dt.now()
    for epoch in range(int(start_epoch + 1), int(args.epochs)):
        train(x_data_train_unlabeled, x_data_train_labeled,
              y_data_train_labeled, x_data_val, y_data_val, c_l, c_u, c_v,
              args.batch_size, epoch, model, optimizer, device, log_interval,
              writer, args.log_dir, n_labels)
        if (dt.now() - t_start).total_seconds() > 3600 * 2:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'model_global_step': model.global_step,
                },
                join(args.models_dir, "model_epoch_%d" % epoch))
            t_start = dt.now()
        sys.stdout.flush()  # need this when redirecting to file
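
# The resection-status arrays above are encoded with a helper named
# one_hot(labels, num_classes). A minimal numpy version consistent with that
# call signature is sketched below; the project's own implementation may
# differ in dtype or shape handling.
import numpy as np

def one_hot(labels, num_classes):
    labels = np.asarray(labels, dtype=np.int64).ravel()
    out = np.zeros((labels.shape[0], num_classes), dtype=np.float32)
    out[np.arange(labels.shape[0]), labels] = 1.0   # set the column of each label to 1
    return out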
check_NLCK = False     # Use NLCK algorithm
check_CVNLCK = False   # Use cross validation on NLCK hyperparameters
check_other = False    # Free

if __name__ == '__main__':
    if build_kernel:
        methods = ['GP_k3_g1', 'MM_k5_m1', 'WD_d10']
        for method in methods:
            X_train, y_train, X_val, y_val, X_test, K, ID = utils.get_training_datas(method=method, replace=True)
            # Put replace=False not to erase the previous saves

    elif check_method:
        method = 'MM_k6_m1'
        algo = 'CSVM'
        solver = 'CVX'
        data, data1, data2, data3, K, ID = utils.get_all_data([method])
        Cs = np.sort([i * 10 ** j for (i, j) in product(range(1, 10), range(-2, 1))])
        # Perform cross validation on data set 1 (TF = 1)
        utils.cross_validation(Ps=Cs, data=data1, algo=algo, solver=solver, kfolds=3, K=K, ID=ID)

    elif check_alignf:
        methods = ['MM_k3_m1', 'WD_d5', 'SS_l1_k3']
        data, data1, data2, data3, kernels, ID = ALIGNF.aligned_kernels(methods)
        K = kernels[0]  # 0 index for first data set
        X_train_1, y_train_1, X_val_1, y_val_1, X_test_1, K_1, ID_1 = utils.reformat_data(data1, [K], ID)
        Cs = np.sort([i * 10 ** j for (i, j) in product(range(1, 10), range(-3, 2))])
        utils.cross_validation(Ps=Cs, data=data1, algo='CSVM', kfolds=5, K=K_1[0], ID=ID_1)

    elif check_NLCK:
        methods = ['SP_k6', 'SP_k5', 'SP_k4']
        data, data1, data2, data3, kernels, ID = utils.get_all_data(methods)
def get_all_data():
    res = []
    for tup in utils.get_all_data():
        print(tup)
        res.append({"name": tup[0], "value": int(tup[1])})
    return jsonify({"data": res})
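
# The view above returns jsonify(...) but the app object and route registration
# are not shown. Below is a minimal Flask wiring sketch; the route path and
# module layout are assumptions made for illustration.
from flask import Flask, jsonify
import utils

app = Flask(__name__)

@app.route('/api/all_data')          # hypothetical route path
def all_data():
    rows = [{"name": name, "value": int(value)}
            for name, value in utils.get_all_data()]
    return jsonify({"data": rows})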
print("第 " + str(i) + " 次" + "反向传播训练") train_correct, train_label_predict = self.calculate_correct(train_data, train_label) train_corrects.append(train_correct) ''' print("训练集正确率为: " + str(train_correct)) test_correct, test_label_predict = self.calculate_correct(word_test, res_test) test_corrects.append(test_correct) print("测试集正确率为: " + str(test_correct)) ''' i += 1 return train_corrects, test_corrects word_train, word_test, res_train, res_test = utils.get_data() train, train_label = utils.get_all_data() test = utils.get_test_image_matrix() bp = BPNetwork() if __name__ == '__main__': ''' 初始化神经网络的结构 输入层 28 * 28 = 784 输出层 12 ''' hid = 100 bp.setup(784, 12, hid) # 初始化学习率,训练次数 learn = 0.01 times = 130
from utils import get_all_data, reformat_data, export_predictions
from NLCKernels import NLCK
from SVM import C_SVM

# Best submission: Non-Linear Combination of 10 kernels listed below
methods = [
    'SP_k4', 'SP_k5', 'SP_k6',
    'MM_k4_m1', 'MM_k5_m1', 'MM_k6_m1',
    'WD_d4', 'WD_d5', 'WD_d10'
]

# Import data
data, data1, data2, data3, kernels, ID = get_all_data(methods)

# Run the algorithm on the first data set with the corresponding hyperparameters (see the report, table 1)
print('\n\n')
X_train_1, y_train_1, X_val_1, y_val_1, X_test_1, kernels_1, ID_1 = reformat_data(
    data1, kernels, ID)
Km1 = NLCK(X_train_1, y_train_1, ID_1, kernels_1, C=1, eps=1e-9, degree=3).get_K(fnorm=5, n_iter=50)
svm1 = C_SVM(Km1, ID_1, C=1.9, solver='CVX')
svm1.fit(X_train_1, y_train_1)

# Run the algorithm on the second data set with the corresponding hyperparameters (see the report, table 1)
print('\n\n')
X_train_2, y_train_2, X_val_2, y_val_2, X_test_2, kernels_2, ID_2 = reformat_data(
    data2, kernels, ID)
Km2 = NLCK(X_train_2, y_train_2, ID_2, kernels_2, C=10, eps=1e-9, degree=4).get_K(fnorm=5, n_iter=50)
svm2 = C_SVM(Km2, ID_2, C=2.1, solver='CVX')
svm2.fit(X_train_2, y_train_2)
        return ('Travel between the driver\'s starting location and the shared'
                ' final destination must be possible by automobile.'
                ' Please try again.')
    meetup_location = get_meeting_location(
        DG, my_df, stops, start_coord, polies, trip_names)
    if meetup_location == -1:
        return ('Driver must be passing through the Greater Toronto Area.'
                ' Please try again.')
    return 'Arrange to meet at: ' + meetup_location


transit_top = 43.90975
transit_left = -79.649908
transit_bottom = 43.591811
transit_right = -79.123111

my_df, stops, trip_names, DG = get_all_data()

# start_pedestrian = 'Union station toronto'
# start_drive = 'Toronto Public LIbrary'
# end_drive = 'High Park toronto'
# start_coord, start_address = get_gmaps_coords(start_pedestrian)
# (start_address, end_address, polies, points, drive_start_coord,
#  end_coord) = get_gmaps_route(A=start_drive, B=end_drive)
# meetup_location = get_meeting_location(
#     DG, my_df, stops, start_coord, polies, trip_names)
# print(meetup_location)
# exit()

FB_API_URL = 'https://graph.facebook.com/v2.6/me/messages'
ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
VERIFY_TOKEN = os.environ['VERIFY_TOKEN']
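
# VERIFY_TOKEN above is typically used for the Messenger webhook verification
# handshake, and ACCESS_TOKEN for calls to the Send API at FB_API_URL. Below is
# a minimal sketch of the verification endpoint, assuming Flask is the web
# framework used by this bot (the framework and route path are assumptions).
from flask import Flask, request

app = Flask(__name__)

@app.route('/webhook', methods=['GET'])
def verify_webhook():
    # Facebook sends hub.verify_token and hub.challenge when subscribing the webhook.
    if request.args.get('hub.verify_token') == VERIFY_TOKEN:
        return request.args.get('hub.challenge', '')
    return 'Invalid verification token', 403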