def hyper(args):
    adata = io.read_dataset(args.input,
                            transpose=(not args.transpose),
                            test_split=False)

    # adata.write(os.path.join(args.outputdir, 'anndatabckup.h5ad'))
    # del adata
    #
    # adata = io.read_dataset(os.path.join(args.outputdir, 'anndatabckup.h5ad'),
    #                         transpose=False,
    #                         test_split=False)
    # adata = os.path.join(args.outputdir, 'anndatabckup.h5ad')
    # adata.isbacked = True
    # adata = io.normalize(adata,
    #                      size_factors=args.sizefactors,
    #                      logtrans_input=args.loginput,
    #                      normalize_input=args.norminput)

    hyper_params = {
        "data": {
            # "inputData": hp.choice('d_input', (adata, adata)),
            # "inTranspose": hp.choice('d_inTranspose', (args.transpose, args.transpose)),
            "norm_input_log": hp.choice('d_norm_log', (True, False)),
            "norm_input_zeromean": hp.choice('d_norm_zeromean', (True, False)),
            "norm_input_sf": hp.choice('d_norm_sf', (True, False)),
        },
        "model": {
            "lr": hp.loguniform("m_lr", np.log(1e-3), np.log(1e-2)),
            "ridge": hp.loguniform("m_ridge", np.log(1e-7), np.log(1e-1)),
            "l1_enc_coef": hp.loguniform("m_l1_enc_coef", np.log(1e-7), np.log(1e-1)),
            "hidden_size": hp.choice("m_hiddensize", ((64, 32, 64), (32, 16, 32),
                                                      (64, 64), (32, 32), (16, 16),
                                                      (16,), (32,), (64,), (128,))),
            "activation": hp.choice("m_activation", ('relu', 'selu', 'elu', 'PReLU',
                                                     'linear', 'LeakyReLU')),
            "aetype": hp.choice("m_aetype", ('zinb', 'zinb-conddisp')),
            "batchnorm": hp.choice("m_batchnorm", (True, False)),
            "dropout": hp.uniform("m_do", 0, 0.7),
            "input_dropout": hp.uniform("m_input_do", 0, 0.8),
        },
        "fit": {
            "epochs": args.hyperepoch
        }
    }

    def data_fn(norm_input_log, norm_input_zeromean, norm_input_sf):
        # Normalize a copy so that the original counts stay available as targets.
        ad = adata.copy()
        ad = io.normalize(ad,
                          size_factors=norm_input_sf,
                          logtrans_input=norm_input_log,
                          normalize_input=norm_input_zeromean)
        x_train = {'count': ad.X, 'size_factors': ad.obs.size_factors}
        # print(x_train)
        # x_train = ad.X
        y_train = adata.X  # reconstruction target: raw counts from the unnormalized AnnData
        # print(y_train)
        gc.collect()
        # Trailing comma is intentional: kopt's data_fn must return a tuple of datasets;
        # only the training set is given here, the validation split comes from valid_split.
        return (x_train, y_train),

    # def model_fn(train_data, lr, hidden_size, activation, aetype, batchnorm,
    #              dropout, input_dropout, ridge, l1_enc_coef):
    #     print("Backend is " + K.backend())
    #     print(" MB size of train_data" + str(getsizeof(train_data)/1000000))
    #     # if K.backend() == 'tensorflow':
    #     #     K.clear_session()
    #     gc.collect()
    #     print(train_data[1].shape[1])
    #     net = AE_types[aetype](train_data[1].shape[1],
    #                            hidden_size=hidden_size,
    #                            l2_coef=0.0,
    #                            l1_coef=0.0,
    #                            l2_enc_coef=0.0,
    #                            l1_enc_coef=l1_enc_coef,
    #                            ridge=ridge,
    #                            hidden_dropout=dropout,
    #                            input_dropout=input_dropout,
    #                            batchnorm=batchnorm,
    #                            activation=activation,
    #                            init='glorot_uniform',
    #                            debug=args.debug)
    #     net.build()
    #     net.model.summary()
    #
    #     optimizer = opt.__dict__['rmsprop'](lr=lr, clipvalue=5.0)
    #     net.model.compile(loss=net.loss, optimizer=optimizer)
    #
    #     snapshot = tracemalloc.take_snapshot()
    #     display_top(snapshot)
    #
    #     return net.model

    output_dir = os.path.join(args.outputdir, 'hyperopt_results')
    objective = CompileFN('autoencoder_hyperpar_db', 'myexp1',
                          data_fn=data_fn,
                          model_fn=model.model_fn,  # model_fn taken from the `model` module; a local variant is kept commented out above
                          loss_metric='loss',
                          loss_metric_mode='min',
                          valid_split=.2,
                          save_model=None,
                          save_results=True,
                          use_tensorboard=False,
                          save_dir=output_dir)

    test_fn(objective, hyper_params, save_model=None)

    trials = Trials()
    best = fmin(objective,
                hyper_params,
                trials=trials,
                algo=tpe.suggest,
                max_evals=args.hypern,
                catch_eval_exceptions=True)

    with open(os.path.join(output_dir, 'trials.pickle'), 'wb') as f:
        pickle.dump(trials, f)

    # TODO: map indices in "best" back to choice-based hyperpars before saving
    with open(os.path.join(output_dir, 'best.json'), 'wt') as f:
        # NOTE: this writes two JSON documents back to back, so best.json will not
        # parse with a single json.load(): first the raw index-based `best` dict,
        # then the decoded hyper-parameter values.
        json.dump(best, f, sort_keys=True, indent=4)
        json.dump(space_eval(hyper_params, trials.argmin), f, sort_keys=True, indent=4)

    print(best)
    print(space_eval(hyper_params, trials.argmin))
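
# Sketch (not part of the original module): one way the trials.pickle written by
# hyper() could be inspected after a run. The helper name and path argument are
# illustrative; the decoding itself mirrors the space_eval(hyper_params, trials.argmin)
# call used above, which maps hp.choice indices back to actual values.
#
# import pickle
# from hyperopt import space_eval
#
# def load_best_params(trials_path, hyper_params):
#     """Reload a pickled Trials object and decode the best hyper-parameters.
#
#     `hyper_params` must be the same search space that was passed to fmin,
#     since hp.choice values are stored as indices inside the trials.
#     """
#     with open(trials_path, 'rb') as f:
#         trials = pickle.load(f)
#     # trials.argmin holds the raw (index-based) assignment of the best trial;
#     # space_eval resolves it against the search space.
#     return space_eval(hyper_params, trials.argmin)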
def hyper(args):
    ds = io.create_dataset(args.input,
                           output_file=os.path.join(args.outputdir, 'input.zarr'),
                           transpose=args.transpose,
                           test_split=args.testsplit,
                           size_factors=args.normtype)

    hyper_params = {
        "data": {
            "norm_input_log": hp.choice('d_norm_log', (True, False)),
            "norm_input_zeromean": hp.choice('d_norm_zeromean', (True, False)),
            "norm_input_sf": hp.choice('d_norm_sf', (True, False)),
        },
        "model": {
            "lr": hp.loguniform("m_lr", np.log(1e-3), np.log(1e-2)),
            "ridge": hp.loguniform("m_ridge", np.log(1e-7), np.log(1e-1)),
            "l1_enc_coef": hp.loguniform("m_l1_enc_coef", np.log(1e-7), np.log(1e-1)),
            "hidden_size": hp.choice("m_hiddensize", ((64, 32, 64), (32, 16, 32),
                                                      (64, 64), (32, 32), (16, 16),
                                                      (16,), (32,), (64,), (128,))),
            "activation": hp.choice("m_activation", ('relu', 'selu', 'elu', 'PReLU',
                                                     'linear', 'LeakyReLU')),
            "aetype": hp.choice("m_aetype", ('zinb', 'zinb-conddisp')),
            "batchnorm": hp.choice("m_batchnorm", (True, False)),
            "dropout": hp.uniform("m_do", 0, 0.7),
            "input_dropout": hp.uniform("m_input_do", 0, 0.8),
        },
        "fit": {
            "epochs": 100
        }
    }

    def data_fn(norm_input_log, norm_input_zeromean, norm_input_sf):
        if norm_input_sf:
            sf_mat = ds.train.size_factors[:]
        else:
            sf_mat = np.ones((ds.train.matrix.shape[0], 1), dtype=np.float32)

        x_train = {'count': io.normalize(ds.train.matrix[:],
                                         sf_mat,
                                         logtrans=norm_input_log,
                                         sfnorm=norm_input_sf,
                                         zeromean=norm_input_zeromean),
                   'size_factors': sf_mat}
        y_train = ds.train.matrix[:]
        return (x_train, y_train),

    def model_fn(train_data, lr, hidden_size, activation, aetype, batchnorm,
                 dropout, input_dropout, ridge, l1_enc_coef):
        net = AE_types[aetype](train_data[1].shape[1],
                               hidden_size=hidden_size,
                               l2_coef=0.0,
                               l1_coef=0.0,
                               l2_enc_coef=0.0,
                               l1_enc_coef=l1_enc_coef,
                               ridge=ridge,
                               hidden_dropout=dropout,
                               input_dropout=input_dropout,
                               batchnorm=batchnorm,
                               activation=activation,
                               init='glorot_uniform',
                               debug=args.debug)
        net.build()

        optimizer = opt.__dict__['rmsprop'](lr=lr, clipvalue=5.0)
        net.model.compile(loss=net.loss, optimizer=optimizer)

        return net.model

    output_dir = os.path.join(args.outputdir, 'hyperopt_results')
    objective = CompileFN('autoencoder_hyperpar_db', 'myexp1',
                          data_fn=data_fn,
                          model_fn=model_fn,
                          loss_metric='loss',
                          loss_metric_mode='min',
                          valid_split=.2,
                          save_model=None,
                          save_results=True,
                          use_tensorboard=False,
                          save_dir=output_dir)

    test_fn(objective, hyper_params, save_model=None)

    trials = Trials()
    best = fmin(objective,
                hyper_params,
                trials=trials,
                algo=tpe.suggest,
                max_evals=args.hypern,
                catch_eval_exceptions=True)

    with open(os.path.join(output_dir, 'trials.pickle'), 'wb') as f:
        pickle.dump(trials, f)

    # TODO: map indices in "best" back to choice-based hyperpars before saving
    with open(os.path.join(output_dir, 'best.json'), 'wt') as f:
        json.dump(best, f, sort_keys=True, indent=4)

    print(best)
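
# Sketch (assumption, not the project's actual CLI): an argparse set-up producing
# the `args` namespace consumed by the hyper() variants above. The attribute names
# (input, outputdir, transpose, testsplit, normtype, hypern, hyperepoch, debug)
# come from their usage in the functions; the flag names, help texts, and defaults
# below are illustrative only.
#
# import argparse
#
# def _example_parser():
#     p = argparse.ArgumentParser(description='hyper-parameter search (example wiring)')
#     p.add_argument('input', help='input count matrix')
#     p.add_argument('outputdir', help='directory for hyperopt results')
#     p.add_argument('--transpose', action='store_true', help='transpose the input matrix')
#     p.add_argument('--testsplit', action='store_true', help='hold out a test split')
#     p.add_argument('--normtype', help='size-factor strategy passed to io.create_dataset')
#     p.add_argument('--hypern', type=int, default=100, help='number of hyperopt evaluations')
#     p.add_argument('--hyperepoch', type=int, default=100, help='epochs per evaluation')
#     p.add_argument('--debug', action='store_true')
#     return p
#
# hyper(_example_parser().parse_args())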
"filters": hp.choice("m_filters", (1, 16)), "dropout_1": hp.choice("m_dropout_1", (0.1, 0.3)), "layers": hp.choice("m_layers", (100, 150)), "dropout_2": hp.choice("m_dropout_2", (0.35, 0.6)), }, "fit": { "x": X_train, "y": Y_train, "batch_size": 32, "epochs": 20, "verbose": 1 } } # test model training, on a small subset for one epoch test_fn(objective, hyper_params) # run hyper-parameter optimization sequentially (without any database) trials = Trials() best = fmin(objective, hyper_params, trials=trials, algo=tpe.suggest, max_evals=2) # run hyper-parameter optimization in parallel (saving the results to MonogoDB) # Follow the hyperopt guide: # https://github.com/hyperopt/hyperopt/wiki/Parallelizing-Evaluations-During-Search-via-MongoDB # KMongoTrials extends hyperopt.MongoTrials with convenience methods trials = KMongoTrials(db_name, exp_name, ip="localhost", port=22334) best = fmin(objective,