Example #1
def hyper(args):

    adata = io.read_dataset(args.input,
                            transpose=(not args.transpose),
                            test_split=False)

    # Optional (disabled): write a backup of the AnnData object, re-read it in
    # backed mode and normalize it up front instead of per trial.
    # adata.write(os.path.join(args.outputdir, 'anndatabckup.h5ad'))
    # del adata
    # adata = io.read_dataset(os.path.join(args.outputdir, 'anndatabckup.h5ad'),
    #                         transpose=False,
    #                         test_split=False)
    # adata.isbacked = True
    # adata = io.normalize(adata,
    #                      size_factors=args.sizefactors,
    #                      logtrans_input=args.loginput,
    #                      normalize_input=args.norminput)

    hyper_params = {
        "data": {
            # "inputData": hp.choice('d_input', (adata, adata)),
            # "inTranspose": hp.choice('d_inTranspose', (args.transpose, args.transpose)),
            "norm_input_log": hp.choice('d_norm_log', (True, False)),
            "norm_input_zeromean": hp.choice('d_norm_zeromean', (True, False)),
            "norm_input_sf": hp.choice('d_norm_sf', (True, False)),
        },
        "model": {
            "lr": hp.loguniform("m_lr", np.log(1e-3), np.log(1e-2)),
            "ridge": hp.loguniform("m_ridge", np.log(1e-7), np.log(1e-1)),
            "l1_enc_coef": hp.loguniform("m_l1_enc_coef", np.log(1e-7), np.log(1e-1)),
            "hidden_size": hp.choice("m_hiddensize", ((64, 32, 64), (32, 16, 32),
                                                      (64, 64), (32, 32), (16, 16),
                                                      (16,), (32,), (64,), (128,))),
            "activation": hp.choice("m_activation", ('relu', 'selu', 'elu',
                                                     'PReLU', 'linear', 'LeakyReLU')),
            "aetype": hp.choice("m_aetype", ('zinb', 'zinb-conddisp')),
            "batchnorm": hp.choice("m_batchnorm", (True, False)),
            "dropout": hp.uniform("m_do", 0, 0.7),
            "input_dropout": hp.uniform("m_input_do", 0, 0.8),
        },
        "fit": {
            "epochs": args.hyperepoch
        }
    }
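    # Note (assumption based on kopt's CompileFN API): the "data" entries are passed as
    # keyword arguments to data_fn, the "model" entries to model_fn, and the "fit"
    # entries to the Keras fit call of the compiled objective.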

    def data_fn(norm_input_log, norm_input_zeromean, norm_input_sf):
        # Normalize a copy of the data according to the sampled preprocessing
        # options; the target is always the raw count matrix.
        ad = adata.copy()
        ad = io.normalize(ad,
                          size_factors=norm_input_sf,
                          logtrans_input=norm_input_log,
                          normalize_input=norm_input_zeromean)

        x_train = {'count': ad.X, 'size_factors': ad.obs.size_factors}
        y_train = adata.X
        gc.collect()
        return (x_train, y_train),

#    def model_fn(train_data, lr, hidden_size, activation, aetype, batchnorm,
#                 dropout, input_dropout, ridge, l1_enc_coef):
#
#        print("Backend is " + K.backend())
#        print(" MB size of train_data" + str(getsizeof(train_data)/1000000))
##        if K.backend() == 'tensorflow':
##          K.clear_session()
#        gc.collect()
#        print(train_data[1].shape[1])
#        net = AE_types[aetype](train_data[1].shape[1],
#                hidden_size=hidden_size,
#                l2_coef=0.0,
#                l1_coef=0.0,
#                l2_enc_coef=0.0,
#                l1_enc_coef=l1_enc_coef,
#                ridge=ridge,
#                hidden_dropout=dropout,
#                input_dropout=input_dropout,
#                batchnorm=batchnorm,
#                activation=activation,
#                init='glorot_uniform',
#                debug=args.debug)
#        net.build()
#        net.model.summary()
#
#        optimizer = opt.__dict__['rmsprop'](lr=lr, clipvalue=5.0)
#        net.model.compile(loss=net.loss, optimizer=optimizer)
#
#        snapshot = tracemalloc.take_snapshot()
#        display_top(snapshot)
#
#        return net.model
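    # The objective below uses model.model_fn (presumably defined in a separate module);
    # the commented-out model_fn above appears to be an earlier in-file version of it.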

    output_dir = os.path.join(args.outputdir, 'hyperopt_results')
    objective = CompileFN('autoencoder_hyperpar_db',
                          'myexp1',
                          data_fn=data_fn,
                          model_fn=model.model_fn,
                          loss_metric='loss',
                          loss_metric_mode='min',
                          valid_split=.2,
                          save_model=None,
                          save_results=True,
                          use_tensorboard=False,
                          save_dir=output_dir)

    test_fn(objective, hyper_params, save_model=None)

    trials = Trials()
    best = fmin(objective,
                hyper_params,
                trials=trials,
                algo=tpe.suggest,
                max_evals=args.hypern,
                catch_eval_exceptions=True)

    with open(os.path.join(output_dir, 'trials.pickle'), 'wb') as f:
        pickle.dump(trials, f)

    # "best" reports hp.choice parameters as indices; space_eval maps them back to
    # the actual values. Save both as a single JSON object.
    with open(os.path.join(output_dir, 'best.json'), 'wt') as f:
        json.dump({'best': best,
                   'best_evaluated': space_eval(hyper_params, trials.argmin)},
                  f, sort_keys=True, indent=4)

    print(best)
    print(space_eval(hyper_params, trials.argmin))
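
# --- Illustrative sketch (not part of the original snippet): a minimal CLI wrapper for
# hyper(args) above. The real argument parser is not shown; the flag names below are
# assumptions inferred from the args.* attributes that the function accesses.
import argparse

def parse_args_sketch():
    p = argparse.ArgumentParser(description='Hyperparameter search for the autoencoder')
    p.add_argument('--input', required=True, help='input dataset readable by io.read_dataset')
    p.add_argument('--outputdir', default='.', help='directory for hyperopt_results')
    p.add_argument('--transpose', action='store_true', help='transpose the input matrix')
    p.add_argument('--hyperepoch', type=int, default=100, help='training epochs per trial')
    p.add_argument('--hypern', type=int, default=50, help='number of hyperopt evaluations')
    p.add_argument('--debug', action='store_true', help='enable model debug mode')
    return p.parse_args()

# Usage: hyper(parse_args_sketch())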
Example #2
def hyper(args):
    ds = io.create_dataset(args.input,
                           output_file=os.path.join(args.outputdir, 'input.zarr'),
                           transpose=args.transpose,
                           test_split=args.testsplit,
                           size_factors=args.normtype)

    hyper_params = {
            "data": {
                "norm_input_log": hp.choice('d_norm_log', (True, False)),
                "norm_input_zeromean": hp.choice('d_norm_zeromean', (True, False)),
                "norm_input_sf": hp.choice('d_norm_sf', (True, False)),
                },
            "model": {
                "lr": hp.loguniform("m_lr", np.log(1e-3), np.log(1e-2)),
                "ridge": hp.loguniform("m_ridge", np.log(1e-7), np.log(1e-1)),
                "l1_enc_coef": hp.loguniform("m_l1_enc_coef", np.log(1e-7), np.log(1e-1)),
                "hidden_size": hp.choice("m_hiddensize", ((64,32,64), (32,16,32),
                                                          (64,64), (32,32), (16,16),
                                                          (16,), (32,), (64,), (128,))),
                "activation": hp.choice("m_activation", ('relu', 'selu', 'elu',
                                                         'PReLU', 'linear', 'LeakyReLU')),
                "aetype": hp.choice("m_aetype", ('zinb', 'zinb-conddisp')),
                "batchnorm": hp.choice("m_batchnorm", (True, False)),
                "dropout": hp.uniform("m_do", 0, 0.7),
                "input_dropout": hp.uniform("m_input_do", 0, 0.8),
                },
            "fit": {
                "epochs": 100
                }
    }

    def data_fn(norm_input_log, norm_input_zeromean, norm_input_sf):
        if norm_input_sf:
            sf_mat = ds.train.size_factors[:]
        else:
            sf_mat = np.ones((ds.train.matrix.shape[0], 1),
                             dtype=np.float32)

        x_train = {'count': io.normalize(ds.train.matrix[:],
                                         sf_mat, logtrans=norm_input_log,
                                         sfnorm=norm_input_sf,
                                         zeromean=norm_input_zeromean),
                    'size_factors': sf_mat}
        y_train = ds.train.matrix[:]

        return (x_train, y_train),

    def model_fn(train_data, lr, hidden_size, activation, aetype, batchnorm,
                 dropout, input_dropout, ridge, l1_enc_coef):
        net = AE_types[aetype](train_data[1].shape[1],
                hidden_size=hidden_size,
                l2_coef=0.0,
                l1_coef=0.0,
                l2_enc_coef=0.0,
                l1_enc_coef=l1_enc_coef,
                ridge=ridge,
                hidden_dropout=dropout,
                input_dropout=input_dropout,
                batchnorm=batchnorm,
                activation=activation,
                init='glorot_uniform',
                debug=args.debug)
        net.build()

        optimizer = opt.__dict__['rmsprop'](lr=lr, clipvalue=5.0)
        net.model.compile(loss=net.loss, optimizer=optimizer)

        return net.model

    output_dir = os.path.join(args.outputdir, 'hyperopt_results')
    objective = CompileFN('autoencoder_hyperpar_db', 'myexp1',
                          data_fn=data_fn,
                          model_fn=model_fn,
                          loss_metric='loss',
                          loss_metric_mode='min',
                          valid_split=.2,
                          save_model=None,
                          save_results=True,
                          use_tensorboard=False,
                          save_dir=output_dir)

    test_fn(objective, hyper_params, save_model=None)

    trials = Trials()
    best = fmin(objective,
                hyper_params,
                trials=trials,
                algo=tpe.suggest,
                max_evals=args.hypern,
                catch_eval_exceptions=True)

    with open(os.path.join(output_dir, 'trials.pickle'), 'wb') as f:
        pickle.dump(trials, f)

    #TODO: map indices in "best" back to choice-based hyperpars before saving
    with open(os.path.join(output_dir, 'best.json'), 'wt') as f:
        json.dump(best, f, sort_keys=True, indent=4)

    print(best)
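
    # --- Illustrative sketch (not part of the original snippet), addressing the TODO above:
    # hyperopt reports hp.choice parameters in `best` as indices; space_eval from the
    # hyperopt package maps trials.argmin back to the actual values, as Example #1 does.
    # from hyperopt import space_eval
    # resolved = space_eval(hyper_params, trials.argmin)
    # with open(os.path.join(output_dir, 'best_resolved.json'), 'wt') as f:
    #     json.dump(resolved, f, sort_keys=True, indent=4)

# The fragment below (its beginning is not included in this listing) continues with a
# kopt/hyperopt workflow for a Keras model, including parallel search backed by MongoDB: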
        "filters": hp.choice("m_filters", (1, 16)),
        "dropout_1": hp.choice("m_dropout_1", (0.1, 0.3)),
        "layers": hp.choice("m_layers", (100, 150)),
        "dropout_2": hp.choice("m_dropout_2", (0.35, 0.6)),
    },
    "fit": {
        "x": X_train,
        "y": Y_train,
        "batch_size": 32,
        "epochs": 20,
        "verbose": 1
    }
}

# test model training, on a small subset for one epoch
test_fn(objective, hyper_params)

# run hyper-parameter optimization sequentially (without any database)
trials = Trials()
best = fmin(objective,
            hyper_params,
            trials=trials,
            algo=tpe.suggest,
            max_evals=2)

# run hyper-parameter optimization in parallel (saving the results to MongoDB)
# Follow the hyperopt guide:
# https://github.com/hyperopt/hyperopt/wiki/Parallelizing-Evaluations-During-Search-via-MongoDB
# KMongoTrials extends hyperopt.MongoTrials with convenience methods
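# Note (assumption, not in the original snippet): the parallel search requires worker
# processes to be started separately against the same MongoDB instance, roughly:
#   hyperopt-mongo-worker --mongo=localhost:22334/<db_name> --poll-interval=0.1
# Host, port and database name must match the KMongoTrials call below.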
trials = KMongoTrials(db_name, exp_name, ip="localhost", port=22334)
best = fmin(objective,