示例#1
0
    def create_optimizer(self, algorithm_name):
        """Create the Chocolate optimizer selected by ``algorithm_name``.

        Translates every parameter in ``self.search_space.params`` into a
        Chocolate distribution and stores the resulting sampler, bound to
        ``self.conn`` and created with ``clear_db=True``, in
        ``self.chocolate_optimizer``.

        Args:
            algorithm_name: "grid", "random", "quasirandom",
                "bayesianoptimization" or "mocmaes". A key of
                ``DEPRECATED_ALGORITHM_NAME`` is also accepted; it triggers a
                ``DeprecationWarning`` and is replaced by the mapped name.

        Raises:
            Exception: If the (possibly remapped) name matches no sampler.
        """

        # Search Space example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        for param in self.search_space.params:
            key = BaseChocolateService.encode(param.name)
            # Chocolate quantized_uniform distribution uses half-open interval:
            # [low, high). The upper bound is pushed out by one step so that
            # param.max itself stays reachable.
            if param.type == INTEGER:
                step = int(param.step)
                chocolate_search_space[key] = choco.quantized_uniform(
                    int(param.min), int(param.max) + step, step)
            elif param.type == DOUBLE:
                step = float(param.step)
                chocolate_search_space[key] = choco.quantized_uniform(
                    float(param.min), float(param.max) + step, step)
            # For Categorical and Discrete insert indexes to DB from list of values
            elif param.type == CATEGORICAL or param.type == DISCRETE:
                chocolate_search_space[key] = choco.choice(
                    [idx for idx, _ in enumerate(param.list)])
            # Any other parameter type is left out of the search space.

        if algorithm_name in DEPRECATED_ALGORITHM_NAME:
            replacement = DEPRECATED_ALGORITHM_NAME[algorithm_name]
            warnings.warn(
                "Algorithm name '{}' is deprecated. Please use '{}'.".format(
                    algorithm_name,
                    replacement,
                ),
                DeprecationWarning,
            )
            algorithm_name = replacement

        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if algorithm_name == "grid":
            self.chocolate_optimizer = choco.Grid(self.conn,
                                                  chocolate_search_space,
                                                  clear_db=True)
        # hyperopt-random is the default option in katib.
        elif algorithm_name == "random":
            self.chocolate_optimizer = choco.Random(self.conn,
                                                    chocolate_search_space,
                                                    clear_db=True)
        elif algorithm_name == "quasirandom":
            self.chocolate_optimizer = choco.QuasiRandom(
                self.conn, chocolate_search_space, clear_db=True)
        elif algorithm_name == "bayesianoptimization":
            self.chocolate_optimizer = choco.Bayes(self.conn,
                                                   chocolate_search_space,
                                                   clear_db=True)
        # CMAES is intentionally not wired up here.
        elif algorithm_name == "mocmaes":
            mu = 1
            self.chocolate_optimizer = choco.MOCMAES(self.conn,
                                                     chocolate_search_space,
                                                     mu=mu,
                                                     clear_db=True)
        else:
            # The original message began with a stray, unbalanced double quote.
            raise Exception(
                "Failed to create Chocolate optimizer for the algorithm: {}".
                format(algorithm_name))
示例#2
0
def main():
    """Draw one quasi-random GBT configuration, score it, and record the loss.

    The trial is persisted through a SQLite-backed Chocolate connection
    (``gbt-boston.db`` in the current directory).
    """
    features, targets = load_boston(return_X_y=True)

    # Connect to sqlite database in current directory
    connection = choco.SQLiteConnection(url="sqlite:///gbt-boston.db")
    search_space = dict(
        learning_rate=choco.uniform(0.001, 0.1),
        n_estimators=choco.quantized_uniform(25, 525, 1),
        max_depth=choco.quantized_uniform(2, 25, 1),
        subsample=choco.uniform(0.7, 1.0),
    )

    quasi_random = choco.QuasiRandom(connection,
                                     search_space,
                                     random_state=110,
                                     skip=3)
    trial_token, trial_params = quasi_random.next()
    trial_loss = score_gbt(features, targets, trial_params)
    quasi_random.update(trial_token, trial_loss)
示例#3
0
    def create_optimizer(self, algorithm_name):
        """Create the Chocolate optimizer selected by ``algorithm_name``.

        Translates every parameter in ``self.search_space.params`` into a
        Chocolate distribution and stores the resulting sampler, bound to
        ``self.conn`` and created with ``clear_db=True``, in
        ``self.chocolate_optimizer``.

        Args:
            algorithm_name: "grid", "chocolate-random",
                "chocolate-quasirandom", "chocolate-bayesian-optimization"
                or "chocolate-mocmaes".

        Raises:
            Exception: If ``algorithm_name`` matches no sampler.
        """

        # Search Space example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        for param in self.search_space.params:
            key = BaseChocolateService.encode(param.name)
            # Chocolate's quantized_uniform samples from the half-open
            # interval [low, high). Passing param.max unchanged would make the
            # maximum unreachable, so the bound is extended by one step.
            # NOTE(review): this assumes step evenly divides (max - min);
            # otherwise the last bucket lies above param.max.
            if param.type == INTEGER:
                step = int(param.step)
                chocolate_search_space[key] = choco.quantized_uniform(
                    int(param.min), int(param.max) + step, step)
            elif param.type == DOUBLE:
                step = float(param.step)
                chocolate_search_space[key] = choco.quantized_uniform(
                    float(param.min), float(param.max) + step, step)
            elif param.type == CATEGORICAL:
                chocolate_search_space[key] = choco.choice(param.list)
            else:
                # Every remaining type is treated as a list of numeric values.
                chocolate_search_space[key] = choco.choice(
                    [float(e) for e in param.list])

        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if algorithm_name == "grid":
            self.chocolate_optimizer = choco.Grid(self.conn,
                                                  chocolate_search_space,
                                                  clear_db=True)
        # hyperopt-random is the default option in katib.
        elif algorithm_name == "chocolate-random":
            self.chocolate_optimizer = choco.Random(self.conn,
                                                    chocolate_search_space,
                                                    clear_db=True)
        elif algorithm_name == "chocolate-quasirandom":
            self.chocolate_optimizer = choco.QuasiRandom(
                self.conn, chocolate_search_space, clear_db=True)
        elif algorithm_name == "chocolate-bayesian-optimization":
            self.chocolate_optimizer = choco.Bayes(self.conn,
                                                   chocolate_search_space,
                                                   clear_db=True)
        # CMAES is intentionally not wired up here.
        elif algorithm_name == "chocolate-mocmaes":
            mu = 1
            self.chocolate_optimizer = choco.MOCMAES(self.conn,
                                                     chocolate_search_space,
                                                     mu=mu,
                                                     clear_db=True)
        else:
            # The original message began with a stray, unbalanced double quote.
            raise Exception(
                "Failed to create Chocolate optimizer for the algorithm: {}".
                format(algorithm_name))
示例#4
0
 def setUp(self):
     """Build the two-subspace ``Space`` fixture stored in ``self.space``."""
     wide_log = log(low=-3, high=5, base=10)
     narrow_log = log(low=-2, high=3, base=10)
     unit_uniform = uniform(low=-1, high=1)
     quantized = quantized_uniform(low=1, high=20, step=1)

     # First subspace: nested conditions keyed on "algo".
     svm_branch = {
         "C": wide_log,
         "kernel": {"linear": None, "rbf": {"gamma": narrow_log}},
         "cond2": {"aa": None, "bb": {"abc": unit_uniform}},
     }
     knn_branch = {"n_neighbors": quantized}
     conditional_subspace = {"algo": {"svm": svm_branch, "knn": knn_branch}}

     # Second subspace: flat, and it reuses two of the distribution objects.
     flat_subspace = {"cond3": 0, "p": wide_log, "p2": quantized}

     self.space = Space([conditional_subspace, flat_subspace])
示例#5
0
def convert_param_to_choco(param):
    """Translate one search-parameter description into a ``chocolate`` space.

    ``param`` is a mapping with a ``'type'`` key and, depending on that type,
    ``'min'``/``'max'`` or ``'options'`` entries.

    Raises:
        ValueError: If ``param['type']`` is not one of the handled types.
    """
    from math import log10
    import chocolate as choco

    kind = param['type']

    if kind == 'BOOL':
        space = choco.choice([False, True])
    elif kind == 'INT':
        # high is exclusive for quantized_uniform, hence max + 1.
        space = choco.quantized_uniform(low=param['min'],
                                        high=param['max'] + 1,
                                        step=1)
    elif kind == 'STRING':
        space = choco.choice(param['options'])
    elif kind == 'FLOAT':
        space = choco.uniform(low=param['min'], high=param['max'])
    elif kind == 'FLOAT_EXP':
        # The bounds are handed over as base-10 exponents.
        space = choco.log(low=log10(param['min']),
                          high=log10(param['max']),
                          base=10)
    else:
        raise ValueError("Didn't understand space {}.".format(param))

    return space
示例#6
0
def create_space():
    """Return the Chocolate search space as a name -> distribution mapping."""
    quantized = choco.quantized_uniform

    return {
        "learning_rate": choco.log(low=-5, high=-2, base=10),
        "dropout_keep_prob": quantized(low=0.0, high=0.95, step=0.05),
        "num_filters": quantized(low=50, high=200, step=10),
        "batch_size": quantized(low=64, high=256, step=16),
        "num_epochs": quantized(low=100, high=200, step=10),
        "l2_reg_lambda": quantized(low=0.0, high=10.0, step=0.5),
        "eps": quantized(low=1.0, high=10.0, step=0.02),
        "dev_sample_percentage": quantized(low=0.1, high=0.3, step=0.01),
    }
    def getSuggestions(self, search_space, trials, request_number):
        """
        Get the new suggested trials with chocolate algorithm.

        A fresh sampler is created on every call, so all previous ``trials``
        are re-inserted into the Chocolate database before sampling.

        Args:
            search_space: Object exposing ``params`` (each with ``name``,
                ``type`` and ``min``/``max``/``step`` or ``list``) and
                ``goal``.
            trials: Finished trials; each provides ``target_metric.value``
                and ``assignments`` (``name``/``value`` pairs).
            request_number: Number of suggestions to attempt.

        Returns:
            A list with one converted assignment set per successful draw. It
            is shorter than ``request_number`` when the sampler is exhausted.

        Raises:
            Exception: If ``self.algorithm_name`` matches no sampler.
        """

        # Example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
        chocolate_search_space = {}

        for param in search_space.params:
            key = BaseChocolateService.encode(param.name)
            # Chocolate's quantized_uniform samples from the half-open
            # interval [low, high). Passing param.max unchanged would make the
            # maximum unreachable, so the bound is extended by one step.
            if param.type == INTEGER:
                chocolate_search_space[key] = choco.quantized_uniform(
                    int(param.min), int(param.max) + 1, 1)
            elif param.type == DOUBLE:
                # NOTE(review): assumes step evenly divides (max - min);
                # otherwise the last bucket lies above param.max.
                step = float(param.step)
                chocolate_search_space[key] = choco.quantized_uniform(
                    float(param.min), float(param.max) + step, step)
            elif param.type == CATEGORICAL:
                chocolate_search_space[key] = choco.choice(param.list)
            else:
                # Every remaining type is treated as a list of numeric values.
                chocolate_search_space[key] = choco.choice(
                    [float(e) for e in param.list])

        conn = choco.SQLiteConnection("sqlite:///my_db.db")
        # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
        if self.algorithm_name == "grid":
            sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
        # hyperopt-random is the default option in katib.
        elif self.algorithm_name == "chocolate-random":
            sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
        elif self.algorithm_name == "chocolate-quasirandom":
            sampler = choco.QuasiRandom(conn,
                                        chocolate_search_space,
                                        clear_db=True)
        elif self.algorithm_name == "chocolate-bayesian-optimization":
            sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
        # CMAES is intentionally not wired up here.
        elif self.algorithm_name == "chocolate-MOCMAES":
            mu = 1
            sampler = choco.MOCMAES(conn,
                                    chocolate_search_space,
                                    mu=mu,
                                    clear_db=True)
        else:
            # The original message had a stray leading quote and a typo.
            raise Exception("Failed to create the algorithm: {}".format(
                self.algorithm_name))

        for index, trial in enumerate(trials):
            loss_for_choco = float(trial.target_metric.value)
            # The metric is negated for a MAX_GOAL objective.
            if search_space.goal == MAX_GOAL:
                loss_for_choco = -1 * loss_for_choco

            entry = {"_chocolate_id": index, "_loss": loss_for_choco}
            for param in search_space.params:
                param_assignment = None
                for assignment in trial.assignments:
                    if param.name == assignment.name:
                        param_assignment = assignment.value
                        break
                if param.type == INTEGER:
                    param_assignment = int(param_assignment)
                elif param.type == DOUBLE:
                    param_assignment = float(param_assignment)
                entry[BaseChocolateService.encode(param.name)] = \
                    param_assignment
            logger.info(entry)
            # Should not use sampler.update(token, loss), because we will create
            # a new BaseChocolateService instance for every request. Thus we need
            # to insert all previous trials every time.
            conn.insert_result(entry)

        list_of_assignments = []

        for _ in range(request_number):
            try:
                _token, chocolate_params = sampler.next()
                list_of_assignments.append(
                    BaseChocolateService.convert(search_space,
                                                 chocolate_params))
            except StopIteration:
                logger.info(
                    "Chocolate db is exhausted, increase Search Space or decrease maxTrialCount!"
                )
        return list_of_assignments
示例#8
0
def hyperparameter_job_(train_methyl_array,
						val_methyl_array,
						interest_col,
						n_bins,
						custom_loss,
						torque,
						search_strategy,
						total_time,
						delay_time,
						gpu,
						additional_command,
						additional_options,
						update,
						n_epochs,
						job,
						survival,
						optimize_time,
						random_state,
						capsule_choice,
						custom_capsule_file,
						retrain_top_job,
						batch_size,
						output_top_job_params,
						limited_capsule_names_file,
						min_capsule_len_low_bound,
						gsea_superset,
						tissue,
						number_sets,
						use_set,
						gene_context,
						select_subtypes,
						custom_hyperparameters,
						min_capsules,
						fit_spw,
						l1_l2):
	"""Run one hyperparameter-search step, or re-score the best recorded trial.

	The function assembles the fixed model arguments, builds a Chocolate
	search space (defaults optionally overridden from a YAML file) and then:

	* with ``retrain_top_job`` truthy, reloads the lowest-loss row of
	  ``hyperparameter_scan.db``, scores it again and pickles the result to
	  ``top_loss.pkl``;
	* otherwise draws one configuration from the sampler selected by
	  ``search_strategy``, scores it and records the loss if it is >= 0.

	Scoring calls ``model_capsnet_`` in-process when ``update`` is truthy and
	otherwise passes a ``methylcaps-model`` command line to ``return_val_loss``.

	Args:
		train_methyl_array, val_methyl_array, interest_col, n_bins,
		custom_loss, job, batch_size, number_sets, min_capsules: Forwarded
			unchanged as fixed model arguments.
		n_epochs, gsea_superset, l1_l2, tissue, custom_capsule_file,
		limited_capsule_names_file: Forwarded only when truthy. A truthy
			``n_epochs`` also removes ``n_epochs`` from the search space.
		torque, total_time, delay_time, gpu, additional_command,
		additional_options: Passed to ``return_val_loss`` on the command-line
			path; ``gpu and not torque`` prefixes ``CUDA_VISIBLE_DEVICES=0``.
		search_strategy: ``'random'``, ``'quasi'`` or ``'bayes'``; any other
			value raises ``KeyError``.
		update: Truthy to score in-process instead of via the command line.
		survival: When falsy, ``gamma2`` is fixed to ``1e-2`` and not searched.
		optimize_time: When truthy the score is a ``(loss, time)`` tuple.
		random_state: Seed passed to the ``'quasi'`` sampler.
		capsule_choice: Iterable of capsule names; ``overlap`` and ``bin_len``
			are searched only when it contains ``'genomic_binned'``.
		retrain_top_job: Truthy to re-score the best stored trial.
		output_top_job_params: Truthy to print the best trial's command and
			exit; it forces ``retrain_top_job``.
		min_capsule_len_low_bound: Lower bound of ``min_capsule_len``.
		use_set, gene_context, fit_spw: Flags forwarded when truthy.
		select_subtypes: Iterable of subtype names; falsy entries are dropped.
		custom_hyperparameters: Optional path to a YAML file whose entries
			override the default grid bounds; ignored when ``None`` or missing.
	"""

	additional_params=dict(train_methyl_array=train_methyl_array,
							val_methyl_array=val_methyl_array,
							interest_col=interest_col,
							n_bins=n_bins,
							custom_loss=custom_loss,
							job=job,
							batch_size=batch_size,
							number_sets=number_sets,
							min_capsules=min_capsules
							)

	if n_epochs:
		additional_params['n_epochs']=n_epochs

	if gsea_superset:
		additional_params['gsea_superset']=gsea_superset

	if l1_l2:
		additional_params['l1_l2']=l1_l2

	if tissue:
		additional_params['tissue']=tissue

	if custom_capsule_file:
		additional_params['custom_capsule_file']=custom_capsule_file

	# Printing the top job's parameters implies working on the top job.
	if output_top_job_params:
		retrain_top_job=True

	if limited_capsule_names_file:
		additional_params['limited_capsule_names_file']=limited_capsule_names_file

	if update and not (retrain_top_job and output_top_job_params):
		# In-process call: values are passed as Python objects.
		additional_params['capsule_choice']=capsule_choice
		select_subtypes=list(filter(None,select_subtypes))
		if select_subtypes:
			additional_params['select_subtypes']=select_subtypes
		if use_set:
			additional_params['use_set']=use_set
		if gene_context:
			additional_params['gene_context']=gene_context
		if fit_spw:
			additional_params['fit_spw']=fit_spw
	else:
		# Command-line call: repeated options are joined with their short
		# flag, and boolean flags are emitted with an empty value.
		select_subtypes=list(filter(None,select_subtypes))
		if select_subtypes:
			additional_params['select_subtypes']=' -ss '.join(list(filter(None,select_subtypes)))
		additional_params['capsule_choice']=' -cc '.join(list(filter(None,capsule_choice)))
		if use_set:
			additional_params['use_set']=''
		if gene_context:
			additional_params['gene_context']=''
		if fit_spw:
			additional_params['fit_spw']=''

	if not survival:
		additional_params['gamma2']=1e-2

	def score_loss(params):
		"""Score one sampled configuration; ``params`` is modified in place.

		Returns the validation loss, or ``(loss, time)`` when ``optimize_time``
		is truthy.
		"""
		#job=np.random.randint(0,1000000)
		start_time=time.time()

		# Collapse the per-layer size entries into comma-separated topologies.
		params['hidden_topology']=','.join([str(int(params['el{}s'.format(j)])) for j in range(params['nehl']+1)])
		params['decoder_topology']=','.join([str(int(params['dl{}s'.format(j)])) for j in range(params['ndhl']+1)])

		del_params=['el{}s'.format(j) for j in range(params['nehl']+1)]+['dl{}s'.format(j) for j in range(params['ndhl']+1)]

		# Also drop any other key starting with 'el'/'dl'.
		del_params=set(del_params+[k for k in params if k.startswith('el') or k.startswith('dl')])
		for param in del_params:
			del params[param]

		del params['nehl'], params['ndhl']

		params.update(additional_params)

		print(params)

		# Falsy values are left off the command line, except 'use_set'.
		command='{} methylcaps-model model_capsnet {} || methylcaps-model report_loss -j {}'.format('CUDA_VISIBLE_DEVICES=0' if gpu and not torque else '',' '.join(['--{} {}'.format(k,v) for k,v in params.items() if v or k=='use_set']),params['job'])#,'&' if not torque else '')

		if output_top_job_params and retrain_top_job:
			print('Top params command: ')
			print('{} --predict'.format(command.split('||')[0]))
			exit()
		elif output_top_job_params:
			print('Continuing training of random parameters, please specify retrain_top_job.')

		if update:

			val_loss = model_capsnet_(**params)

		else:

			val_loss = return_val_loss(command, torque, total_time, delay_time, job, gpu, additional_command, additional_options)

		end_time=time.time()

		if optimize_time:
			# NOTE(review): start_time-end_time is the NEGATIVE elapsed time.
			# Confirm the sign is intended before changing it, because losses
			# already stored in the database use this convention.
			return val_loss, start_time-end_time
		else:
			return val_loss

	# Default bounds for every searched hyperparameter.
	grid=dict(n_epochs=dict(low=10, high=50, step=10),
				bin_len=dict(low=500000, high=1000000, step=100000),
				min_capsule_len=dict(low=min_capsule_len_low_bound, high=500, step=25),
				primary_caps_out_len=dict(low=10, high=100, step=5),
				caps_out_len=dict(low=10, high=100, step=5),
				nehl=dict(low=10,high=300,step=10,n_layers=3),
				ndhl=dict(low=100,high=300,step=10,n_layers=3),
				learning_rate=dict(low=-5,high=-1,step=1,base=10),
				gamma=dict(low=-5,high=-1,step=1,base=10),
				gamma2=dict(low=-5,high=-1,step=1,base=10),
				overlap=dict(low=0., high=.5, step=.1),
				routing_iterations=dict(low=2, high=4, step=1))

	# os.path.exists(None) raises TypeError, so guard the optional path first.
	if custom_hyperparameters is not None and os.path.exists(custom_hyperparameters):
		from ruamel.yaml import safe_load as load
		with open(custom_hyperparameters) as f:
			new_grid = load(f)
		print(new_grid)
		for k in new_grid:
			for k2 in new_grid[k]:
				grid[k][k2]=new_grid[k][k2]


	# n_layers is removed from the bounds because it is not a distribution argument.
	n_layers=dict(encoder=grid['nehl'].pop('n_layers'),decoder=grid['ndhl'].pop('n_layers'))


	# For nehl/ndhl, option i is a sub-space holding layer sizes 0..i.
	grid=dict(n_epochs=choco.quantized_uniform(**grid['n_epochs']),
				bin_len=choco.quantized_uniform(**grid['bin_len']),
				min_capsule_len=choco.quantized_uniform(**grid['min_capsule_len']),
				primary_caps_out_len=choco.quantized_uniform(**grid['primary_caps_out_len']),
				caps_out_len=choco.quantized_uniform(**grid['caps_out_len']),
				nehl={i: {'el{}s'.format(j):choco.quantized_uniform(**grid['nehl']) for j in range(i+1)} for i in range(n_layers['encoder'])},
				gamma=choco.quantized_log(**grid['gamma']),
				ndhl={i: {'dl{}s'.format(j):choco.quantized_uniform(**grid['ndhl']) for j in range(i+1)} for i in range(n_layers['decoder'])},
				learning_rate=choco.quantized_log(**grid['learning_rate']),
				routing_iterations=choco.quantized_uniform(**grid['routing_iterations']),
				overlap=choco.quantized_uniform(**grid['overlap']),
				gamma2=choco.quantized_log(**grid['gamma2'])
			) # ADD BATCH SIZE

	# Parameters fixed by the caller are not searched.
	if n_epochs:
		grid.pop('n_epochs')

	if not survival:
		grid.pop('gamma2')

	if 'genomic_binned' not in list(capsule_choice):
		for k in ['overlap','bin_len']:
			grid.pop(k)

	if retrain_top_job:

		# Reload the stored trial with the smallest non-null loss.
		conn=choco.SQLiteConnection('sqlite:///hyperparameter_scan.db')
		results=conn.results_as_dataframe()
		results=results[~results['_loss'].isnull()]
		params=dict(results.iloc[np.argmin(results['_loss'].values)])
		for k in ['bin_len','caps_out_len','min_capsule_len','ndhl','nehl','primary_caps_out_len','routing_iterations']:
			if k in params:
				params[k]=int(params[k])

		del params['_loss']

		top_loss=score_loss(params)

		# The context manager closes the file even if pickling fails.
		with open('top_loss.pkl','wb') as f:
			pickle.dump(top_loss,f)

	else:

		optimization_method = search_strategy#'bayes'

		sampler_opts={}

		if optimization_method in ['random']:
			sampler_opts['n_bootstrap']=10000
			#sampler_opts['random_state']=random_state
		elif optimization_method in ['quasi']:
			sampler_opts['seed']=random_state
			sampler_opts['skip']=3
		elif optimization_method in ['bayes']:
			sampler_opts['n_bootstrap']=35
			sampler_opts['utility_function']='ei'
			sampler_opts['xi']=0.1
			#sampler_opts['random_state']=42

		# NOTE(review): 'random' maps to choco.Bayes with n_bootstrap=10000,
		# presumably so sampling stays in the bootstrap phase; confirm.
		optimizer = dict(random=choco.Bayes,quasi=choco.QuasiRandom,bayes=choco.Bayes)[optimization_method] # Random

		hyp_conn = choco.SQLiteConnection(url="sqlite:///hyperparameter_scan.db")

		sampler = optimizer(hyp_conn, grid, **sampler_opts)

		token,params=sampler.next()

		loss=score_loss(params)

		# A negative loss is not recorded.
		if (loss if not optimize_time else loss[0])>=0:
			sampler.update(token, loss)
示例#9
0
        y_pred = m.predict(tst_x)
        return -1*skm.f1_score(tst_y, y_pred, average='macro')

# Conditional search space: one dict per candidate model. The fixed "model"
# entry names the estimator; the remaining entries are its hyperparameters.
space = [
    # The RandomForestClassifier sub-space is currently disabled:
    # {
    #     "model": "RandomForestClassifier",
    #     "max_depth": choco.quantized_uniform(2, 32, 2),
    #     "min_samples_split": choco.quantized_uniform(2, 600, 2),
    #     "n_estimators": choco.quantized_uniform(125, 800, 25),
    # },
    {
        "model": "SVC",
        "gamma": "auto",
        "C": choco.log(-3, 3, 10),
        "kernel": choco.choice(
            ["linear", "poly", "rbf", "sigmoid", "precomputed"]
        ),
        "tol": choco.log(-5, -2, 10),
    },
    {
        "model": "XGBClassifier",
        "learning_rate": choco.uniform(0.001, 0.1),
        "max_depth": choco.quantized_uniform(2, 16, 2),
        "min_child_weight": choco.quantized_uniform(2, 10, 2),
        "subsample": choco.quantized_uniform(0.7, 1.05, 0.05),
        "n_estimators": choco.quantized_uniform(25, 525, 25),
    },
    {
        "model": "LogisticRegression",
        "penalty": choco.choice(["l1", "l2"]),
        "C": choco.log(-2, 1, 10),
    },
]

# Maps each "model" name used in the search space to its estimator class.
models = {
    "RandomForestClassifier": RandomForestClassifier,
    "SVC": SVC,
    "XGBClassifier": XGBClassifier,
    "LogisticRegression": LogisticRegression,
}
            print("Exception occurred.")
            self.validation_error = 100000.0

if __name__ == '__main__':
    # Without command-line arguments: train once with fixed settings and stop.
    if len(sys.argv) == 1:
        # original params from article
        model = LatentAttention(frac_train=0.99, n_z=20, batchsize=100,
                                learning_rate=0.001, max_epochs=10,
                                e_h1=16, e_h2=32, d_h1=32, d_h2=16, run_id=-1)
        model.train()
        print("loss={}".format(float(model.validation_error)))
        # sys.exit is used because the bare exit() helper is installed by the
        # site module and is not meant for use in programs.
        sys.exit(0)

    # With any argument: ask the optimizer for one configuration, train with
    # it and report the validation error back as the loss.
    # Params from optimizer
    search_space = {
        "n_z": choco.quantized_uniform(5, 100, 1),
        "learning_rate": choco.log(-20, -8, 2),
        "max_epochs": choco.quantized_uniform(5, 200, 1),
        "e_h1": choco.quantized_uniform(16, 256, 1),
        "e_h2": choco.quantized_uniform(16, 256, 1),
        "d_h1": choco.quantized_uniform(16, 256, 1),
        "d_h2": choco.quantized_uniform(16, 256, 1),
    }
    connection = choco.SQLiteConnection("sqlite:///no_labels_results.sqlite3")
    sampler = choco.Bayes(connection, search_space)
    token, sample = sampler.next()
    print("Parameters: {} Token: {}".format(sample, token))
    # The Chocolate trial id doubles as the run id of the model.
    run_id = token['_chocolate_id']
    model = LatentAttention(0.99, batchsize=150, run_id=run_id, **sample)
    model.train()
    sampler.update(token, float(model.validation_error))
示例#11
0
    return -1 * skm.f1_score(tst_y, y_pred, average='macro')


space = [
    {
        'model': 'SVC',
        "gamma": 'auto',
        "C": choco.log(-3, 3, 10),
        "kernel":
        choco.choice(['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']),
        "tol": choco.log(-5, -2, 10),
    },
    {
        'model': 'XGBClassifier',
        "learning_rate": choco.uniform(0.001, 0.1),
        "max_depth": choco.quantized_uniform(2, 16, 2),
        "min_child_weight": choco.quantized_uniform(2, 10, 2),
        "subsample": choco.quantized_uniform(0.7, 1.05, 0.05),
        "n_estimators": choco.quantized_uniform(25, 525, 25),
    },
    {
        'model': 'RandomForestClassifier',
        "max_depth": choco.quantized_uniform(2, 10, 2),
        "min_samples_leaf": choco.quantized_uniform(2, 10, 2),
        "n_estimators": choco.quantized_uniform(25, 525, 25),
    },
    {
        'model': 'GaussianNB',
        "var_smoothing": choco.log(-12, -6, 10)
    },
    {