def experiment(state, channel): """ Experiment function. Used by jobman to run jobs. Must be loaded externally. Parameters ---------- state: WRITEME channel: WRITEME """ yaml_template = open(yaml_file).read() hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict) file_params = expand(flatten(state.file_parameters), dict_type=ydict) # Hack to fill in file parameter strings first for param in file_params: yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param]) yaml = yaml_template % hyper_parameters # with open("/na/homes/dhjelm/pylearn2/pylearn2/jobman/nice_lr_search/%d.yaml" # % state.id, "w") as f: # f.write(yaml) train_object = yaml_parse.load(yaml) train_object.main_loop() state.results = extract_results(train_object.model) return channel.COMPLETE
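# The snippet above fills its YAML template in two stages: file parameters are spliced in by literal
# string replacement first, then the remaining %(...)s placeholders are resolved with %-formatting
# against the expanded hyperparameter dict. Below is a minimal standalone sketch of that mechanism;
# the template text and the values are illustrative only, not the experiment's real YAML.
toy_template = "save_path: '%(save_path)s'\nnvis: %(nvis)d\nnhid: %(nhid)d\n"
file_params = {"save_path": "/tmp/demo_out"}      # path-specific values (made up)
hyper_parameters = {"nvis": 100, "nhid": 50}      # model-specific values (made up)

for param in file_params:
    # Stage 1: splice in the literal path so this placeholder no longer takes part
    # in the %-formatting below.
    toy_template = toy_template.replace("%%(%s)s" % param, file_params[param])

# Stage 2: ordinary %-formatting fills whatever named placeholders remain.
filled = toy_template % hyper_parameters
print(filled)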
def main(args): # Create a new database db = sql.db('%s?table=%s' % (args.database, args.table_name)) # Create a jobman state state = DD() # Loop over the search space and schedule jobs config_generator = gen_configurations(args.configurations) for i, params in enumerate(config_generator): # Complete parameters dictionary and add to the state state.parameters = params state.parameters['model'] = args.model state.parameters['dataset'] = args.dataset state.parameters['nb_epoch'] = args.n_epochs state.parameters['batch_size'] = args.batch_size # Insert the job into the database if args.model == 'ADIOS': state.parameters['labels_order'] = args.labels_order state.parameters['n_label_splits'] = args.n_label_splits for i in xrange(args.n_label_splits): state.parameters['label_split'] = i + 1 sql.insert_job(train_adios, flatten(state), db) else: # args.model == 'MLP' sql.insert_job(train_mlp, flatten(state), db) # Create a view for the new database table db.createView(args.table_name + '_view')
def experiment(state, channel): """ Experiment function. Used by jobman to run jobs. Must be loaded externally. Parameters ---------- state: WRITEME channel: WRITEME """ yaml_template = open(yaml_file).read() hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict) file_params = expand(flatten(state.file_parameters), dict_type=ydict) # Hack to fill in file parameter strings first for param in file_params: yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param]) yaml = yaml_template % hyper_parameters train_object = yaml_parse.load(yaml) state.pid = os.getpid() channel.save() train_object.main_loop() state.results = extract_results(train_object.model) return channel.COMPLETE
def run_experiment_jobman(state, channel): """ Main jobman experiment function, called by all jobs. """ experiment_module = state["experiment&module"] experiment = imp.load_source("module.name", experiment_module) yaml_template = open(experiment.yaml_file).read() hyperparams = expand(flatten(translate(state.hyperparams, "pylearn2")), dict_type=ydict) if not state["out&path"].endswith("job_%d" % state["jobman.id"]): state["out&path"] = path.join(state["out&path"], "job_%d" % state["jobman.id"]) channel.save() out_path = path.join(state["out&path"]) try: run_experiment(experiment, hyperparams, ask=False, out_path=out_path, dbdescr=state["dbdescr"], job_id=state["jobman.id"]) except ValueError as e: if str(e) == "KILLED": return channel.CANCELED else: return channel.ERR_RUN except: return channel.ERR_RUN print "Ending experiment" return channel.COMPLETE
def create_jobman_jobs(): #Database operations TABLE_NAME = "arcade_post_mlp_cv_binary_8x8_40k" db = api0.open_db('postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME) ri = numpy.random.random_integers # Default values state = DD() state.dataset = \ "/home/gulcehre/dataset/pentomino/experiment_data/pento64x64_40k_seed_23112222.npy" state.no_of_folds = 5 state.exid = 0 state.n_hiddens = [100, 200, 300] state.n_hidden_layers = 3 state.learning_rate = 0.001 state.l1_reg = 1e-5 state.l2_reg = 1e-3 state.n_epochs = 2 state.batch_size = 120 state.save_exp_data = True state.no_of_patches = 64 state.cost_type = "crossentropy" state.n_in = 8*8 state.n_out = 1 state.best_valid_error = 0.0 state.best_test_error = 0.0 state.valid_obj_path_error = 0.0 state.test_obj_path_error = 0.0 l1_reg_values = [0., 1e-6, 1e-5, 1e-4] l2_reg_values = [0., 1e-5, 1e-4] learning_rates = numpy.logspace(numpy.log10(0.0001), numpy.log10(1), 36) num_hiddens = numpy.logspace(numpy.log10(256), numpy.log10(2048), 24) for i in xrange(NO_OF_TRIALS): state.exid = i state.n_hidden_layers = ri(4) n_hiddens = [] for i in xrange(state.n_hidden_layers): n_hiddens.append(int(num_hiddens[ri(num_hiddens.shape[0]) - 1])) state.n_hiddens = n_hiddens state.learning_rate = learning_rates[ri(learning_rates.shape[0]) - 1] state.l1_reg = l1_reg_values[ri(len(l1_reg_values)) - 1] state.l2_reg = l2_reg_values[ri(len(l2_reg_values)) - 1] sql.insert_job(experiment, flatten(state), db) db.createView(TABLE_NAME + "_view")
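# Note on the index-sampling idiom above: numpy.random.random_integers(n) draws from the closed
# range [1, n], hence the "- 1" when indexing arrays. A small illustrative equivalent using
# randint is sketched below; the array and values here are arbitrary.
import numpy

learning_rates = numpy.logspace(numpy.log10(0.0001), numpy.log10(1), 36)
ri = numpy.random.random_integers

idx = ri(learning_rates.shape[0]) - 1                 # uniform index in [0, 35]
alt_idx = numpy.random.randint(0, learning_rates.shape[0])   # same distribution, 0-based directly
print(learning_rates[idx], learning_rates[alt_idx])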
def _load_in_safe_session(self, db, safe_session, filter_eq_dct=None, row_id=None, hash_of=None): with safe_session.set_timer(60 * 5): logger.debug("Query session...") q = db.query(safe_session.session) if row_id is not None: sql_row = q._query.get(row_id) if sql_row is None: raise OperationalError("There is no row with id \"%d\"" % row_id) sql_rows = [sql_row] elif hash_of is not None: hashcode = sql.hash_state(flatten(hash_of)) sql_rows = q._query.filter(db._Dict.hash == hashcode).all() elif filter_eq_dct is not None: sql_rows = q.filter_eq_dct(filter_eq_dct).all() else: sql_rows = q.all() logger.debug("Query done") return sql_rows
def main(args): dataset_name = args.dataset_name logger.info("Getting dataset info for %s" % dataset_name) data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name) mask_file = path.join(data_path, "mask.npy") mask = np.load(mask_file) input_dim = (mask == 1).sum() user = path.expandvars("$USER") save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%s" % (user, "rbm_simple_test")) # File parameters are path specific ones (not model specific). file_params = {"save_path": save_path, } yaml_template = open(yaml_file).read() hyperparams = expand(flatten(experiment.default_hyperparams(input_dim=input_dim)), dict_type=ydict) # Set additional hyperparams from command line args if args.learning_rate is not None: hyperparams["learning_rate"] = args.learning_rate if args.batch_size is not None: hyperparams["batch_size"] = args.batch_size for param in file_params: yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param]) yaml = yaml_template % hyperparams logger.info("Training") train = yaml_parse.load(yaml) train.main_loop()
def save(self, table_name, row): db = self._open_db(table_name) with SafeSession(db) as safe_session: if "id" in row: row_id = row["id"] del row["id"] sql_row = self._load_in_safe_session(db=db, safe_session=safe_session, row_id=row_id)[0] logger.debug("update row %d" % row_id) with safe_session.set_timer(60 * 5): sql_row.update_simple(flatten(row), safe_session.session) else: logger.debug("insert new row") with safe_session.set_timer(60 * 5): sql_row = sql.insert_dict(flatten(row), db, session=safe_session.session) if sql_row is None: raise OperationalError("Identical row already exists") sql_row._set_in_session(sql.JOBID, sql_row.id, safe_session.session) row_id = sql_row.id with safe_session.set_timer(60 * 5): safe_session.session.commit() logger.debug("session commited") # load in new session otherwise lazy attribute selection hangs # on forever... why is that!? with SafeSession(db) as safe_session: sql_row = self._load_in_safe_session(db, safe_session, row_id=row_id)[0] logger.debug("Fetch all row attributes...") eager_dict = self._eager_dicts([sql_row], safe_session)[0] logger.debug("Fetch done") eager_dict['id'] = row_id return eager_dict
def set_hyperparams(job, db): dbjob = db.get(job.job_id) job.file_prefix = dbjob["file_parameters.save_path"] job.out_dir = path.join(job.table_dir, job.file_prefix.split("/")[-1]) hyperparams = experiment.default_hyperparams() model_keys = flatten(hyperparams).keys() job.hyperparams = dict((" ".join(k.replace(".__builder__", "").split(".")), dbjob.get("hyper_parameters." + k, None)) for k in model_keys)
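# A minimal sketch of the key rewriting done above: flattened dotted keys (as produced by
# jobman.tools.flatten) become space-separated display names, and any ".__builder__" suffix is
# dropped. The example keys below are made up for illustration.
example_keys = [
    "learning_rate",
    "encoder.nhid",
    "prior.__builder__",
]
display_names = [" ".join(k.replace(".__builder__", "").split(".")) for k in example_keys]
print(display_names)   # ['learning_rate', 'encoder nhid', 'prior']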
def insert_jobexp(exp_args, jobman_args): """ Insert jobman experiment jobs into a SQL database. Remarks: We have this in a separate module since we can't refer to the jobexp.run function from within the jobexp module by its absolute name (else jobman won't find the experiment when run) :param exp_args: :param jobman_args: :return: """ table_name = jobman_args.get("table_name", "experiment") db = api0.open_db('postgres://*****:*****@127.0.0.1/jobbase?table='+table_name) for arg in jobman_args: sql.insert_job(predictive_rl.rlglueexp.jobexp.run, flatten(arg), db)
def update(self, table_name, rows, update_dict): db = self._open_db(table_name) with SafeSession(db) as safe_session: with safe_session.set_timer(60 * 5): for row in rows: sql_row = self._load_in_safe_session( db=db, safe_session=safe_session, row_id=row['id'])[0] try: sql_row.update_simple(flatten(update_dict), safe_session.session) except: sys.stderr.write("Failed for row: %d\n" % row['id']) safe_session.session.commit()
def train_experiment(state, channel): """ Train a model specified in state, and extract required results. This function builds a YAML string from ``state.yaml_template``, taking the values of hyper-parameters from ``state.hyper_parameters``, creates the corresponding object and trains it (like train.py), then run the function in ``state.extract_results`` on it, and store the returned values into ``state.results``. To know how to use this function, you can check the example in tester.py (in the same directory). """ yaml_template = state.yaml_template # Convert nested DD into nested ydict. hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict) # This will be the complete yaml string that should be executed final_yaml_str = yaml_template % hyper_parameters # Instantiate an object from YAML string train_obj = pylearn2.config.yaml_parse.load(final_yaml_str) for ext in train_obj.extensions: if hasattr(ext, 'set_train_obj'): ext.set_train_obj(train_obj) if hasattr(ext, 'set_jobman_channel'): ext.set_jobman_channel(channel) if hasattr(ext, 'set_jobman_state'): ext.set_jobman_state(state) try: iter(train_obj) iterable = True except TypeError: iterable = False if iterable: raise NotImplementedError( ('Current implementation does not support running multiple ' 'models in one yaml string. Please change the yaml template ' 'and parameters to contain only one single model.')) else: # print "Executing the model." train_obj.main_loop() # This line will call a function defined by the user and pass train_obj # to it. state.results = jobman.tools.resolve(state.extract_results)(train_obj) return channel.COMPLETE
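# A minimal sketch (not a runnable experiment) of the state a caller builds before inserting
# train_experiment into a jobman database. The YAML template body and the extract_results path
# below are placeholders for illustration; the tester example mentioned in the docstring shows a
# real configuration.
from jobman.tools import DD

state = DD()
state.yaml_template = "..."        # full pylearn2 YAML string with %(...)s placeholders
state.hyper_parameters = DD({"nvis": 32, "nhid": 6, "learning_rate": 0.1})
state.extract_results = "mypackage.results.extract"  # dotted path, resolved at run time

# Then, exactly as in the other snippets in this file:
#   db = api0.open_db('postgres://<user>:<pass>@<server>/<database>?table=my_table')
#   sql.insert_job(train_experiment, flatten(state), db)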
def run_experiment(experiment, **kwargs): """ Experiment function. Used by jobman to run jobs. Must be loaded externally. TODO: add sigint handling. Parameters ---------- experiment: module Experiment module. kwargs: dict Typically hyperparameters. """ hyper_parameters = experiment.default_hyperparams() set_hyper_parameters(hyper_parameters, **kwargs) file_parameters = experiment.fileparams set_hyper_parameters(file_parameters, **kwargs) hyper_parameters.update(file_parameters) ih = MRIInputHandler() input_dim, variance_map_file = ih.get_input_params(hyper_parameters) hyper_parameters["nvis"] = input_dim hyper_parameters["variance_map_file"] = variance_map_file pid = os.getpid() out_path = serial.preprocess( hyper_parameters.get("out_path", "${PYLEARN2_OUTS}")) if not path.isdir(out_path): os.mkdir(out_path) if not path.isdir(path.join(out_path, "logs")): os.mkdir(path.join(out_path, "logs")) hyper_parameters = expand(flatten(hyper_parameters), dict_type=ydict) lh = LogHandler(experiment, hyper_parameters, out_path, pid) h = logging.StreamHandler(lh) monitor.log.addHandler(h) yaml_template = open(experiment.yaml_file).read() yaml = yaml_template % hyper_parameters train_object = yaml_parse.load(yaml) try: train_object.main_loop() lh.finish("COMPLETED") except KeyboardInterrupt: print("Quitting...") lh.finish("KILLED")
def train_experiment(state, channel): """ Train a model specified in state, and extract required results. This function builds a YAML string from ``state.yaml_template``, taking the values of hyper-parameters from ``state.hyper_parameters``, creates the corresponding object and trains it (like train.py), then run the function in ``state.extract_results`` on it, and store the returned values into ``state.results``. To know how to use this function, you can check the example in tester.py (in the same directory). """ yaml_template = state.yaml_template # Convert nested DD into nested ydict. hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict) # This will be the complete yaml string that should be executed final_yaml_str = yaml_template % hyper_parameters # write to .yaml file for ease of reproducibility fp = open('experiment.yaml', 'w') fp.write('%s' % final_yaml_str[2:]) fp.close() # Instantiate an object from YAML string train_obj = pylearn2.config.yaml_parse.load(final_yaml_str) try: iter(train_obj) iterable = True except TypeError: iterable = False if iterable: raise NotImplementedError( ('Current implementation does not support running multiple ' 'models in one yaml string. Please change the yaml template ' 'and parameters to contain only one single model.')) else: # print "Executing the model." # (GD) HACK HACK train_obj.model.jobman_channel = channel train_obj.model.jobman_state = state train_obj.main_loop() return channel.COMPLETE
def load_experiments_jobman(experiment_module, jobargs): """ Load jobs from experiment onto postgresql database table. """ dbdescr = get_desc(jobargs) db = api0.open_db(dbdescr) experiment = imp.load_source("module.name", experiment_module) for i, items in enumerate(experiment.generator): hyperparams = experiment.default_hyperparams state = DD() set_hyper_parameters(hyperparams, **dict((k, v) for k, v in items)) state.hyperparams = translate(hyperparams, "knex") state["out&path"] = path.abspath(jobargs["out_path"]) state["experiment&module"] = path.abspath(experiment_module) state["dbdescr"] = dbdescr sql.insert_job(run_experiment_jobman, flatten(state), db) db.createView("%s" % jobargs["table"])
def main(dataset_name="smri"): logger.info("Getting dataset info for %s" % args.dataset_name) data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + args.dataset_name) mask_file = path.join(data_path, "mask.npy") mask = np.load(mask_file) input_dim = (mask == 1).sum() if input_dim % 2 == 1: input_dim -= 1 mri = MRI.MRI_Standard(which_set="full", dataset_name=args.dataset_name, unit_normalize=True, even_input=True, apply_mask=True) variance_map_file = path.join(data_path, "variance_map.npy") mri_nifti.save_variance_map(mri, variance_map_file) user = path.expandvars("$USER") save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%s" % (user, "jobman_test")) file_params = {"save_path": save_path, "variance_map_file": variance_map_file } yaml_template = open(yaml_file).read() hyperparams = expand(flatten(mlp_experiment.default_hyperparams(input_dim=input_dim)), dict_type=ydict) for param in hyperparams: if hasattr(args, param) and getattr(args, param): val = getattr(args, param) logger.info("Filling %s with %r" % (param, val)) hyperparams[param] = type(hyperparams[param])(val) for param in file_params: yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param]) yaml = yaml_template % hyperparams print yaml logger.info("Training") train = yaml_parse.load(yaml) train.main_loop()
state.hyper_parameters = { 'trainfile': 'train_gray_uvd_rot_1562_31.h5', 'N': 10 * 64, 'batch_size': 64 * 2, 'c1': c1, 'kernel_c1': kernel_c1, 'pool_c1': pool_c1, 'c2': c2, 'kernel_c2': kernel_c2, 'pool_c2': pool_c2, 'irange_c1': irange_c1, 'irange_c2': irange_c2, 'irange_hd1': irange_hd1, 'irange_hd2': irange_hd2, 'irange_out': irange_out, 'hd1': hd1, 'hd2': hd2, 'output_dim': outdim, 'lamda': lamda, 'decay': decay, 'max_epochs': 100, 'save_best_path': save_best_path } yaml_template = state.yaml_template hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict) # This will be the complete yaml string that should be executed final_yaml_str = yaml_template % hyper_parameters train_obj = pylearn2.config.yaml_parse.load(final_yaml_str) train_obj.main_loop()
'n': 2*64, 'batch_size': 64, 'c1': c1, 'kernel_c1':kernel_c1, 'pool_c1':pool_c1, 'c2': c2, 'kernel_c2':kernel_c2, 'pool_c2':pool_c2, 'irange_c1':irange_c1, 'irange_c2':irange_c2, 'irange_hd1':irange_hd1, 'irange_hd2':irange_hd2, 'irange_out':irange_out, 'hd1': 2592, 'hd2': 36, 'output_dim':constants.NUM_JNTS * 3, 'lamda':lamda, 'decay':decay, 'max_epochs': 50, 'save_best_path': save_best_path } yaml_template = state.yaml_template hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict) # This will be the complete yaml string that should be executed final_yaml_str = yaml_template % hyper_parameters train_obj = pylearn2.config.yaml_parse.load(final_yaml_str) train_obj.main_loop() # hadag
def load_experiments(args): dataset_name = args.dataset_name db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" % {"user": args.user, "host": args.host, "port": args.port, "database": args.database, "table": args.table, }) logger.info("Getting dataset info for %s%s" % (dataset_name, ", transposed" if args.transposed else "")) data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + args.dataset_name) if args.transposed: logger.info("Data in transpose...") mri = MRI.MRI_Transposed(dataset_name=args.dataset_name, unit_normalize=True, even_input=True, apply_mask=True) input_dim = mri.X.shape[1] variance_map_file = path.join(data_path, "transposed_variance_map.npy") else: mask_file = path.join(data_path, "mask.npy") mask = np.load(mask_file) input_dim = (mask == 1).sum() if input_dim % 2 == 1: input_dim -= 1 mri = MRI.MRI_Standard(which_set="full", dataset_name=args.dataset_name, unit_normalize=True, even_input=True, apply_mask=True) variance_map_file = path.join(data_path, "variance_map.npy") mri_nifti.save_variance_map(mri, variance_map_file) for items in nested_generator(layer_depth_generator("encoder.layer_depths", xrange(4, 6), 5), hidden_generator("encoder.nhid", 4), float_generator("weight_decay.coeffs.z", 3, 0.1, 0.001, log_scale=True)): # logger.info("Adding NICE experiment with hyperparameters %s" % (items, )) state = DD() experiment_hyperparams = nice_experiment.default_hyperparams(input_dim) if args.transposed: experiment_hyperparams["data_class"] = "MRI_Transposed" if args.logistic: experiment_hyperparams["prior"]["__builder__"] =\ "nice.pylearn2.models.nice.StandardLogistic" for key, value in items: split_keys = key.split(".") entry = experiment_hyperparams for k in split_keys[:-1]: entry = entry[k] entry[split_keys[-1]] = value experiment_hyperparams["dataset_name"] = dataset_name h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\ [tuple(v) if isinstance(v, list) else v for v in flatten(experiment_hyperparams).values()]))) user = path.expandvars("$USER") save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%d" % (user, h)) file_params = { "save_path": save_path, "variance_map_file": variance_map_file, } state.file_parameters = file_params state.hyper_parameters = experiment_hyperparams sql.insert_job( nice_experiment.experiment, flatten(state), db ) db.createView("%s_view" % args.table)
def load_experiments(args): dataset_name = args.dataset_name # Load the database and table. db = sql.db( "postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" % { "user": args.user, "host": args.host, "port": args.port, "database": args.database, "table": args.table, }) # Don't worry about this yet. input_handler = InputHandler() # For generating models, we use a special set of jobman generators, made # for convenience. for items in jg.nested_generator( jg.float_generator("learning_rate", 3, 0.01, 0.0001, log_scale=True), jg.list_generator("nhid", [50, 100, 200, 300]), ): logger.info("Adding RBM experiment across hyperparameters %s" % (items, )) state = DD() # Load experiment hyperparams from experiment experiment_hyperparams = experiment.default_hyperparams() # Set them with values in our loop. for key, value in items: split_keys = key.split(".") entry = experiment_hyperparams for k in split_keys[:-1]: entry = entry[k] assert split_keys[-1] in entry, ( "Key not found in hyperparams: %s" % split_keys[-1]) entry[split_keys[-1]] = value # Set the dataset name experiment_hyperparams["dataset_name"] = dataset_name # Get the input dim and variance map. Don't worry about variance maps right now, # they aren't used here. input_dim, variance_map_file = input_handler.get_input_params( args, experiment_hyperparams) logger.info("%s\n%s\n" % (input_dim, variance_map_file)) # Set the input dimensionality by the data experiment_hyperparams["nvis"] = input_dim # Set the minimum learning rate relative to the initial learning rate. experiment_hyperparams[ "min_lr"] = experiment_hyperparams["learning_rate"] / 10 # Make a unique hash for experiments. Remember that lists, dicts, and other data # types may not be hashable, so you may need to do some special processing. In # this case we convert the lists to tuples. h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\ [tuple(v) if isinstance(v, list) else v for v in flatten(experiment_hyperparams).values()]))) # Save path for the experiments. In this case we are sharing a directory in my # export directory so IT can blame me. save_path = serial.preprocess( "/export/mialab/users/dhjelm/pylearn2_outs/rbm_demo/%d" % h) # We save file params separately as they aren't model specific. file_params = { "save_path": save_path, "variance_map_file": variance_map_file, } state.file_parameters = file_params state.hyper_parameters = experiment_hyperparams user = path.expandvars("$USER") state.created_by = user # Finally we add the experiment to the table. sql.insert_job(experiment.experiment, flatten(state), db) # A view can be used when querying the database using psql. May not be needed in future. db.createView("%s_view" % args.table)
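# The jg.* helpers used above come from a local generator module; the sketch below is NOT that
# module, only an illustrative reimplementation of the behaviour the loop relies on: each generator
# yields (key, value) pairs, and nested_generator yields one tuple of pairs per point of the cross
# product. Function names and signatures are assumed from the calls in this file.
import itertools
import numpy as np

def list_generator(key, values):
    # One (key, value) pair per listed value.
    for v in values:
        yield (key, v)

def float_generator(key, num, start, finish, log_scale=False):
    # num values between start and finish, optionally log-spaced.
    if log_scale:
        values = np.logspace(np.log10(start), np.log10(finish), num)
    else:
        values = np.linspace(start, finish, num)
    for v in values:
        yield (key, float(v))

def nested_generator(*generators):
    # Cross product over all generators; each item is a tuple of (key, value) pairs.
    for pairs in itertools.product(*generators):
        yield pairs

for items in nested_generator(float_generator("learning_rate", 3, 0.01, 0.0001, log_scale=True),
                              list_generator("nhid", [50, 100, 200, 300])):
    print(dict(items))   # e.g. {'learning_rate': 0.01, 'nhid': 50}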
def load_experiments(args): dataset_name = args.dataset_name db = sql.db( "postgres://%(user)s@%(host)s:" "%(port)d/%(database)s?table=%(table)s" % { "user": args.user, "host": args.host, "port": args.port, "database": args.database, "table": args.table, }) input_handler = InputHandler() for items in jg.nested_generator( jg.list_generator( "encoder.layer_depths", [[3, 5, 5, 5, 3], [5, 5, 5, 5, 5], [2, 4, 4, 2]]), jg.list_generator("variance_normalize", [False, 2]), jg.float_generator("weight_decay.coeff", 4, 0.1, 0.0001, log_scale=True), jg.list_generator("prior.__builder__", [ "nice.pylearn2.models.nice.StandardNormal", "nice.pylearn2.models.nice.StandardLogistic" ])): logger.info("Adding NICE experiment across hyperparameters %s" % (items, )) state = DD() experiment_hyperparams = nice_experiment.default_hyperparams() for key, value in items: split_keys = key.split(".") entry = experiment_hyperparams for k in split_keys[:-1]: entry = entry[k] assert split_keys[-1] in entry,\ ("Key not found in hyperparams: %s, " "found: %s" % (split_keys[-1], entry.keys())) entry[split_keys[-1]] = value experiment_hyperparams["dataset_name"] = dataset_name input_dim, variance_map_file = input_handler.get_input_params( args, experiment_hyperparams) logger.info("%s\n%s\n" % (input_dim, variance_map_file)) experiment_hyperparams["nvis"] = input_dim experiment_hyperparams["encoder"]["nvis"] = input_dim h = abs(hash(frozenset( flatten(experiment_hyperparams).keys() +\ [tuple(v) if isinstance(v, list) else v for v in flatten(experiment_hyperparams).values()]))) user = path.expandvars("$USER") save_path = serial.preprocess( "/export/mialab/users/%s/pylearn2_outs/%d" % (user, h)) file_params = { "save_path": save_path, "variance_map_file": variance_map_file, } state.file_parameters = file_params state.hyper_parameters = experiment_hyperparams sql.insert_job(nice_experiment.experiment, flatten(state), db) db.createView("%s_view" % args.table)
from jobman.tools import DD, flatten from jobman import api0, sql from jobman.examples.def_addition import addition_example TABLE_NAME = 'test_add_' # DB path... db = api0.open_db('postgres://<user>:<pass>@<server>/<database>?table=' + TABLE_NAME) state = DD() for first in 0, 2, 4, 6, 8, 10: state.first = first for second in 1, 3, 5, 7, 9: state.second = second sql.insert_job(addition_example, flatten(state), db)
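# Once the rows above are inserted, workers are started from the shell with the jobman command
# line, following the same launch pattern shown in the comments further down in this file; each
# invocation pops and runs one job from the table. The db string mirrors the one above and the
# experiment root is a placeholder:
#
#   jobman sql 'postgres://<user>:<pass>@<server>/<database>?table=test_add_' <experiment_root>
#
# Results can then be inspected directly in the table with psql.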
def create_jobman_jobs(): #Database operations TABLE_NAME = "arcade_multi_prmlp_cv_binary_8x8_40k" db = api0.open_db( 'postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME) ri = numpy.random.random_integers # Default values state = DD() state.dataset = \ "/home/gulcehre/dataset/pentomino/experiment_data/pento64x64_40k_seed_23112222.npy" state.no_of_folds = 5 state.exid = 0 state.n_hiddens = [100, 200, 300] state.n_hidden_layers = 3 state.learning_rate = 0.001 state.l1_reg = 1e-5 state.l2_reg = 1e-3 state.n_epochs = 2 state.batch_size = 120 state.save_exp_data = True state.no_of_patches = 64 state.cost_type = "crossentropy" state.n_in = 8 * 8 state.n_out = 1 state.best_valid_error = 0.0 state.best_test_error = 0.0 state.valid_obj_path_error = 0.0 state.test_obj_path_error = 0.0 l1_reg_values = [0., 1e-6, 1e-5, 1e-4] l2_reg_values = [0., 1e-5, 1e-4] learning_rates = numpy.logspace(numpy.log10(0.0001), numpy.log10(1), 36) num_hiddens = numpy.logspace(numpy.log10(256), numpy.log10(2048), 24) for i in xrange(NO_OF_TRIALS): state.exid = i state.n_hidden_layers = ri(4) n_hiddens = [] for i in xrange(state.n_hidden_layers): n_hiddens.append(int(num_hiddens[ri(num_hiddens.shape[0]) - 1])) state.n_hiddens = n_hiddens state.learning_rate = learning_rates[ri(learning_rates.shape[0]) - 1] state.l1_reg = l1_reg_values[ri(len(l1_reg_values)) - 1] state.l2_reg = l2_reg_values[ri(len(l2_reg_values)) - 1] sql.insert_job(experiment, flatten(state), db) db.createView(TABLE_NAME + "_view")
def run_experiment(experiment, hyper_parameters=None, ask=True, keep=False, dbdescr=None, job_id=None, debug=False, dataset_root="${PYLEARN2_NI_PATH}", **kwargs): """ Experiment function. Used by jobman to run jobs. Must be loaded externally. TODO: add sigint handling. Parameters ---------- experiment: module Experiment module. kwargs: dict Typically hyperparameters. """ # Fill the hyperparameter values. if hyper_parameters is None: hyper_parameters = experiment.default_hyperparams set_hyper_parameters(hyper_parameters, **kwargs) file_parameters = experiment.fileparams set_hyper_parameters(file_parameters, **kwargs) hyper_parameters.update(file_parameters) # Set the output path, default from environment variable $PYLEARN2_OUTS out_path = serial.preprocess( hyper_parameters.get("out_path", "${PYLEARN2_OUTS}")) if not path.isdir(out_path): os.makedirs(out_path) processing_flag = mp.Value("b", False) mem = mp.Value("f", 0.0) cpu = mp.Value("f", 0.0) last_processed = mp.Manager().dict() last_processed["value"] = "Never" lh = LogHandler(experiment, out_path, processing_flag, mem, cpu, last_processed, dbdescr, job_id) h = logging.StreamHandler(lh) lh.logger.info("Hijacking pylearn2 logger (sweet)...") monitor.log.addHandler(h) try: # HACK TODO: fix this. For some reason knex formatted strings are # sometimes getting in. hyper_parameters = translate(hyper_parameters, "pylearn2") # Use the input hander to get input information. ih = input_handler.MRIInputHandler() input_dim, variance_map_file = ih.get_input_params( hyper_parameters, dataset_root=dataset_root) if hyper_parameters["nvis"] is None: hyper_parameters["nvis"] = input_dim # Hack for NICE. Need to rethink inner-dependencies of some model params. if ("encoder" in hyper_parameters.keys() and "nvis" in hyper_parameters["encoder"].keys() and hyper_parameters["encoder"]["nvis"] is None): hyper_parameters["encoder"]["nvis"] = input_dim # If there's min_lr, make it 1/10 learning_rate if "min_lr" in hyper_parameters.keys(): hyper_parameters["min_lr"] = hyper_parameters["learning_rate"] / 10 # Corruptor is a special case of hyper parameters that depends on input # file: variance_map. So we hack it in here. if "corruptor" in hyper_parameters.keys(): if "variance_map" in hyper_parameters["corruptor"].keys(): hyper_parameters["corruptor"]["variance_map"] =\ "!pkl: %s" % variance_map_file else: hyper_parameters["variance_map_file"] = variance_map_file lh.write_json() # The Process id pid = os.getpid() lh.logger.info("Proces id is %d" % pid) # If any pdfs are in out_path, kill or quit json_file = path.join(out_path, "analysis.json") if (ask and (path.isfile(json_file) or len(glob.glob(path.join(out_path, "*.pkl"))) > 0)): print("Results found in %s " "Proceeding will erase." 
% out_path) command = None while not command in ["yes", "no", "y", "n"]: command = raw_input("%s: " % "Proceed?") if command in ["yes", "y"]: break elif command in ["no", "n"]: exit() else: print("Please enter yes(y) or no(n)") if path.isfile(json_file): with open(json_file) as f: models = json.load(f) for model in models.keys(): lh.logger.info("Removing results for model %s" % model) try: os.rmdir(path.join(out_path, "%s_images" % model)) except: pass os.remove(json_file) for pkl in glob.glob(path.join(out_path, "*.pkl")): lh.logger.info("Removing %s" % pkl) os.remove(pkl) lh.logger.info("Making the train object") hyper_parameters = expand(flatten(hyper_parameters), dict_type=ydict) yaml_template = open(experiment.yaml_file).read() yaml = yaml_template % hyper_parameters train_object = yaml_parse.load(yaml) if debug: return train_object lh.write_json() lh.logger.info("Seting up subprocesses") lh.logger.info("Setting up listening socket") mp_ep, s_ep = mp.Pipe() p = mp.Process(target=server, args=(pid, s_ep)) p.start() port = mp_ep.recv() lh.logger.info("Listening on port %d" % port) lh.logger.info("Starting model processor") model_processor = ModelProcessor(experiment, train_object.save_path, mp_ep, processing_flag, last_processed) model_processor.start() lh.logger.info("Model processor started") lh.logger.info("Starting stat processor") stat_processor = StatProcessor(pid, mem, cpu) stat_processor.start() lh.logger.info("Stat processor started") lh.update(hyperparams=hyper_parameters, yaml=yaml, pid=pid, port=port) lh.write_json() except Exception as e: lh.logger.exception(e) lh.finish("FAILED") raise e # Clean the model after running def clean(): p.terminate() lh.logger.info("waiting for server...") p.join() model_processor.terminate() lh.logger.info("waiting for model processor...") model_processor.join() stat_processor.terminate() lh.logger.info("waiting for stat processor...") stat_processor.join() if keep: lh.logger.info("Keeping checkpoints") else: lh.logger.info("Cleaning checkpoints") try: os.remove(model_processor.best_checkpoint) except: pass try: os.remove(model_processor.checkpoint) except: pass # A signal handler so processes kill cleanly. def signal_handler(signum, frame): lh.logger.info("Forced quitting...") clean() lh.finish("KILLED") if dbdescr is None: exit() else: raise ValueError("KILLED") signal.signal(signal.SIGINT, signal_handler) # Main loop. try: lh.logger.info("Training...") train_object.main_loop() lh.logger.info("Training quit without exception") except Exception as e: lh.logger.exception(e) clean() lh.finish("FAILED") raise (e) # After complete, process model. lh.logger.info("Processing model results...") try: experiment.analyze_fn(model_processor.best_checkpoint, model_processor.out_path) except IOError: experiment.analyze_fn(model_processor.checkpoint, model_processor.out_path) except Exception as e: lh.logger.error(e) # Clean checkpoints. clean() lh.logger.info("Finished experiment.") lh.finish("COMPLETED") return
state = DD() state.learning_rate = 0.01 state.L1_reg = 0.00 state.L2_reg = 0.0001 state.n_iter = 50 state.batch_size = 20 state.n_hidden = 10 # Hyperparameter exploration for n_hidden in 20, 30: state.n_hidden = n_hidden # Explore L1 regularization w/o L2 state.L2_reg = 0. for L1_reg in 0., 1e-6, 1e-5, 1e-4: state.L1_reg = L1_reg # Insert job sql.insert_job(experiment, flatten(state), db) # Explore L2 regularization w/o L1 state.L1_reg = 0. for L2_reg in 1e-5, 1e-4: state.L2_reg = L2_reg # Insert job sql.insert_job(experiment, flatten(state), db) # Create the view db.createView(TABLE_NAME + '_view')
state.batch_size = 20 state.n_hidden = 10 # Hyperparameter exploration for n_hidden in 20, 30: print "n_hidden =", n_hidden state.n_hidden = n_hidden # Explore L1 regularization w/o L2 state.L2_reg = 0. for L1_reg in 0., 1e-6, 1e-5, 1e-4: print "L1_reg =",L1_reg state.L1_reg = L1_reg # Insert job sql.insert_job(experiment, flatten(state), db) # Explore L2 regularization w/o L1 state.L1_reg = 0. for L2_reg in 1e-5, 1e-4: print "L2_reg =",L2_reg state.L2_reg = L2_reg # Insert job sql.insert_job(experiment, flatten(state), db) # Create the view db.createView(TABLE_NAME+'_view')
def load_experiments(args): dataset_name = args.dataset_name db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" % {"user": args.user, "host": args.host, "port": args.port, "database": args.database, "table": args.table, }) logger.info("Getting dataset info for %s" % dataset_name) data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name) mask_file = path.join(data_path, "mask.npy") mask = np.load(mask_file) input_dim = (mask == 1).sum() if input_dim % 2 == 1: input_dim -= 1 mri = MRI.MRI_Standard(which_set="full", dataset_name=dataset_name, unit_normalize=True, even_input=True, apply_mask=True) variance_map_file = path.join(data_path, "variance_map.npy") mri_nifti.save_variance_map(mri, variance_map_file) for items in jg.nested_generator(jg.hidden_generator("nhid1", 1), jg.hidden_generator("nhid2", 1), ): state = DD() experiment_hyperparams = mlp_experiment.default_hyperparams(input_dim) for key, value in items: split_keys = key.split(".") entry = experiment_hyperparams for k in split_keys[:-1]: entry = entry[k] entry[split_keys[-1]] = value experiment_hyperparams["dataset_name"] = dataset_name h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\ flatten(experiment_hyperparams).values()))) user = path.expandvars("$USER") save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%d" % (user, h)) file_params = { "save_path": save_path, "variance_map_file": variance_map_file, } state.file_parameters = file_params state.hyper_parameters = experiment_hyperparams state.pid = 0 sql.insert_job( mlp_experiment.experiment, flatten(state), db ) db.createView("%s_view" % args.table)
def load_experiments(args): dataset_name = args.dataset_name # Load the database and table. db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" % {"user": args.user, "host": args.host, "port": args.port, "database": args.database, "table": args.table, }) # Don't worry about this yet. input_handler = InputHandler() # For generating models, we use a special set of jobman generators, made # for convenience. for items in jg.nested_generator( jg.float_generator("learning_rate", 3, 0.01, 0.0001, log_scale=True), jg.list_generator("nhid", [50, 100, 200, 300]), ): logger.info("Adding RBM experiment across hyperparameters %s" % (items, )) state = DD() # Load experiment hyperparams from experiment experiment_hyperparams = experiment.default_hyperparams() # Set them with values in our loop. for key, value in items: split_keys = key.split(".") entry = experiment_hyperparams for k in split_keys[:-1]: entry = entry[k] assert split_keys[-1] in entry, ("Key not found in hyperparams: %s" % split_keys[-1]) entry[split_keys[-1]] = value # Set the dataset name experiment_hyperparams["dataset_name"] = dataset_name # Get the input dim and variance map. Don't worry about variance maps right now, # they aren't used here. input_dim, variance_map_file = input_handler.get_input_params(args, experiment_hyperparams) logger.info("%s\n%s\n" % (input_dim, variance_map_file)) # Set the input dimensionality by the data experiment_hyperparams["nvis"] = input_dim # Set the minimum learning rate relative to the initial learning rate. experiment_hyperparams["min_lr"] = experiment_hyperparams["learning_rate"] / 10 # Make a unique hash for experiments. Remember that lists, dicts, and other data # types may not be hashable, so you may need to do some special processing. In # this case we convert the lists to tuples. h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\ [tuple(v) if isinstance(v, list) else v for v in flatten(experiment_hyperparams).values()]))) # Save path for the experiments. In this case we are sharing a directory in my # export directory so IT can blame me. save_path = serial.preprocess("/export/mialab/users/dhjelm/pylearn2_outs/rbm_demo/%d" % h) # We save file params separately as they aren't model specific. file_params = { "save_path": save_path, "variance_map_file": variance_map_file, } state.file_parameters = file_params state.hyper_parameters = experiment_hyperparams user = path.expandvars("$USER") state.created_by = user # Finally we add the experiment to the table. sql.insert_job( experiment.experiment, flatten(state), db ) # A view can be used when querying the database using psql. May not be needed in future. db.createView("%s_view" % args.table)
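# A small self-contained illustration of the hashing trick described in the comments above:
# flattened values that are lists get converted to tuples so the whole parameter set can sit in a
# frozenset and produce a single integer used to name the save directory. The toy dict is made up.
toy_flat = {"learning_rate": 0.01, "nhid": 100, "encoder.layer_depths": [3, 5, 5, 3]}
hashable_values = [tuple(v) if isinstance(v, list) else v for v in toy_flat.values()]
h = abs(hash(frozenset(list(toy_flat.keys()) + hashable_values)))
save_path = "/export/mialab/users/demo/pylearn2_outs/%d" % h   # same path pattern as above
print(save_path)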
# tested didn't change at all. This can be confusing. from jobman.examples.def_addition import addition_example # here we build a list of dictionaries, each of which specifies a setting of # parameters corresponding to one job. # # We will pass this list to mydriver.main, so that it can potentially insert # them, or trim down the database to match the current list, and other sorts of # things. state = DD() jobs = [] for first in 0, 2, 4, 6, 8, 10: state.first = first for second in 1, 3, 5, 7, 9: state.second = second jobs.append(dict(flatten(state))) # # mydriver.main defines a few commands that are generally useful (insert, # clear_db, summary, etc.) but if you want to analyse your results you can add # additional commands. # # For example, if you type 'python additional_driver.py list_all', then this little function will run, because its function name is 'list_all'. # # For help on the meaning of the various elements of `kwargs`, see <WRITEME>. # @mydriver.mydriver_cmd_desc('list the contents of the database') def list_all(db, **kwargs): for d in db:
def run_experiment(experiment, hyper_parameters=None, ask=True, keep=False, dbdescr=None, job_id=None, debug=False, dataset_root="${PYLEARN2_NI_PATH}", **kwargs): """ Experiment function. Used by jobman to run jobs. Must be loaded externally. TODO: add sigint handling. Parameters ---------- experiment: module Experiment module. kwargs: dict Typically hyperparameters. """ # Fill the hyperparameter values. if hyper_parameters is None: hyper_parameters = experiment.default_hyperparams set_hyper_parameters(hyper_parameters, **kwargs) file_parameters = experiment.fileparams set_hyper_parameters(file_parameters, **kwargs) hyper_parameters.update(file_parameters) # Set the output path, default from environment variable $PYLEARN2_OUTS out_path = serial.preprocess( hyper_parameters.get("out_path", "${PYLEARN2_OUTS}")) if not path.isdir(out_path): os.makedirs(out_path) processing_flag = mp.Value("b", False) mem = mp.Value("f", 0.0) cpu = mp.Value("f", 0.0) last_processed = mp.Manager().dict() last_processed["value"] = "Never" lh = LogHandler(experiment, out_path, processing_flag, mem, cpu, last_processed, dbdescr, job_id) h = logging.StreamHandler(lh) lh.logger.info("Hijacking pylearn2 logger (sweet)...") monitor.log.addHandler(h) try: # HACK TODO: fix this. For some reason knex formatted strings are # sometimes getting in. hyper_parameters = translate(hyper_parameters, "pylearn2") # Use the input hander to get input information. ih = input_handler.MRIInputHandler() input_dim, variance_map_file = ih.get_input_params( hyper_parameters, dataset_root=dataset_root) if hyper_parameters["nvis"] is None: hyper_parameters["nvis"] = input_dim # Hack for NICE. Need to rethink inner-dependencies of some model params. if ("encoder" in hyper_parameters.keys() and "nvis" in hyper_parameters["encoder"].keys() and hyper_parameters["encoder"]["nvis"] is None): hyper_parameters["encoder"]["nvis"] = input_dim # If there's min_lr, make it 1/10 learning_rate if "min_lr" in hyper_parameters.keys(): hyper_parameters["min_lr"] = hyper_parameters["learning_rate"] / 10 # Corruptor is a special case of hyper parameters that depends on input # file: variance_map. So we hack it in here. if "corruptor" in hyper_parameters.keys(): if "variance_map" in hyper_parameters["corruptor"].keys(): hyper_parameters["corruptor"]["variance_map"] =\ "!pkl: %s" % variance_map_file else: hyper_parameters["variance_map_file"] = variance_map_file lh.write_json() # The Process id pid = os.getpid() lh.logger.info("Proces id is %d" % pid) # If any pdfs are in out_path, kill or quit json_file = path.join(out_path, "analysis.json") if (ask and (path.isfile(json_file) or len(glob.glob(path.join(out_path, "*.pkl"))) > 0)): print ("Results found in %s " "Proceeding will erase." 
% out_path) command = None while not command in ["yes", "no", "y", "n"]: command = raw_input("%s: " % "Proceed?") if command in ["yes", "y"]: break elif command in ["no", "n"]: exit() else: print ("Please enter yes(y) or no(n)") if path.isfile(json_file): with open(json_file) as f: models = json.load(f) for model in models.keys(): lh.logger.info("Removing results for model %s" % model) try: os.rmdir(path.join(out_path, "%s_images" % model)) except: pass os.remove(json_file) for pkl in glob.glob(path.join(out_path, "*.pkl")): lh.logger.info("Removing %s" % pkl) os.remove(pkl) lh.logger.info("Making the train object") hyper_parameters = expand(flatten(hyper_parameters), dict_type=ydict) yaml_template = open(experiment.yaml_file).read() yaml = yaml_template % hyper_parameters train_object = yaml_parse.load(yaml) if debug: return train_object lh.write_json() lh.logger.info("Seting up subprocesses") lh.logger.info("Setting up listening socket") mp_ep, s_ep = mp.Pipe() p = mp.Process(target=server, args=(pid, s_ep)) p.start() port = mp_ep.recv() lh.logger.info("Listening on port %d" % port) lh.logger.info("Starting model processor") model_processor = ModelProcessor(experiment, train_object.save_path, mp_ep, processing_flag, last_processed) model_processor.start() lh.logger.info("Model processor started") lh.logger.info("Starting stat processor") stat_processor = StatProcessor(pid, mem, cpu) stat_processor.start() lh.logger.info("Stat processor started") lh.update(hyperparams=hyper_parameters, yaml=yaml, pid=pid, port=port) lh.write_json() except Exception as e: lh.logger.exception(e) lh.finish("FAILED") raise e # Clean the model after running def clean(): p.terminate() lh.logger.info("waiting for server...") p.join() model_processor.terminate() lh.logger.info("waiting for model processor...") model_processor.join() stat_processor.terminate() lh.logger.info("waiting for stat processor...") stat_processor.join() if keep: lh.logger.info("Keeping checkpoints") else: lh.logger.info("Cleaning checkpoints") try: os.remove(model_processor.best_checkpoint) except: pass try: os.remove(model_processor.checkpoint) except: pass # A signal handler so processes kill cleanly. def signal_handler(signum, frame): lh.logger.info("Forced quitting...") clean() lh.finish("KILLED") if dbdescr is None: exit() else: raise ValueError("KILLED") signal.signal(signal.SIGINT, signal_handler) # Main loop. try: lh.logger.info("Training...") train_object.main_loop() lh.logger.info("Training quit without exception") except Exception as e: lh.logger.exception(e) clean() lh.finish("FAILED") raise(e) # After complete, process model. lh.logger.info("Processing model results...") try: experiment.analyze_fn(model_processor.best_checkpoint, model_processor.out_path) except IOError: experiment.analyze_fn(model_processor.checkpoint, model_processor.out_path) except Exception as e: lh.logger.error(e) # Clean checkpoints. clean() lh.logger.info("Finished experiment.") lh.finish("COMPLETED") return
from jobman import sql from jobman.parse import filemerge from Experimentsbatchpretrain import * import numpy db = sql.db('postgres://[email protected]/glorotxa_db/pretrainexpe') state = DD() state.curridata = DD(filemerge('Curridata.conf')) state.depth = 3 state.tie = True state.n_hid = 1000 #number of units per layer state.act = 'tanh' state.sup_lr = 0.01 state.unsup_lr = 0.001 state.noise = 0.25 state.seed = 1 state.nbepochs_unsup = 30 #maximal number of unsupervised updates per layer state.nbepochs_sup = 1000 #maximal number of supervised updates state.batchsize = 10 for i in ['MNIST','CIFAR10','ImageNet','shapesetbatch']: state.dat = i sql.insert_job(pretrain, flatten(state), db) db.createView('pretrainexpeview')
# decoding activation function is for the first layer # e.g. inputtype 'tfidf' ('tf*idf'?) uses activation function softplus # to decode the tf*idf. state.inputtype = 'binary' state.seed = 123 #here are the for loops that do the grid: for i in [0.01, 0.001, 0.0001]: state.lr = [i] for j in [(0.7, 0.0041), (0.5, 0.003), (0.8, 0.005)]: state.noise_lvl = [j] for k in [0.001, 0.00001, 0.0]: state.activation_regularization_coeff = [k] sql.insert_job( NLPSDAE, flatten(state), db ) #this submits the current state DD to the db; if it already exists in the db, no additional job is added. db.createView('opentablegpuview') #in order to access the db from a compute node you need to create an ssh tunnel connection on ang23: #(do this once; I think you should keep the shell open, or you can create the tunnel in a screen session and detach it) #ssh -v -f -o ServerAliveInterval=60 -o ServerAliveCountMax=60 -N -L *:5432:localhost:5432 gershwin.iro.umontreal.ca #you will need to give your LISA password. #here is the command you use to launch 1 job from the db. #THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 sqsub -q gpu -r 2d -n 1 --gpp=1 --memperproc=2.5G -o the_output_you_want jobman sql 'postgres://glorotxa@ang23/glorotxa_db/opentablegpu' /scratch/glorotxa/
def load_experiments(args): dataset_name = args.dataset_name db = sql.db( "postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" % { "user": args.user, "host": args.host, "port": args.port, "database": args.database, "table": args.table, }) logger.info("Getting dataset info for %s" % dataset_name) data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name) mask_file = path.join(data_path, "mask.npy") mask = np.load(mask_file) input_dim = (mask == 1).sum() if input_dim % 2 == 1: input_dim -= 1 mri = MRI.MRI_Standard(which_set="full", dataset_name=dataset_name, unit_normalize=True, even_input=True, apply_mask=True) variance_map_file = path.join(data_path, "variance_map.npy") mri_nifti.save_variance_map(mri, variance_map_file) for items in jg.nested_generator( jg.hidden_generator("nhid1", 1), jg.hidden_generator("nhid2", 1), ): state = DD() experiment_hyperparams = mlp_experiment.default_hyperparams(input_dim) for key, value in items: split_keys = key.split(".") entry = experiment_hyperparams for k in split_keys[:-1]: entry = entry[k] entry[split_keys[-1]] = value experiment_hyperparams["dataset_name"] = dataset_name h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\ flatten(experiment_hyperparams).values()))) user = path.expandvars("$USER") save_path = serial.preprocess( "/export/mialab/users/%s/pylearn2_outs/%d" % (user, h)) file_params = { "save_path": save_path, "variance_map_file": variance_map_file, } state.file_parameters = file_params state.hyper_parameters = experiment_hyperparams state.pid = 0 sql.insert_job(mlp_experiment.experiment, flatten(state), db) db.createView("%s_view" % args.table)
# to decode the tf*idf. state.inputtype = 'binary' state.seed = 123 state.activation_regularization_coeff = [0] #here are the for loops that do the grid: for i in [0.01,0.001]: state.lr = [i] for j in [0.5,0.25,0.125,0.05]: state.noise_lvl=[j] for k in [1400,2500,5000]: state.n_hid = [k] sql.insert_job(NLPSDAE, flatten(state), db) #this submits the current state DD to the db; if it already exists in the db, no additional job is added. db.createView('opentablegpuview') # First run this script # PYTHONPATH=$PYTHONPATH:.. python DARPAjobs.py # Test the jobs are in the database: # psql -d ift6266h10_sandbox_db -h gershwin.iro.umontreal.ca -U ift6266h10 # select id,lr,noiselvl,nhid as reg,jobman_status from opentablegpuview; # password: f0572cd63b # Set some values # update opentablegpukeyval set ival=0 where name='jobman.status'; # update opentablegpukeyval set ival=0 where name='jobman.status' and dict_id=20;
method_name: contraction_penalty }, coefficient: %(coefficient)f } ], "termination_criterion" : %(term_crit)s, } } ''' state.hyper_parameters = { "file": "${PYLEARN2_DATA_PATH}/UTLC/pca/sylvester_train_x_pca32.npy", "nvis": 32, "nhid": 6, "learning_rate": 0.1, "batch_size": 10, "coefficient": 0.5, "term_crit": { "__builder__": "pylearn2.training_algorithms.sgd.EpochCounter", "max_epochs": 2 } } state.extract_results = "pylearn2.scripts.jobman.tester.result_extractor" sql.insert_job( experiment.train_experiment, flatten(state), db, force_dup=True)
def load_experiments(args): dataset_name = args.dataset_name db = sql.db( "postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" % { "user": args.user, "host": args.host, "port": args.port, "database": args.database, "table": args.table, }) logger.info("Getting dataset info for %s%s" % (dataset_name, ", transposed" if args.transposed else "")) data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + args.dataset_name) if args.transposed: logger.info("Data in transpose...") mri = MRI.MRI_Transposed(dataset_name=args.dataset_name, unit_normalize=True, even_input=True, apply_mask=True) input_dim = mri.X.shape[1] variance_map_file = path.join(data_path, "transposed_variance_map.npy") else: mask_file = path.join(data_path, "mask.npy") mask = np.load(mask_file) input_dim = (mask == 1).sum() if input_dim % 2 == 1: input_dim -= 1 mri = MRI.MRI_Standard(which_set="full", dataset_name=args.dataset_name, unit_normalize=True, even_input=True, apply_mask=True) variance_map_file = path.join(data_path, "variance_map.npy") mri_nifti.save_variance_map(mri, variance_map_file) for items in nested_generator( layer_depth_generator("encoder.layer_depths", xrange(4, 6), 5), hidden_generator("encoder.nhid", 4), float_generator("weight_decay.coeffs.z", 3, 0.1, 0.001, log_scale=True)): # logger.info("Adding NICE experiment with hyperparameters %s" % (items, )) state = DD() experiment_hyperparams = nice_experiment.default_hyperparams(input_dim) if args.transposed: experiment_hyperparams["data_class"] = "MRI_Transposed" if args.logistic: experiment_hyperparams["prior"]["__builder__"] =\ "nice.pylearn2.models.nice.StandardLogistic" for key, value in items: split_keys = key.split(".") entry = experiment_hyperparams for k in split_keys[:-1]: entry = entry[k] entry[split_keys[-1]] = value experiment_hyperparams["dataset_name"] = dataset_name h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\ [tuple(v) if isinstance(v, list) else v for v in flatten(experiment_hyperparams).values()]))) user = path.expandvars("$USER") save_path = serial.preprocess( "/export/mialab/users/%s/pylearn2_outs/%d" % (user, h)) file_params = { "save_path": save_path, "variance_map_file": variance_map_file, } state.file_parameters = file_params state.hyper_parameters = experiment_hyperparams sql.insert_job(nice_experiment.experiment, flatten(state), db) db.createView("%s_view" % args.table)