Example #1
def experiment(state, channel):
    """
    Experiment function.
    Used by jobman to run jobs. Must be loaded externally.

    Parameters
    ----------
    state: WRITEME
    channel: WRITEME
    """

    yaml_template = open(yaml_file).read()
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)

    file_params = expand(flatten(state.file_parameters), dict_type=ydict)
    # Hack to fill in file parameter strings first
    for param in file_params:
        yaml_template = yaml_template.replace("%%(%s)s" % param,
                                              file_params[param])

    yaml = yaml_template % hyper_parameters
    #    with open("/na/homes/dhjelm/pylearn2/pylearn2/jobman/nice_lr_search/%d.yaml"
    #              % state.id, "w") as f:
    #        f.write(yaml)
    train_object = yaml_parse.load(yaml)

    train_object.main_loop()
    state.results = extract_results(train_object.model)
    return channel.COMPLETE
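
The example above fills its YAML template in two passes: file-path placeholders are substituted by a literal string replace, and the remaining placeholders by ordinary %-formatting with the expanded hyperparameter dict. A minimal, self-contained sketch of that idiom follows; the toy template text and parameter names are made up for illustration only.

# Two-pass template fill, as in experiment() above (illustrative values).
yaml_template = (
    "!obj:pylearn2.train.Train {\n"
    "    save_path: '%(save_path)s',\n"
    "    model: !obj:toy.Model { nhid: %(nhid)i },\n"
    "}"
)
file_params = {"save_path": "/tmp/demo_out"}
hyper_parameters = {"nhid": 100}

# Pass 1: literal replacement of the file-parameter placeholders.
for param in file_params:
    yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param])

# Pass 2: plain %-formatting with the remaining hyperparameters.
yaml = yaml_template % hyper_parameters
print(yaml)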
Example #2
def main(args):
    # Create a new database
    db = sql.db('%s?table=%s' % (args.database, args.table_name))

    # Create a jobman state
    state = DD()

    # Loop over the search space and schedule jobs
    config_generator = gen_configurations(args.configurations)
    for i, params in enumerate(config_generator):
        # Complete parameters dictionary and add to the state
        state.parameters = params
        state.parameters['model'] = args.model
        state.parameters['dataset'] = args.dataset
        state.parameters['nb_epoch'] = args.n_epochs
        state.parameters['batch_size'] = args.batch_size

        # Insert the job into the database
        if args.model == 'ADIOS':
            state.parameters['labels_order'] = args.labels_order
            state.parameters['n_label_splits'] = args.n_label_splits
            for i in xrange(args.n_label_splits):
                state.parameters['label_split'] = i + 1
                sql.insert_job(train_adios, flatten(state), db)
        else:  # args.model == 'MLP'
            sql.insert_job(train_mlp, flatten(state), db)

    # Create a view for the new database table
    db.createView(args.table_name + '_view')
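
`gen_configurations` is not shown in this snippet. A hypothetical version that expands a grid of hyperparameter values into the per-job dictionaries consumed by the loop above could look like this; the argument format is an assumption.

from itertools import product

def gen_configurations(configurations):
    """Yield one dict per point of the hyperparameter grid (hypothetical)."""
    names = sorted(configurations)
    for values in product(*(configurations[name] for name in names)):
        yield dict(zip(names, values))

# e.g. 2 x 2 = 4 parameter dictionaries
for params in gen_configurations({"learning_rate": [0.1, 0.01],
                                  "n_hidden": [128, 256]}):
    print(params)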
Example #3
def experiment(state, channel):
    """
    Experiment function.
    Used by jobman to run jobs. Must be loaded externally.

    Parameters
    ----------
    state: WRITEME
    channel: WRITEME
    """

    yaml_template = open(yaml_file).read()
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)

    file_params = expand(flatten(state.file_parameters), dict_type=ydict)
    # Hack to fill in file parameter strings first
    for param in file_params:
        yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param])

    yaml = yaml_template % hyper_parameters
    train_object = yaml_parse.load(yaml)

    state.pid = os.getpid()
    channel.save()
    train_object.main_loop()

    state.results = extract_results(train_object.model)
    return channel.COMPLETE
Example #4
def run_experiment_jobman(state, channel):
    """
    Main jobman experiment function, called by all jobs.
    """
    experiment_module = state["experiment&module"]
    experiment = imp.load_source("module.name", experiment_module)

    yaml_template = open(experiment.yaml_file).read()
    hyperparams = expand(flatten(translate(state.hyperparams, "pylearn2")),
                         dict_type=ydict)

    if not state["out&path"].endswith("job_%d" % state["jobman.id"]):
        state["out&path"] = path.join(state["out&path"],
                                      "job_%d" % state["jobman.id"])
    channel.save()
    out_path = path.join(state["out&path"])
    try:
        run_experiment(experiment,
                       hyperparams,
                       ask=False,
                       out_path=out_path,
                       dbdescr=state["dbdescr"],
                       job_id=state["jobman.id"])
    except ValueError as e:
        if str(e) == "KILLED":
            return channel.CANCELED
        else:
            return channel.ERR_RUN
    except:
        return channel.ERR_RUN

    print "Ending experiment"
    return channel.COMPLETE
Example #5
def create_jobman_jobs():

    #Database operations
    TABLE_NAME = "arcade_post_mlp_cv_binary_8x8_40k"

    db = api0.open_db('postgresql://[email protected]/gulcehrc_db?table=' + TABLE_NAME)

    ri = numpy.random.random_integers
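    # Note: random_integers(n) samples uniformly from 1..n inclusive, hence
    # the "- 1" when its result is used as a 0-based index further down.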

    # Default values
    state = DD()
    state.dataset = \
    "/home/gulcehre/dataset/pentomino/experiment_data/pento64x64_40k_seed_23112222.npy"

    state.no_of_folds = 5
    state.exid = 0

    state.n_hiddens = [100, 200, 300]
    state.n_hidden_layers = 3

    state.learning_rate = 0.001
    state.l1_reg = 1e-5
    state.l2_reg = 1e-3
    state.n_epochs = 2
    state.batch_size = 120
    state.save_exp_data = True
    state.no_of_patches = 64
    state.cost_type = "crossentropy"
    state.n_in = 8*8
    state.n_out = 1

    state.best_valid_error = 0.0

    state.best_test_error = 0.0

    state.valid_obj_path_error = 0.0
    state.test_obj_path_error = 0.0

    l1_reg_values = [0., 1e-6, 1e-5, 1e-4]
    l2_reg_values = [0., 1e-5, 1e-4]

    learning_rates = numpy.logspace(numpy.log10(0.0001), numpy.log10(1), 36)
    num_hiddens = numpy.logspace(numpy.log10(256), numpy.log10(2048), 24)

    for i in xrange(NO_OF_TRIALS):
        state.exid = i
        state.n_hidden_layers = ri(4)
        n_hiddens = []

        for i in xrange(state.n_hidden_layers):
            n_hiddens.append(int(num_hiddens[ri(num_hiddens.shape[0]) - 1]))

        state.n_hiddens = n_hiddens

        state.learning_rate = learning_rates[ri(learning_rates.shape[0]) - 1]
        state.l1_reg = l1_reg_values[ri(len(l1_reg_values)) - 1]
        state.l2_reg = l2_reg_values[ri(len(l2_reg_values)) - 1]
        sql.insert_job(experiment, flatten(state), db)

    db.createView(TABLE_NAME + "_view")
Example #6
    def _load_in_safe_session(self,
                              db,
                              safe_session,
                              filter_eq_dct=None,
                              row_id=None,
                              hash_of=None):

        with safe_session.set_timer(60 * 5):
            logger.debug("Query session...")
            q = db.query(safe_session.session)
            if row_id is not None:
                sql_row = q._query.get(row_id)
                if sql_row is None:
                    raise OperationalError("There is no rows with id \"%d\"" %
                                           row_id)
                sql_rows = [sql_row]
            elif hash_of is not None:
                hashcode = sql.hash_state(flatten(hash_of))
                sql_rows = q._query.filter(db._Dict.hash == hashcode).all()
            elif filter_eq_dct is not None:
                sql_rows = q.filter_eq_dct(filter_eq_dct).all()
            else:
                sql_rows = q.all()
            logger.debug("Query done")

        return sql_rows
Example #7
def main(args):
    dataset_name = args.dataset_name

    logger.info("Getting dataset info for %s" % dataset_name)
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name)
    mask_file = path.join(data_path, "mask.npy")
    mask = np.load(mask_file)
    input_dim = (mask == 1).sum()

    user = path.expandvars("$USER")
    save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%s"
                                  % (user, "rbm_simple_test"))

    # File parameters are path specific ones (not model specific).
    file_params = {"save_path": save_path,
                   }

    yaml_template = open(yaml_file).read()
    hyperparams = expand(flatten(experiment.default_hyperparams(input_dim=input_dim)),
                         dict_type=ydict)

    # Set additional hyperparams from command line args
    if args.learning_rate is not None:
        hyperparams["learning_rate"] = args.learning_rate
    if args.batch_size is not None:
        hyperparams["batch_size"] = args.batch_size

    for param in file_params:
        yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param])

    yaml = yaml_template % hyperparams

    logger.info("Training")
    train = yaml_parse.load(yaml)
    train.main_loop()
Example #8
def run_experiment_jobman(state, channel):
    """
    Main jobman experiment function, called by all jobs.
    """
    experiment_module = state["experiment&module"]
    experiment = imp.load_source("module.name", experiment_module)

    yaml_template = open(experiment.yaml_file).read()
    hyperparams = expand(flatten(translate(state.hyperparams, "pylearn2")),
                         dict_type=ydict)

    if not state["out&path"].endswith("job_%d" % state["jobman.id"]):
        state["out&path"] = path.join(state["out&path"],
                                      "job_%d" % state["jobman.id"])
    channel.save()
    out_path = path.join(state["out&path"])
    try:
        run_experiment(experiment, hyperparams, ask=False, out_path=out_path,
                       dbdescr=state["dbdescr"], job_id=state["jobman.id"])
    except ValueError as e:
        if str(e) == "KILLED":
            return channel.CANCELED
        else:
            return channel.ERR_RUN
    except:
        return channel.ERR_RUN

    print "Ending experiment"
    return channel.COMPLETE
Example #9
    def save(self, table_name, row):
        db = self._open_db(table_name)

        with SafeSession(db) as safe_session:

            if "id" in row:
                row_id = row["id"]
                del row["id"]

                sql_row = self._load_in_safe_session(db=db,
                                                     safe_session=safe_session,
                                                     row_id=row_id)[0]
                logger.debug("update row %d" % row_id)
                with safe_session.set_timer(60 * 5):
                    sql_row.update_simple(flatten(row), safe_session.session)
            else:
                logger.debug("insert new row")
                with safe_session.set_timer(60 * 5):
                    sql_row = sql.insert_dict(flatten(row),
                                              db,
                                              session=safe_session.session)
                    if sql_row is None:
                        raise OperationalError("Identical row already exists")
                    sql_row._set_in_session(sql.JOBID, sql_row.id,
                                            safe_session.session)

            row_id = sql_row.id

            with safe_session.set_timer(60 * 5):
                safe_session.session.commit()
                logger.debug("session commited")

        # load in new session otherwise lazy attribute selection hangs
        # on forever... why is that!?
        with SafeSession(db) as safe_session:
            sql_row = self._load_in_safe_session(db,
                                                 safe_session,
                                                 row_id=row_id)[0]
            logger.debug("Fetch all row attributes...")
            eager_dict = self._eager_dicts([sql_row], safe_session)[0]
            logger.debug("Fetch done")

            eager_dict['id'] = row_id

        return eager_dict
Example #10
def set_hyperparams(job, db):
    dbjob = db.get(job.job_id)
    job.file_prefix = dbjob["file_parameters.save_path"]
    job.out_dir = path.join(job.table_dir, job.file_prefix.split("/")[-1])
    hyperparams = experiment.default_hyperparams()
    model_keys = flatten(hyperparams).keys()
    job.hyperparams = dict((" ".join(k.replace(".__builder__", "").split(".")),
                            dbjob.get("hyper_parameters." + k, None))
                           for k in model_keys)
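
The dict comprehension above turns flattened jobman keys into space-separated hyperparameter names, dropping the `.__builder__` suffix. A quick standalone illustration (the key strings below are made up):

k = "encoder.__builder__"
print(" ".join(k.replace(".__builder__", "").split(".")))   # -> "encoder"
k = "weight_decay.coeffs.z"
print(" ".join(k.replace(".__builder__", "").split(".")))   # -> "weight_decay coeffs z"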
Example #11
    def save(self, table_name, row):
        db = self._open_db(table_name)

        with SafeSession(db) as safe_session:

            if "id" in row:
                row_id = row["id"]
                del row["id"]

                sql_row = self._load_in_safe_session(
                    db=db, safe_session=safe_session, row_id=row_id)[0]
                logger.debug("update row %d" % row_id)
                with safe_session.set_timer(60 * 5):
                    sql_row.update_simple(flatten(row), safe_session.session)
            else:
                logger.debug("insert new row")
                with safe_session.set_timer(60 * 5):
                    sql_row = sql.insert_dict(flatten(row), db,
                                              session=safe_session.session)
                    if sql_row is None:
                        raise OperationalError("Identical row already exists")
                    sql_row._set_in_session(sql.JOBID, sql_row.id,
                                            safe_session.session)

            row_id = sql_row.id

            with safe_session.set_timer(60 * 5):
                safe_session.session.commit()
                logger.debug("session commited")

        # load in new session otherwise lazy attribute selection hangs
        # on forever... why is that!?
        with SafeSession(db) as safe_session:
            sql_row = self._load_in_safe_session(db, safe_session,
                                                 row_id=row_id)[0]
            logger.debug("Fetch all row attributes...")
            eager_dict = self._eager_dicts([sql_row], safe_session)[0]
            logger.debug("Fetch done")

            eager_dict['id'] = row_id

        return eager_dict
Example #12
def set_hyperparams(job, db):
    dbjob = db.get(job.job_id)
    job.file_prefix = dbjob["file_parameters.save_path"]
    job.out_dir = path.join(job.table_dir,
                             job.file_prefix.split("/")[-1])
    hyperparams = experiment.default_hyperparams()
    model_keys = flatten(hyperparams).keys()
    job.hyperparams = dict(
        (" ".join(k.replace(".__builder__", "").split(".")),
         dbjob.get("hyper_parameters." + k, None))
        for k in model_keys)
Example #13
def insert_jobexp(exp_args, jobman_args):
    """
    Insert jobman experiment jobs in a sql database.
    Remarks: We keep this in a separate module since we can't refer to the
    jobexp.run function from within the jobexp module by its absolute name
    (otherwise jobman won't find the experiment when run).
    :param exp_args:
    :param jobman_args:
    :return:
    """
    table_name = jobman_args.get("table_name", "experiment")
    db = api0.open_db('postgres://*****:*****@127.0.0.1/jobbase?table='+table_name)
    for arg in jobman_args:
        sql.insert_job(predictive_rl.rlglueexp.jobexp.run, flatten(arg), db)
Example #14
    def update(self, table_name, rows, update_dict):
        db = self._open_db(table_name)

        with SafeSession(db) as safe_session:
            with safe_session.set_timer(60 * 5):
                for row in rows:
                    sql_row = self._load_in_safe_session(
                        db=db, safe_session=safe_session, row_id=row['id'])[0]
                    try:
                        sql_row.update_simple(flatten(update_dict), safe_session.session)
                    except:
                        sys.stderr.write("Failed for row: %d\n" % row['id'])
                safe_session.session.commit()
Example #15
    def update(self, table_name, rows, update_dict):
        db = self._open_db(table_name)

        with SafeSession(db) as safe_session:
            with safe_session.set_timer(60 * 5):
                for row in rows:
                    sql_row = self._load_in_safe_session(
                        db=db, safe_session=safe_session, row_id=row['id'])[0]
                    try:
                        sql_row.update_simple(flatten(update_dict),
                                              safe_session.session)
                    except:
                        sys.stderr.write("Failed for row: %d\n" % row['id'])
                safe_session.session.commit()
Example #16
def train_experiment(state, channel):
    """
    Train a model specified in state, and extract required results.

    This function builds a YAML string from ``state.yaml_template``, taking
    the values of hyper-parameters from ``state.hyper_parameters``, creates
    the corresponding object and trains it (like train.py), then run the
    function in ``state.extract_results`` on it, and store the returned values
    into ``state.results``.

    To know how to use this function, you can check the example in tester.py
    (in the same directory).
    """
    yaml_template = state.yaml_template

    # Convert nested DD into nested ydict.
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)

    # This will be the complete yaml string that should be executed
    final_yaml_str = yaml_template % hyper_parameters

    # Instantiate an object from YAML string
    train_obj = pylearn2.config.yaml_parse.load(final_yaml_str)

    for ext in train_obj.extensions:
        if hasattr(ext, 'set_train_obj'):
            ext.set_train_obj(train_obj)
        if hasattr(ext, 'set_jobman_channel'):
            ext.set_jobman_channel(channel)
        if hasattr(ext, 'set_jobman_state'):
            ext.set_jobman_state(state)

    try:
        iter(train_obj)
        iterable = True
    except TypeError:
        iterable = False
    if iterable:
        raise NotImplementedError(
            ('Current implementation does not support running multiple '
             'models in one yaml string.  Please change the yaml template '
             'and parameters to contain only one single model.'))
    else:
        # print "Executing the model."
        train_obj.main_loop()
        # This line will call a function defined by the user and pass train_obj
        # to it.
        state.results = jobman.tools.resolve(state.extract_results)(train_obj)
        return channel.COMPLETE
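
The hasattr() checks above let extensions opt in to receiving the train object, the jobman channel, and the state. A minimal sketch of such an extension, assuming pylearn2's TrainExtension interface; the class and attribute names are illustrative only.

from pylearn2.train_extensions import TrainExtension

class JobmanReportingExtension(TrainExtension):
    """Sketch of an extension that reports progress back through jobman."""

    def __init__(self):
        self._channel = None
        self._state = None
        self._epochs = 0

    # Hooks picked up by the hasattr() checks in train_experiment:
    def set_jobman_channel(self, channel):
        self._channel = channel

    def set_jobman_state(self, state):
        self._state = state

    def on_monitor(self, model, dataset, algorithm):
        # Called after each monitoring step: record progress in the jobman
        # state and checkpoint the database row.
        self._epochs += 1
        if self._state is not None:
            self._state.epochs_seen = self._epochs
        if self._channel is not None:
            self._channel.save()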
Example #17
def train_experiment(state, channel):
    """
    Train a model specified in state, and extract required results.

    This function builds a YAML string from ``state.yaml_template``, taking
    the values of hyper-parameters from ``state.hyper_parameters``, creates
    the corresponding object and trains it (like train.py), then run the
    function in ``state.extract_results`` on it, and store the returned values
    into ``state.results``.

    To know how to use this function, you can check the example in tester.py
    (in the same directory).
    """
    yaml_template = state.yaml_template

    # Convert nested DD into nested ydict.
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)

    # This will be the complete yaml string that should be executed
    final_yaml_str = yaml_template % hyper_parameters

    # Instantiate an object from YAML string
    train_obj = pylearn2.config.yaml_parse.load(final_yaml_str)

    for ext in train_obj.extensions:
      if hasattr(ext, 'set_train_obj'):
        ext.set_train_obj( train_obj )
      if hasattr(ext, 'set_jobman_channel'):
        ext.set_jobman_channel( channel )
      if hasattr(ext, 'set_jobman_state'):
        ext.set_jobman_state( state )

    try:
        iter(train_obj)
        iterable = True
    except TypeError:
        iterable = False
    if iterable:
        raise NotImplementedError(
                ('Current implementation does not support running multiple '
                 'models in one yaml string.  Please change the yaml template '
                 'and parameters to contain only one single model.'))
    else:
        # print "Executing the model."
        train_obj.main_loop()
        # This line will call a function defined by the user and pass train_obj
        # to it.
        state.results = jobman.tools.resolve(state.extract_results)(train_obj)
        return channel.COMPLETE
Example #18
def run_experiment(experiment, **kwargs):
    """
    Experiment function.
    Used by jobman to run jobs. Must be loaded externally.
    TODO: add sigint handling.

    Parameters
    ----------
    experiment: module
        Experiment module.
    kwargs: dict
        Typically hyperparameters.
    """

    hyper_parameters = experiment.default_hyperparams()
    set_hyper_parameters(hyper_parameters, **kwargs)
    file_parameters = experiment.fileparams
    set_hyper_parameters(file_parameters, **kwargs)
    hyper_parameters.update(file_parameters)

    ih = MRIInputHandler()
    input_dim, variance_map_file = ih.get_input_params(hyper_parameters)
    hyper_parameters["nvis"] = input_dim
    hyper_parameters["variance_map_file"] = variance_map_file

    pid = os.getpid()
    out_path = serial.preprocess(
        hyper_parameters.get("out_path", "${PYLEARN2_OUTS}"))
    if not path.isdir(out_path):
        os.mkdir(out_path)
    if not path.isdir(path.join(out_path, "logs")):
        os.mkdir(path.join(out_path, "logs"))

    hyper_parameters = expand(flatten(hyper_parameters), dict_type=ydict)

    lh = LogHandler(experiment, hyper_parameters, out_path, pid)
    h = logging.StreamHandler(lh)
    monitor.log.addHandler(h)

    yaml_template = open(experiment.yaml_file).read()
    yaml = yaml_template % hyper_parameters
    train_object = yaml_parse.load(yaml)
    try:
        train_object.main_loop()
        lh.finish("COMPLETED")
    except KeyboardInterrupt:
        print("Quitting...")
        lh.finish("KILLED")
Example #19
def run_experiment(experiment, **kwargs):
    """
    Experiment function.
    Used by jobman to run jobs. Must be loaded externally.
    TODO: add sigint handling.

    Parameters
    ----------
    experiment: module
        Experiment module.
    kwargs: dict
        Typically hyperparameters.
    """

    hyper_parameters = experiment.default_hyperparams()
    set_hyper_parameters(hyper_parameters, **kwargs)
    file_parameters = experiment.fileparams
    set_hyper_parameters(file_parameters, **kwargs)
    hyper_parameters.update(file_parameters)

    ih = MRIInputHandler()
    input_dim, variance_map_file = ih.get_input_params(hyper_parameters)
    hyper_parameters["nvis"] = input_dim
    hyper_parameters["variance_map_file"] = variance_map_file

    pid = os.getpid()
    out_path = serial.preprocess(
        hyper_parameters.get("out_path", "${PYLEARN2_OUTS}"))
    if not path.isdir(out_path):
        os.mkdir(out_path)
    if not path.isdir(path.join(out_path, "logs")):
        os.mkdir(path.join(out_path, "logs"))

    hyper_parameters = expand(flatten(hyper_parameters), dict_type=ydict)

    lh = LogHandler(experiment, hyper_parameters, out_path, pid)
    h = logging.StreamHandler(lh)
    monitor.log.addHandler(h)

    yaml_template = open(experiment.yaml_file).read()
    yaml = yaml_template % hyper_parameters
    train_object = yaml_parse.load(yaml)
    try:
        train_object.main_loop()
        lh.finish("COMPLETED")
    except KeyboardInterrupt:
        print("Quitting...")
        lh.finish("KILLED")
Example #20
def train_experiment(state, channel):
    """
    Train a model specified in state, and extract required results.

    This function builds a YAML string from ``state.yaml_template``, taking
    the values of hyper-parameters from ``state.hyper_parameters``, creates
    the corresponding object and trains it (like train.py), then run the
    function in ``state.extract_results`` on it, and store the returned values
    into ``state.results``.

    To know how to use this function, you can check the example in tester.py
    (in the same directory).
    """
    yaml_template = state.yaml_template

    # Convert nested DD into nested ydict.
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)

    # This will be the complete yaml string that should be executed
    final_yaml_str = yaml_template % hyper_parameters

    # write to .yaml file for ease of reproducibility
    fp = open('experiment.yaml', 'w')
    fp.write('%s' % final_yaml_str[2:])
    fp.close()

    # Instantiate an object from YAML string
    train_obj = pylearn2.config.yaml_parse.load(final_yaml_str)

    try:
        iter(train_obj)
        iterable = True
    except TypeError:
        iterable = False
    if iterable:
        raise NotImplementedError(
            ('Current implementation does not support running multiple '
             'models in one yaml string.  Please change the yaml template '
             'and parameters to contain only one single model.'))
    else:
        # print "Executing the model."
        # (GD) HACK HACK
        train_obj.model.jobman_channel = channel
        train_obj.model.jobman_state = state
        train_obj.main_loop()
        return channel.COMPLETE
Example #21
def train_experiment(state, channel):
    """
    Train a model specified in state, and extract required results.

    This function builds a YAML string from ``state.yaml_template``, taking
    the values of hyper-parameters from ``state.hyper_parameters``, creates
    the corresponding object and trains it (like train.py), then run the
    function in ``state.extract_results`` on it, and store the returned values
    into ``state.results``.

    To know how to use this function, you can check the example in tester.py
    (in the same directory).
    """
    yaml_template = state.yaml_template

    # Convert nested DD into nested ydict.
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)

    # This will be the complete yaml string that should be executed
    final_yaml_str = yaml_template % hyper_parameters
    
    # write to .yaml file for ease of reproducibility
    fp = open('experiment.yaml', 'w')
    fp.write('%s' % final_yaml_str[2:]) 
    fp.close()

    # Instantiate an object from YAML string
    train_obj = pylearn2.config.yaml_parse.load(final_yaml_str)

    try:
        iter(train_obj)
        iterable = True
    except TypeError:
        iterable = False
    if iterable:
        raise NotImplementedError(
                ('Current implementation does not support running multiple '
                 'models in one yaml string.  Please change the yaml template '
                 'and parameters to contain only one single model.'))
    else:
        # print "Executing the model."
        # (GD) HACK HACK
        train_obj.model.jobman_channel = channel
        train_obj.model.jobman_state = state
        train_obj.main_loop()
        return channel.COMPLETE
Example #22
def load_experiments_jobman(experiment_module, jobargs):
    """
    Load jobs from experiment onto postgresql database table.
    """
    dbdescr = get_desc(jobargs)
    db = api0.open_db(dbdescr)

    experiment = imp.load_source("module.name", experiment_module)
    for i, items in enumerate(experiment.generator):
        hyperparams = experiment.default_hyperparams
        state = DD()
        set_hyper_parameters(hyperparams, **dict((k, v) for k, v in items))
        state.hyperparams = translate(hyperparams, "knex")
        state["out&path"] = path.abspath(jobargs["out_path"])
        state["experiment&module"] = path.abspath(experiment_module)
        state["dbdescr"] = dbdescr

        sql.insert_job(run_experiment_jobman, flatten(state), db)
    db.createView("%s" % jobargs["table"])
Example #23
def main(dataset_name="smri"):
    logger.info("Getting dataset info for %s" % args.dataset_name)
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + args.dataset_name)
    mask_file = path.join(data_path, "mask.npy")
    mask = np.load(mask_file)
    input_dim = (mask == 1).sum()
    if input_dim % 2 == 1:
        input_dim -= 1
    mri = MRI.MRI_Standard(which_set="full",
                           dataset_name=args.dataset_name,
                           unit_normalize=True,
                           even_input=True,
                           apply_mask=True)
    variance_map_file = path.join(data_path, "variance_map.npy")
    mri_nifti.save_variance_map(mri, variance_map_file)

    user = path.expandvars("$USER")
    save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%s"
                                  % (user, "jobman_test"))

    file_params = {"save_path": save_path,
                   "variance_map_file": variance_map_file
                   }

    yaml_template = open(yaml_file).read()
    hyperparams = expand(flatten(mlp_experiment.default_hyperparams(input_dim=input_dim)),
                         dict_type=ydict)

    for param in hyperparams:
        if hasattr(args, param) and getattr(args, param):
            val = getattr(args, param)
            logger.info("Filling %s with %r" % (param, val))
            hyperparams[param] = type(hyperparams[param])(val)

    for param in file_params:
        yaml_template = yaml_template.replace("%%(%s)s" % param, file_params[param])

    yaml = yaml_template % hyperparams
    print yaml
    logger.info("Training")
    train = yaml_parse.load(yaml)
    train.main_loop()
Example #24
def load_experiments_jobman(experiment_module, jobargs):
    """
    Load jobs from experiment onto postgresql database table.
    """
    dbdescr = get_desc(jobargs)
    db = api0.open_db(dbdescr)

    experiment = imp.load_source("module.name", experiment_module)
    for i, items in enumerate(experiment.generator):
        hyperparams = experiment.default_hyperparams
        state = DD()
        set_hyper_parameters(hyperparams, **dict((k, v) for k, v in items))
        state.hyperparams = translate(hyperparams, "knex")
        state["out&path"] = path.abspath(jobargs["out_path"])
        state["experiment&module"] = path.abspath(experiment_module)
        state["dbdescr"] = dbdescr

        sql.insert_job(run_experiment_jobman,
                       flatten(state),
                       db)
    db.createView("%s" % jobargs["table"])
Example #25
    def _load_in_safe_session(self, db, safe_session,
                              filter_eq_dct=None, row_id=None, hash_of=None):

        with safe_session.set_timer(60 * 5):
            logger.debug("Query session...")
            q = db.query(safe_session.session)
            if row_id is not None:
                sql_row = q._query.get(row_id)
                if sql_row is None:
                    raise OperationalError("There is no rows with id \"%d\"" % row_id)
                sql_rows = [sql_row]
            elif hash_of is not None:
                hashcode = sql.hash_state(flatten(hash_of))
                sql_rows = q._query.filter(db._Dict.hash == hashcode).all()
            elif filter_eq_dct is not None:
                sql_rows = q.filter_eq_dct(filter_eq_dct).all()
            else:
                sql_rows = q.all()
            logger.debug("Query done")

        return sql_rows
Example #26
def main(args):
    dataset_name = args.dataset_name

    logger.info("Getting dataset info for %s" % dataset_name)
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name)
    mask_file = path.join(data_path, "mask.npy")
    mask = np.load(mask_file)
    input_dim = (mask == 1).sum()

    user = path.expandvars("$USER")
    save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%s" %
                                  (user, "rbm_simple_test"))

    # File parameters are path specific ones (not model specific).
    file_params = {
        "save_path": save_path,
    }

    yaml_template = open(yaml_file).read()
    hyperparams = expand(flatten(
        experiment.default_hyperparams(input_dim=input_dim)),
                         dict_type=ydict)

    # Set additional hyperparams from command line args
    if args.learning_rate is not None:
        hyperparams["learning_rate"] = args.learning_rate
    if args.batch_size is not None:
        hyperparams["batch_size"] = args.batch_size

    for param in file_params:
        yaml_template = yaml_template.replace("%%(%s)s" % param,
                                              file_params[param])

    yaml = yaml_template % hyperparams

    logger.info("Training")
    train = yaml_parse.load(yaml)
    train.main_loop()
Example #27
    state.hyper_parameters = {
        'trainfile': 'train_gray_uvd_rot_1562_31.h5',
        'N': 10 * 64,
        'batch_size': 64 * 2,
        'c1': c1,
        'kernel_c1': kernel_c1,
        'pool_c1': pool_c1,
        'c2': c2,
        'kernel_c2': kernel_c2,
        'pool_c2': pool_c2,
        'irange_c1': irange_c1,
        'irange_c2': irange_c2,
        'irange_hd1': irange_hd1,
        'irange_hd2': irange_hd2,
        'irange_out': irange_out,
        'hd1': hd1,
        'hd2': hd2,
        'output_dim': outdim,
        'lamda': lamda,
        'decay': decay,
        'max_epochs': 100,
        'save_best_path': save_best_path
    }

    yaml_template = state.yaml_template
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)
    # This will be the complete yaml string that should be executed
    final_yaml_str = yaml_template % hyper_parameters
    train_obj = pylearn2.config.yaml_parse.load(final_yaml_str)
    train_obj.main_loop()
Example #28
File: tmp_depth2.py  Project: HiQiQi/src
        'n': 2 * 64,
        'batch_size': 64,
        'c1': c1,
        'kernel_c1': kernel_c1,
        'pool_c1': pool_c1,
        'c2': c2,
        'kernel_c2': kernel_c2,
        'pool_c2': pool_c2,
        'irange_c1': irange_c1,
        'irange_c2': irange_c2,
        'irange_hd1': irange_hd1,
        'irange_hd2': irange_hd2,
        'irange_out': irange_out,
        'hd1': 2592,
        'hd2': 36,
        'output_dim': constants.NUM_JNTS * 3,
        'lamda': lamda,
        'decay': decay,
        'max_epochs': 50,
        'save_best_path': save_best_path
    }

    yaml_template = state.yaml_template
    hyper_parameters = expand(flatten(state.hyper_parameters), dict_type=ydict)
    # This will be the complete yaml string that should be executed
    final_yaml_str = yaml_template % hyper_parameters
    train_obj = pylearn2.config.yaml_parse.load(final_yaml_str)
    train_obj.main_loop()


Example #29
def load_experiments(args):
    dataset_name = args.dataset_name
    db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s"
                % {"user": args.user,
                   "host": args.host,
                   "port": args.port,
                   "database": args.database,
                   "table": args.table,
                   })

    logger.info("Getting dataset info for %s%s"
                % (dataset_name, ", transposed" if args.transposed else ""))
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + args.dataset_name)

    if args.transposed:
        logger.info("Data in transpose...")
        mri = MRI.MRI_Transposed(dataset_name=args.dataset_name,
                                 unit_normalize=True,
                                 even_input=True,
                                 apply_mask=True)
        input_dim = mri.X.shape[1]
        variance_map_file = path.join(data_path, "transposed_variance_map.npy")
    else:
        mask_file = path.join(data_path, "mask.npy")
        mask = np.load(mask_file)
        input_dim = (mask == 1).sum()
        if input_dim % 2 == 1:
            input_dim -= 1
        mri = MRI.MRI_Standard(which_set="full",
                               dataset_name=args.dataset_name,
                               unit_normalize=True,
                               even_input=True,
                               apply_mask=True)
        variance_map_file = path.join(data_path, "variance_map.npy")

    mri_nifti.save_variance_map(mri, variance_map_file)

    for items in nested_generator(layer_depth_generator("encoder.layer_depths", 
                                                        xrange(4, 6), 5),
                                  hidden_generator("encoder.nhid", 4),
                                  float_generator("weight_decay.coeffs.z", 3, 0.1, 0.001, log_scale=True)):
#        logger.info("Adding NICE experiment with hyperparameters %s" % (items, ))
        state = DD()

        experiment_hyperparams = nice_experiment.default_hyperparams(input_dim)
        if args.transposed:
            experiment_hyperparams["data_class"] = "MRI_Transposed"
        if args.logistic:
            experiment_hyperparams["prior"]["__builder__"] =\
                "nice.pylearn2.models.nice.StandardLogistic"

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            entry[split_keys[-1]] = value
        experiment_hyperparams["dataset_name"] = dataset_name
        h = abs(hash(frozenset(
            flatten(experiment_hyperparams).keys() +
            [tuple(v) if isinstance(v, list) else v
             for v in flatten(experiment_hyperparams).values()])))

        user = path.expandvars("$USER")
        save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%d"
                                      % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
            }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams

        sql.insert_job(
            nice_experiment.experiment,
            flatten(state),
            db
            )

    db.createView("%s_view" % args.table)
Example #30
def load_experiments(args):
    dataset_name = args.dataset_name

    # Load the database and table.
    db = sql.db(
        "postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" %
        {
            "user": args.user,
            "host": args.host,
            "port": args.port,
            "database": args.database,
            "table": args.table,
        })

    # Don't worry about this yet.
    input_handler = InputHandler()

    # For generating models, we use a special set of jobman generators, made
    # for convenience.
    for items in jg.nested_generator(
            jg.float_generator("learning_rate",
                               3,
                               0.01,
                               0.0001,
                               log_scale=True),
            jg.list_generator("nhid", [50, 100, 200, 300]),
    ):

        logger.info("Adding RBM experiment across hyperparameters %s" %
                    (items, ))
        state = DD()

        # Load experiment hyperparams from experiment
        experiment_hyperparams = experiment.default_hyperparams()

        # Set them with values in our loop.
        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            assert split_keys[-1] in entry, (
                "Key not found in hyperparams: %s" % split_keys[-1])
            entry[split_keys[-1]] = value

        # Set the dataset name
        experiment_hyperparams["dataset_name"] = dataset_name

        # Get the input dim and variance map. Don't worry about variance maps right now,
        # they aren't used here.
        input_dim, variance_map_file = input_handler.get_input_params(
            args, experiment_hyperparams)
        logger.info("%s\n%s\n" % (input_dim, variance_map_file))

        # Set the input dimensionality by the data
        experiment_hyperparams["nvis"] = input_dim

        # Set the minimum learning rate relative to the initial learning rate.
        experiment_hyperparams[
            "min_lr"] = experiment_hyperparams["learning_rate"] / 10

        # Make a unique hash for experiments. Remember that lists, dicts, and other data
        # types may not be hashable, so you may need to do some special processing. In
        # this case we convert the lists to tuples.
        h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\
                                   [tuple(v) if isinstance(v, list)
                                    else v
                                    for v in flatten(experiment_hyperparams).values()])))

        # Save path for the experiments. In this case we are sharing a directory in my
        # export directory so IT can blame me.
        save_path = serial.preprocess(
            "/export/mialab/users/dhjelm/pylearn2_outs/rbm_demo/%d" % h)

        # We save file params separately as they aren't model specific.
        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams
        user = path.expandvars("$USER")
        state.created_by = user

        # Finally we add the experiment to the table.
        sql.insert_job(experiment.experiment, flatten(state), db)

    # A view can be used when querying the database using psql. May not be needed in future.
    db.createView("%s_view" % args.table)
Example #31
def load_experiments(args):

    dataset_name = args.dataset_name
    db = sql.db(
        "postgres://%(user)s@%(host)s:"
        "%(port)d/%(database)s?table=%(table)s" % {
            "user": args.user,
            "host": args.host,
            "port": args.port,
            "database": args.database,
            "table": args.table,
        })
    input_handler = InputHandler()

    for items in jg.nested_generator(
            jg.list_generator(
                "encoder.layer_depths",
                [[3, 5, 5, 5, 3], [5, 5, 5, 5, 5], [2, 4, 4, 2]]),
            jg.list_generator("variance_normalize", [False, 2]),
            jg.float_generator("weight_decay.coeff",
                               4,
                               0.1,
                               0.0001,
                               log_scale=True),
            jg.list_generator("prior.__builder__", [
                "nice.pylearn2.models.nice.StandardNormal",
                "nice.pylearn2.models.nice.StandardLogistic"
            ])):

        logger.info("Adding NICE experiment across hyperparameters %s" %
                    (items, ))
        state = DD()

        experiment_hyperparams = nice_experiment.default_hyperparams()

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            assert split_keys[-1] in entry,\
                ("Key not found in hyperparams: %s, "
                 "found: %s" % (split_keys[-1], entry.keys()))
            entry[split_keys[-1]] = value
        experiment_hyperparams["dataset_name"] = dataset_name
        input_dim, variance_map_file = input_handler.get_input_params(
            args, experiment_hyperparams)
        logger.info("%s\n%s\n" % (input_dim, variance_map_file))
        experiment_hyperparams["nvis"] = input_dim
        experiment_hyperparams["encoder"]["nvis"] = input_dim

        h = abs(hash(frozenset(
            flatten(experiment_hyperparams).keys() +\
            [tuple(v) if isinstance(v, list)
             else v for v in flatten(experiment_hyperparams).values()])))

        user = path.expandvars("$USER")
        save_path = serial.preprocess(
            "/export/mialab/users/%s/pylearn2_outs/%d" % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams

        sql.insert_job(nice_experiment.experiment, flatten(state), db)

    db.createView("%s_view" % args.table)
Example #32
from jobman.tools import DD, flatten
from jobman import api0, sql

from jobman.examples.def_addition import addition_example

TABLE_NAME = 'test_add_'

# DB path...
db = api0.open_db('postgres://<user>:<pass>@<server>/<database>?table=' +
                  TABLE_NAME)

state = DD()
for first in 0, 2, 4, 6, 8, 10:
    state.first = first
    for second in 1, 3, 5, 7, 9:
        state.second = second

        sql.insert_job(addition_example, flatten(state), db)
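
For reference, jobman's addition example is an experiment function along these lines (see jobman.examples.def_addition): it reads its inputs from the scheduled state, writes the result back, and reports completion.

def addition_example(state, channel):
    state.result = state.first + state.second
    return channel.COMPLETE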
Example #33
def create_jobman_jobs():

    #Database operations
    TABLE_NAME = "arcade_multi_prmlp_cv_binary_8x8_40k"

    db = api0.open_db(
        'postgresql://[email protected]/gulcehrc_db?table=' +
        TABLE_NAME)

    ri = numpy.random.random_integers

    # Default values
    state = DD()
    state.dataset = \
    "/home/gulcehre/dataset/pentomino/experiment_data/pento64x64_40k_seed_23112222.npy"

    state.no_of_folds = 5
    state.exid = 0

    state.n_hiddens = [100, 200, 300]
    state.n_hidden_layers = 3

    state.learning_rate = 0.001
    state.l1_reg = 1e-5
    state.l2_reg = 1e-3
    state.n_epochs = 2
    state.batch_size = 120
    state.save_exp_data = True
    state.no_of_patches = 64
    state.cost_type = "crossentropy"
    state.n_in = 8 * 8
    state.n_out = 1

    state.best_valid_error = 0.0

    state.best_test_error = 0.0

    state.valid_obj_path_error = 0.0
    state.test_obj_path_error = 0.0

    l1_reg_values = [0., 1e-6, 1e-5, 1e-4]
    l2_reg_values = [0., 1e-5, 1e-4]

    learning_rates = numpy.logspace(numpy.log10(0.0001), numpy.log10(1), 36)
    num_hiddens = numpy.logspace(numpy.log10(256), numpy.log10(2048), 24)

    for i in xrange(NO_OF_TRIALS):
        state.exid = i
        state.n_hidden_layers = ri(4)
        n_hiddens = []

        for i in xrange(state.n_hidden_layers):
            n_hiddens.append(int(num_hiddens[ri(num_hiddens.shape[0]) - 1]))

        state.n_hiddens = n_hiddens

        state.learning_rate = learning_rates[ri(learning_rates.shape[0]) - 1]
        state.l1_reg = l1_reg_values[ri(len(l1_reg_values)) - 1]
        state.l2_reg = l2_reg_values[ri(len(l2_reg_values)) - 1]
        sql.insert_job(experiment, flatten(state), db)

    db.createView(TABLE_NAME + "_view")
Example #34
def run_experiment(experiment,
                   hyper_parameters=None,
                   ask=True,
                   keep=False,
                   dbdescr=None,
                   job_id=None,
                   debug=False,
                   dataset_root="${PYLEARN2_NI_PATH}",
                   **kwargs):
    """
    Experiment function.
    Used by jobman to run jobs. Must be loaded externally.
    TODO: add sigint handling.

    Parameters
    ----------
    experiment: module
        Experiment module.
    kwargs: dict
        Typically hyperparameters.
    """

    # Fill the hyperparameter values.
    if hyper_parameters is None:
        hyper_parameters = experiment.default_hyperparams

    set_hyper_parameters(hyper_parameters, **kwargs)
    file_parameters = experiment.fileparams
    set_hyper_parameters(file_parameters, **kwargs)
    hyper_parameters.update(file_parameters)

    # Set the output path, default from environment variable $PYLEARN2_OUTS
    out_path = serial.preprocess(
        hyper_parameters.get("out_path", "${PYLEARN2_OUTS}"))
    if not path.isdir(out_path):
        os.makedirs(out_path)

    processing_flag = mp.Value("b", False)
    mem = mp.Value("f", 0.0)
    cpu = mp.Value("f", 0.0)
    last_processed = mp.Manager().dict()
    last_processed["value"] = "Never"

    lh = LogHandler(experiment, out_path, processing_flag, mem, cpu,
                    last_processed, dbdescr, job_id)
    h = logging.StreamHandler(lh)
    lh.logger.info("Hijacking pylearn2 logger (sweet)...")
    monitor.log.addHandler(h)

    try:
        # HACK TODO: fix this. For some reason knex formatted strings are
        # sometimes getting in.
        hyper_parameters = translate(hyper_parameters, "pylearn2")

        # Use the input hander to get input information.
        ih = input_handler.MRIInputHandler()
        input_dim, variance_map_file = ih.get_input_params(
            hyper_parameters, dataset_root=dataset_root)
        if hyper_parameters["nvis"] is None:
            hyper_parameters["nvis"] = input_dim

        # Hack for NICE. Need to rethink inner-dependencies of some model params.
        if ("encoder" in hyper_parameters.keys()
                and "nvis" in hyper_parameters["encoder"].keys()
                and hyper_parameters["encoder"]["nvis"] is None):
            hyper_parameters["encoder"]["nvis"] = input_dim

        # If there's min_lr, make it 1/10 learning_rate
        if "min_lr" in hyper_parameters.keys():
            hyper_parameters["min_lr"] = hyper_parameters["learning_rate"] / 10

        # Corruptor is a special case of hyper parameters that depends on input
        # file: variance_map. So we hack it in here.
        if "corruptor" in hyper_parameters.keys():
            if "variance_map" in hyper_parameters["corruptor"].keys():
                hyper_parameters["corruptor"]["variance_map"] =\
                "!pkl: %s" % variance_map_file
        else:
            hyper_parameters["variance_map_file"] = variance_map_file

        lh.write_json()

        # The Process id
        pid = os.getpid()
        lh.logger.info("Proces id is %d" % pid)

        # If any pdfs are in out_path, kill or quit
        json_file = path.join(out_path, "analysis.json")
        if (ask and (path.isfile(json_file)
                     or len(glob.glob(path.join(out_path, "*.pkl"))) > 0)):
            print("Results found in %s " "Proceeding will erase." % out_path)
            command = None
            while not command in ["yes", "no", "y", "n"]:
                command = raw_input("%s: " % "Proceed?")
                if command in ["yes", "y"]:
                    break
                elif command in ["no", "n"]:
                    exit()
                else:
                    print("Please enter yes(y) or no(n)")
            if path.isfile(json_file):
                with open(json_file) as f:
                    models = json.load(f)
                    for model in models.keys():
                        lh.logger.info("Removing results for model %s" % model)
                        try:
                            os.rmdir(path.join(out_path, "%s_images" % model))
                        except:
                            pass
                os.remove(json_file)

            for pkl in glob.glob(path.join(out_path, "*.pkl")):
                lh.logger.info("Removing %s" % pkl)
                os.remove(pkl)

        lh.logger.info("Making the train object")
        hyper_parameters = expand(flatten(hyper_parameters), dict_type=ydict)
        yaml_template = open(experiment.yaml_file).read()
        yaml = yaml_template % hyper_parameters
        train_object = yaml_parse.load(yaml)
        if debug:
            return train_object
        lh.write_json()

        lh.logger.info("Seting up subprocesses")
        lh.logger.info("Setting up listening socket")
        mp_ep, s_ep = mp.Pipe()
        p = mp.Process(target=server, args=(pid, s_ep))
        p.start()
        port = mp_ep.recv()
        lh.logger.info("Listening on port %d" % port)

        lh.logger.info("Starting model processor")
        model_processor = ModelProcessor(experiment, train_object.save_path,
                                         mp_ep, processing_flag,
                                         last_processed)
        model_processor.start()
        lh.logger.info("Model processor started")

        lh.logger.info("Starting stat processor")
        stat_processor = StatProcessor(pid, mem, cpu)
        stat_processor.start()
        lh.logger.info("Stat processor started")

        lh.update(hyperparams=hyper_parameters, yaml=yaml, pid=pid, port=port)
        lh.write_json()

    except Exception as e:
        lh.logger.exception(e)
        lh.finish("FAILED")
        raise e

    # Clean the model after running
    def clean():
        p.terminate()
        lh.logger.info("waiting for server...")
        p.join()
        model_processor.terminate()
        lh.logger.info("waiting for model processor...")
        model_processor.join()
        stat_processor.terminate()
        lh.logger.info("waiting for stat processor...")
        stat_processor.join()

        if keep:
            lh.logger.info("Keeping checkpoints")
        else:
            lh.logger.info("Cleaning checkpoints")
            try:
                os.remove(model_processor.best_checkpoint)
            except:
                pass
            try:
                os.remove(model_processor.checkpoint)
            except:
                pass

    # A signal handler so processes kill cleanly.
    def signal_handler(signum, frame):
        lh.logger.info("Forced quitting...")
        clean()
        lh.finish("KILLED")
        if dbdescr is None:
            exit()
        else:
            raise ValueError("KILLED")

    signal.signal(signal.SIGINT, signal_handler)

    # Main loop.
    try:
        lh.logger.info("Training...")
        train_object.main_loop()
        lh.logger.info("Training quit without exception")
    except Exception as e:
        lh.logger.exception(e)
        clean()
        lh.finish("FAILED")
        raise (e)

    # After complete, process model.
    lh.logger.info("Processing model results...")
    try:
        experiment.analyze_fn(model_processor.best_checkpoint,
                              model_processor.out_path)
    except IOError:
        experiment.analyze_fn(model_processor.checkpoint,
                              model_processor.out_path)
    except Exception as e:
        lh.logger.error(e)

    # Clean checkpoints.
    clean()
    lh.logger.info("Finished experiment.")
    lh.finish("COMPLETED")
    return
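
The port handshake above relies only on the standard multiprocessing Pipe: the child binds a socket and sends the chosen port back to the parent. A generic, self-contained illustration follows; the toy server below is a stand-in for the project's real one, and the unused pid argument just mirrors the original call signature.

import multiprocessing as mp
import socket

def server(pid, conn):
    # Stand-in server: bind to an OS-assigned port and report it back.
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(("localhost", 0))
    conn.send(s.getsockname()[1])
    s.listen(1)
    s.close()

if __name__ == "__main__":
    parent_end, child_end = mp.Pipe()
    p = mp.Process(target=server, args=(0, child_end))
    p.start()
    print("child is listening on port %d" % parent_end.recv())
    p.join()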
Example #35
state = DD()
state.learning_rate = 0.01
state.L1_reg = 0.00
state.L2_reg = 0.0001
state.n_iter = 50
state.batch_size = 20
state.n_hidden = 10

# Hyperparameter exploration
for n_hidden in 20, 30:
    state.n_hidden = n_hidden

    # Explore L1 regularization w/o L2
    state.L2_reg = 0.
    for L1_reg in 0., 1e-6, 1e-5, 1e-4:
        state.L1_reg = L1_reg

        # Insert job
        sql.insert_job(experiment, flatten(state), db)

    # Explore L2 regularization w/o L1
    state.L1_reg = 0.
    for L2_reg in 1e-5, 1e-4:
        state.L2_reg = L2_reg

        # Insert job
        sql.insert_job(experiment, flatten(state), db)

# Create the view
db.createView(TABLE_NAME + '_view')
Example #36
state.batch_size = 20
state.n_hidden = 10

# Hyperparameter exploration
for n_hidden in 20, 30:

    print "h_hidden =",h_hidden
    state.n_hidden = n_hidden

    # Explore L1 regularization w/o L2
    state.L2_reg = 0.
    for L1_reg in 0., 1e-6, 1e-5, 1e-4:
        print "L1_reg =",L1_reg
        state.L1_reg = L1_reg

        # Insert job
        sql.insert_job(experiment, flatten(state), db)

    # Explore L2 regularization w/o L1
    state.L1_reg = 0.
    for L2_reg in 1e-5, 1e-4:
        print "L2_reg =",L2_reg
        state.L2_reg = L2_reg

        # Insert job
        sql.insert_job(experiment, flatten(state), db)

# Create the view
db.createView(TABLE_NAME+'_view')

Example #37
def load_experiments(args):
    dataset_name = args.dataset_name
    db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s"
                % {"user": args.user,
                   "host": args.host,
                   "port": args.port,
                   "database": args.database,
                   "table": args.table,
                   })

    logger.info("Getting dataset info for %s" % dataset_name)
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name)
    mask_file = path.join(data_path, "mask.npy")
    mask = np.load(mask_file)
    input_dim = (mask == 1).sum()
    if input_dim % 2 == 1:
        input_dim -= 1
    mri = MRI.MRI_Standard(which_set="full",
                           dataset_name=dataset_name,
                           unit_normalize=True,
                           even_input=True,
                           apply_mask=True)
    variance_map_file = path.join(data_path, "variance_map.npy")
    mri_nifti.save_variance_map(mri, variance_map_file)

    for items in jg.nested_generator(jg.hidden_generator("nhid1", 1),
                                     jg.hidden_generator("nhid2", 1),
                                     ):

        state = DD()
        experiment_hyperparams = mlp_experiment.default_hyperparams(input_dim)

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            entry[split_keys[-1]] = value
        
        experiment_hyperparams["dataset_name"] = dataset_name

        h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\
                                   flatten(experiment_hyperparams).values())))

        user = path.expandvars("$USER")
        save_path = serial.preprocess("/export/mialab/users/%s/pylearn2_outs/%d"
                                      % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
            }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams
        state.pid = 0

        sql.insert_job(
            mlp_experiment.experiment,
            flatten(state),
            db
            )

    db.createView("%s_view" % args.table)
Example #38
def load_experiments(args):
    dataset_name = args.dataset_name

    # Load the database and table.
    db = sql.db("postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s"
                % {"user": args.user,
                   "host": args.host,
                   "port": args.port,
                   "database": args.database,
                   "table": args.table,
                   })

    # Don't worry about this yet.
    input_handler = InputHandler()

    # For generating models, we use a special set of jobman generators, made
    # for convenience.
    for items in jg.nested_generator(
        jg.float_generator("learning_rate", 3, 0.01, 0.0001, log_scale=True),
        jg.list_generator("nhid", [50, 100, 200, 300]),
        ):

        logger.info("Adding RBM experiment across hyperparameters %s" % (items, ))
        state = DD()

        # Load experiment hyperparams from experiment
        experiment_hyperparams = experiment.default_hyperparams()

        # Set them with values in our loop.
        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            assert split_keys[-1] in entry, ("Key not found in hyperparams: %s"
                                             % split_keys[-1])
            entry[split_keys[-1]] = value

        # Set the dataset name
        experiment_hyperparams["dataset_name"] = dataset_name

        # Get the input dim and variance map. Don't worry about variance maps right now,
        # they aren't used here.
        input_dim, variance_map_file = input_handler.get_input_params(args,
                                                                      experiment_hyperparams)
        logger.info("%s\n%s\n" % (input_dim, variance_map_file))

        # Set the input dimensionality by the data
        experiment_hyperparams["nvis"] = input_dim

        # Set the minimum learning rate relative to the initial learning rate.
        experiment_hyperparams["min_lr"] = experiment_hyperparams["learning_rate"] / 10

        # Make a unique hash for experiments. Remember that lists, dicts, and other data
        # types may not be hashable, so you may need to do some special processing. In
        # this case we convert the lists to tuples.
        h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\
                                   [tuple(v) if isinstance(v, list)
                                    else v 
                                    for v in flatten(experiment_hyperparams).values()])))

        # Save path for the experiments. In this case we are sharing a directory in my 
        # export directory so IT can blame me.
        save_path = serial.preprocess("/export/mialab/users/dhjelm/pylearn2_outs/rbm_demo/%d"
                                      % h)

        # We save file params separately as they aren't model specific.
        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
            }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams
        user = path.expandvars("$USER")
        state.created_by = user

        # Finally we add the experiment to the table.
        sql.insert_job(
            experiment.experiment,
            flatten(state),
            db
            )

    # A view can be used when querying the database using psql. May not be needed in future.
    db.createView("%s_view" % args.table)
Example #39
# tested didn't change at all.  This can be confusing.
from jobman.examples.def_addition import addition_example

# here we build a list of dictionaries, each of which specifies a setting of
# parameters corresponding to one job.
#
# We will pass this list to mydriver.main, so that it can potentially insert
# them, or trim down the database to match the current list, and other sorts of
# things.
state = DD()
jobs = []
for first in 0, 2, 4, 6, 8, 10:
    state.first = first
    for second in 1, 3, 5, 7, 9:
        state.second = second
        jobs.append(dict(flatten(state)))

#
# mydriver.main defines a few commands that are generally useful (insert,
# clear_db, summary, etc.) but if you want to analyse your results you can add
# additional commands.
#
# For example, if you type 'python additional_driver.py list_all', then this
# little function will run, because its function name is 'list_all'.
#
# For help on the meaning of the various elements of `kwargs`, see <WRITEME>.
#


@mydriver.mydriver_cmd_desc('list the contents of the database')
def list_all(db, **kwargs):
    for d in db:
Example #40
def run_experiment(experiment, hyper_parameters=None, ask=True, keep=False,
                   dbdescr=None, job_id=None, debug=False,
                   dataset_root="${PYLEARN2_NI_PATH}", **kwargs):
    """
    Experiment function.
    Used by jobman to run jobs. Must be loaded externally.
    TODO: add sigint handling.

    Parameters
    ----------
    experiment: module
        Experiment module.
    hyper_parameters: dict, optional
        Hyperparameters; defaults to the experiment module's defaults.
    ask: bool
        Ask before erasing results already present in the output path.
    keep: bool
        Keep checkpoints after the run instead of removing them.
    dbdescr: str, optional
        Job database description, used when run through jobman.
    job_id: int, optional
        Jobman job id.
    debug: bool
        If True, build and return the train object without running it.
    dataset_root: str
        Root path for the datasets.
    kwargs: dict
        Typically hyperparameters.
    """

    # Fill the hyperparameter values.
    if hyper_parameters is None:
        hyper_parameters = experiment.default_hyperparams

    set_hyper_parameters(hyper_parameters, **kwargs)
    file_parameters = experiment.fileparams
    set_hyper_parameters(file_parameters, **kwargs)
    hyper_parameters.update(file_parameters)

    # Set the output path, default from environment variable $PYLEARN2_OUTS
    out_path = serial.preprocess(
        hyper_parameters.get("out_path", "${PYLEARN2_OUTS}"))
    if not path.isdir(out_path):
        os.makedirs(out_path)

    processing_flag = mp.Value("b", False)
    mem = mp.Value("f", 0.0)
    cpu = mp.Value("f", 0.0)
    last_processed = mp.Manager().dict()
    last_processed["value"] = "Never"

    lh = LogHandler(experiment, out_path, processing_flag, mem, cpu,
                    last_processed, dbdescr, job_id)
    h = logging.StreamHandler(lh)
    lh.logger.info("Hijacking pylearn2 logger (sweet)...")
    monitor.log.addHandler(h)

    try:
        # HACK TODO: fix this. For some reason knex formatted strings are
        # sometimes getting in.
        hyper_parameters = translate(hyper_parameters, "pylearn2")

        # Use the input hander to get input information.
        ih = input_handler.MRIInputHandler()
        input_dim, variance_map_file = ih.get_input_params(
            hyper_parameters, dataset_root=dataset_root)
        if hyper_parameters["nvis"] is None:
            hyper_parameters["nvis"] = input_dim

        # Hack for NICE. Need to rethink inner-dependencies of some model params.
        if ("encoder" in hyper_parameters.keys()
            and "nvis" in hyper_parameters["encoder"].keys()
            and hyper_parameters["encoder"]["nvis"] is None):
            hyper_parameters["encoder"]["nvis"] = input_dim

        # If there's min_lr, make it 1/10 learning_rate
        if "min_lr" in hyper_parameters.keys():
            hyper_parameters["min_lr"] = hyper_parameters["learning_rate"] / 10

        # Corruptor is a special case of hyper parameters that depends on input
        # file: variance_map. So we hack it in here.
        if "corruptor" in hyper_parameters.keys():
            if "variance_map" in hyper_parameters["corruptor"].keys():
                hyper_parameters["corruptor"]["variance_map"] =\
                "!pkl: %s" % variance_map_file
        else:
            hyper_parameters["variance_map_file"] = variance_map_file

        lh.write_json()

        # The Process id
        pid = os.getpid()
        lh.logger.info("Proces id is %d" % pid)

        # If any pdfs are in out_path, kill or quit
        json_file = path.join(out_path, "analysis.json")
        if (ask and (path.isfile(json_file) or
                     len(glob.glob(path.join(out_path, "*.pkl"))) > 0)):
            print ("Results found in %s "
                   "Proceeding will erase." % out_path)
            command = None
            while not command in ["yes", "no", "y", "n"]:
                command = raw_input("%s: " % "Proceed?")
                if command in ["yes", "y"]:
                    break
                elif command in ["no", "n"]:
                    exit()
                else:
                    print ("Please enter yes(y) or no(n)")
            if path.isfile(json_file):
                with open(json_file) as f:
                    models = json.load(f)
                    for model in models.keys():
                        lh.logger.info("Removing results for model %s" % model)
                        try:
                            os.rmdir(path.join(out_path, "%s_images" % model))
                        except:
                            pass
                os.remove(json_file)

            for pkl in glob.glob(path.join(out_path, "*.pkl")):
                lh.logger.info("Removing %s" % pkl)
                os.remove(pkl)

        lh.logger.info("Making the train object")
        hyper_parameters = expand(flatten(hyper_parameters), dict_type=ydict)
        yaml_template = open(experiment.yaml_file).read()
        yaml = yaml_template % hyper_parameters
        train_object = yaml_parse.load(yaml)
        if debug:
            return train_object
        lh.write_json()

        lh.logger.info("Seting up subprocesses")
        lh.logger.info("Setting up listening socket")
        mp_ep, s_ep = mp.Pipe()
        p = mp.Process(target=server, args=(pid, s_ep))
        p.start()
        port = mp_ep.recv()
        lh.logger.info("Listening on port %d" % port)

        lh.logger.info("Starting model processor")
        model_processor = ModelProcessor(experiment, train_object.save_path,
                                         mp_ep, processing_flag, last_processed)
        model_processor.start()
        lh.logger.info("Model processor started")

        lh.logger.info("Starting stat processor")
        stat_processor = StatProcessor(pid, mem, cpu)
        stat_processor.start()
        lh.logger.info("Stat processor started")

        lh.update(hyperparams=hyper_parameters,
                  yaml=yaml,
                  pid=pid,
                  port=port)
        lh.write_json()

    except Exception as e:
        lh.logger.exception(e)
        lh.finish("FAILED")
        raise e

    # Clean the model after running
    def clean():
        p.terminate()
        lh.logger.info("waiting for server...")
        p.join()
        model_processor.terminate()
        lh.logger.info("waiting for model processor...")
        model_processor.join()
        stat_processor.terminate()
        lh.logger.info("waiting for stat processor...")
        stat_processor.join()

        if keep:
            lh.logger.info("Keeping checkpoints")
        else:
            lh.logger.info("Cleaning checkpoints")
            try:
                os.remove(model_processor.best_checkpoint)
            except:
                pass
            try:
                os.remove(model_processor.checkpoint)
            except:
                pass

    # A signal handler so processes kill cleanly.
    def signal_handler(signum, frame):
        lh.logger.info("Forced quitting...")
        clean()
        lh.finish("KILLED")
        if dbdescr is None:
            exit()
        else:
            raise ValueError("KILLED")

    signal.signal(signal.SIGINT, signal_handler)

    # Main loop.
    try:
        lh.logger.info("Training...")
        train_object.main_loop()
        lh.logger.info("Training quit without exception")
    except Exception as e:
        lh.logger.exception(e)
        clean()
        lh.finish("FAILED")
        raise(e)

    # After complete, process model.
    lh.logger.info("Processing model results...")
    try:
        experiment.analyze_fn(model_processor.best_checkpoint,
                              model_processor.out_path)
    except IOError:
        experiment.analyze_fn(model_processor.checkpoint,
                              model_processor.out_path)
    except Exception as e:
        lh.logger.error(e)

    # Clean checkpoints.
    clean()
    lh.logger.info("Finished experiment.")
    lh.finish("COMPLETED")
    return
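The listening-socket handshake above (a child started with `mp.Process` that sends its chosen port back through an `mp.Pipe`) is easy to miss inside the larger function. A stripped-down sketch of just that pattern; the `server` here is a stand-in that only binds a socket and reports its port:

import multiprocessing as mp
import socket

def server(pid, conn):
    # Stand-in for the real server: bind to a free port, report it back
    # through the pipe, then exit (the real one would go on serving requests).
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind(("localhost", 0))        # port 0 lets the OS pick a free port
    conn.send(s.getsockname()[1])
    s.close()

if __name__ == "__main__":
    mp_ep, s_ep = mp.Pipe()
    p = mp.Process(target=server, args=(0, s_ep))
    p.start()
    port = mp_ep.recv()             # blocks until the child reports its port
    print("child is listening on port %d" % port)
    p.join()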
Example #41
from jobman.examples.def_addition import addition_example


# here we build a list of dictionaries, each of which specifies a setting of
# parameters corresponding to one job.
#
# We will pass this list to mydriver.main, so that it can potentially insert
# them, or trim down the database to match the current list, and other sorts of
# things.
state = DD()
jobs = []
for first in 0, 2, 4, 6, 8, 10:
    state.first = first
    for second in 1, 3, 5, 7, 9:
        state.second = second
        jobs.append(dict(flatten(state)))


#
# mydriver.main defines a few commands that are generally useful (insert,
# clear_db, summary, etc.) but if you want to analyse your results you can add
# additional commands.
#
# For example, if you type 'python additional_driver.py list_all', then this
# little function will run, because its function name is 'list_all'.
#
# For help on the meaning of the various elements of `kwargs`, see <WRITEME>.
#

@mydriver.mydriver_cmd_desc('list the contents of the database')
def list_all(db, **kwargs):
    for d in db:
Example #42
from jobman.tools import DD, flatten
from jobman import api0, sql

from jobman.examples.def_addition import addition_example

TABLE_NAME = 'test_add_'

# DB path...
db = api0.open_db(
    'postgres://<user>:<pass>@<server>/<database>?table=' + TABLE_NAME)

state = DD()
for first in 0, 2, 4, 6, 8, 10:
    state.first = first
    for second in 1, 3, 5, 7, 9:
        state.second = second

        sql.insert_job(addition_example, flatten(state), db)
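jobman runs each inserted row by calling the experiment function with a `(state, channel)` pair. A hypothetical experiment of the same shape, mirroring the `state.first`/`state.second` keys set above (this is a sketch, not the real `addition_example`):

def my_addition_experiment(state, channel):
    # Hypothetical stand-in: read the hyperparameters that were inserted
    # into the state, store a result, and report success.
    state.result = state.first + state.second
    return channel.COMPLETE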
Example #43
from jobman import sql
from jobman.tools import DD, flatten
from jobman.parse import filemerge
from Experimentsbatchpretrain import *
import numpy

db = sql.db('postgres://[email protected]/glorotxa_db/pretrainexpe')

state = DD()
state.curridata = DD(filemerge('Curridata.conf'))

state.depth = 3
state.tie = True
state.n_hid = 1000  # number of units per layer
state.act = 'tanh'

state.sup_lr = 0.01
state.unsup_lr = 0.001
state.noise = 0.25

state.seed = 1

state.nbepochs_unsup = 30  # maximal number of unsupervised updates per layer
state.nbepochs_sup = 1000  # maximal number of supervised updates
state.batchsize = 10

for i in ['MNIST', 'CIFAR10', 'ImageNet', 'shapesetbatch']:
    state.dat = i
    sql.insert_job(pretrain, flatten(state), db)

db.createView('pretrainexpeview')
Example #44
# decoding activation function is for the first layer
# e.g. inputtype 'tfidf' ('tf*idf'?) uses activation function softplus
# to decode the tf*idf.
state.inputtype = 'binary'

state.seed = 123

#here is the for loops that does the grid:

for i in [0.01, 0.001, 0.0001]:
    state.lr = [i]
    for j in [(0.7, 0.0041), (0.5, 0.003), (0.8, 0.005)]:
        state.noise_lvl = [j]
        for k in [0.001, 0.00001, 0.0]:
            state.activation_regularization_coeff = [k]
            # This submits the current state DD to the db; if it already
            # exists in the db, no additional job is added.
            sql.insert_job(
                NLPSDAE, flatten(state), db
            )

db.createView('opentablegpuview')

# In order to access the db from a compute node you need to create an SSH
# tunnel on ang23 (do this once; keep the shell open, or create the tunnel in
# a screen session and detach it):

# ssh -v -f -o ServerAliveInterval=60 -o ServerAliveCountMax=60 -N -L *:5432:localhost:5432 gershwin.iro.umontreal.ca

# You will need to give your LISA password.

# Here is the command used to launch one job from the db:
# THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32  sqsub -q gpu -r 2d -n 1 --gpp=1 --memperproc=2.5G -o the_output_you_want jobman sql 'postgres://glorotxa@ang23/glorotxa_db/opentablegpu' /scratch/glorotxa/
Example #45
def load_experiments(args):
    dataset_name = args.dataset_name
    db = sql.db(
        "postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" %
        {
            "user": args.user,
            "host": args.host,
            "port": args.port,
            "database": args.database,
            "table": args.table,
        })

    logger.info("Getting dataset info for %s" % dataset_name)
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + dataset_name)
    mask_file = path.join(data_path, "mask.npy")
    mask = np.load(mask_file)
    input_dim = (mask == 1).sum()
    if input_dim % 2 == 1:
        input_dim -= 1
    mri = MRI.MRI_Standard(which_set="full",
                           dataset_name=dataset_name,
                           unit_normalize=True,
                           even_input=True,
                           apply_mask=True)
    variance_map_file = path.join(data_path, "variance_map.npy")
    mri_nifti.save_variance_map(mri, variance_map_file)

    for items in jg.nested_generator(
            jg.hidden_generator("nhid1", 1),
            jg.hidden_generator("nhid2", 1),
    ):

        state = DD()
        experiment_hyperparams = mlp_experiment.default_hyperparams(input_dim)

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            entry[split_keys[-1]] = value

        experiment_hyperparams["dataset_name"] = dataset_name

        h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\
                                   flatten(experiment_hyperparams).values())))

        user = path.expandvars("$USER")
        save_path = serial.preprocess(
            "/export/mialab/users/%s/pylearn2_outs/%d" % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams
        state.pid = 0

        sql.insert_job(mlp_experiment.experiment, flatten(state), db)

    db.createView("%s_view" % args.table)
Example #46
# to decode the tf*idf.
state.inputtype = 'binary'

state.seed = 123

state.activation_regularization_coeff = [0]

#here is the for loops that does the grid:

for i in [0.01, 0.001]:
    state.lr = [i]
    for j in [0.5, 0.25, 0.125, 0.05]:
        state.noise_lvl = [j]
        for k in [1400, 2500, 5000]:
            state.n_hid = [k]
            # This submits the current state DD to the db; if it already
            # exists in the db, no additional job is added.
            sql.insert_job(NLPSDAE, flatten(state), db)


db.createView('opentablegpuview')

# First run this script
# PYTHONPATH=$PYTHONPATH:.. python DARPAjobs.py 

# Test the jobs are in the database:
# psql -d ift6266h10_sandbox_db -h gershwin.iro.umontreal.ca -U ift6266h10
# select id,lr,noiselvl,nhid as reg,jobman_status from opentablegpuview;
# password: f0572cd63b
# Set some values:
#  update opentablegpukeyval set ival=0 where name='jobman.status';
#  update opentablegpukeyval set ival=0 where name='jobman.status' and dict_id=20;
Example #47
                        method_name: contraction_penalty
                    },
                    coefficient: %(coefficient)f
                }
            ],
            "termination_criterion" : %(term_crit)s,
        }
    }
    '''

    state.hyper_parameters = {
            "file": "${PYLEARN2_DATA_PATH}/UTLC/pca/sylvester_train_x_pca32.npy",
            "nvis": 32,
            "nhid": 6,
            "learning_rate": 0.1,
            "batch_size": 10,
            "coefficient": 0.5,
            "term_crit": {
                "__builder__": "pylearn2.training_algorithms.sgd.EpochCounter",
                "max_epochs": 2
                }
            }

    state.extract_results = "pylearn2.scripts.jobman.tester.result_extractor"

    sql.insert_job(
            experiment.train_experiment,
            flatten(state),
            db,
            force_dup=True)
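The `%(...)s` and `%(...)f` placeholders in the template above are filled by ordinary Python string formatting; nested entries such as `term_crit` go through `expand(..., dict_type=ydict)` so that, when substituted with `%s`, they print as YAML-style mappings rather than plain Python dict reprs. A toy illustration of the substitution step using only scalar placeholders (template and values hypothetical):

yaml_template = """
nvis: %(nvis)d,
nhid: %(nhid)d,
coefficient: %(coefficient)f,
"""

hyper_parameters = {"nvis": 32, "nhid": 6, "coefficient": 0.5}
print(yaml_template % hyper_parameters)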
Example #48
def load_experiments(args):
    dataset_name = args.dataset_name
    db = sql.db(
        "postgres://%(user)s@%(host)s:%(port)d/%(database)s?table=%(table)s" %
        {
            "user": args.user,
            "host": args.host,
            "port": args.port,
            "database": args.database,
            "table": args.table,
        })

    logger.info("Getting dataset info for %s%s" %
                (dataset_name, ", transposed" if args.transposed else ""))
    data_path = serial.preprocess("${PYLEARN2_NI_PATH}/" + args.dataset_name)

    if args.transposed:
        logger.info("Data in transpose...")
        mri = MRI.MRI_Transposed(dataset_name=args.dataset_name,
                                 unit_normalize=True,
                                 even_input=True,
                                 apply_mask=True)
        input_dim = mri.X.shape[1]
        variance_map_file = path.join(data_path, "transposed_variance_map.npy")
    else:
        mask_file = path.join(data_path, "mask.npy")
        mask = np.load(mask_file)
        input_dim = (mask == 1).sum()
        if input_dim % 2 == 1:
            input_dim -= 1
        mri = MRI.MRI_Standard(which_set="full",
                               dataset_name=args.dataset_name,
                               unit_normalize=True,
                               even_input=True,
                               apply_mask=True)
        variance_map_file = path.join(data_path, "variance_map.npy")

    mri_nifti.save_variance_map(mri, variance_map_file)

    for items in nested_generator(
            layer_depth_generator("encoder.layer_depths", xrange(4, 6), 5),
            hidden_generator("encoder.nhid", 4),
            float_generator("weight_decay.coeffs.z",
                            3,
                            0.1,
                            0.001,
                            log_scale=True)):
        #        logger.info("Adding NICE experiment with hyperparameters %s" % (items, ))
        state = DD()

        experiment_hyperparams = nice_experiment.default_hyperparams(input_dim)
        if args.transposed:
            experiment_hyperparams["data_class"] = "MRI_Transposed"
        if args.logistic:
            experiment_hyperparams["prior"]["__builder__"] =\
                "nice.pylearn2.models.nice.StandardLogistic"

        for key, value in items:
            split_keys = key.split(".")
            entry = experiment_hyperparams
            for k in split_keys[:-1]:
                entry = entry[k]
            entry[split_keys[-1]] = value
        experiment_hyperparams["dataset_name"] = dataset_name
        h = abs(hash(frozenset(flatten(experiment_hyperparams).keys() +\
                                   [tuple(v) if isinstance(v, list) else v for v in flatten(experiment_hyperparams).values()])))

        user = path.expandvars("$USER")
        save_path = serial.preprocess(
            "/export/mialab/users/%s/pylearn2_outs/%d" % (user, h))

        file_params = {
            "save_path": save_path,
            "variance_map_file": variance_map_file,
        }

        state.file_parameters = file_params
        state.hyper_parameters = experiment_hyperparams

        sql.insert_job(nice_experiment.experiment, flatten(state), db)

    db.createView("%s_view" % args.table)