Example No. 1
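Both examples below come from a TensorFlow 1.x code base and rely on module-level names that are not shown. The sketch below lays out the assumed context; the import path for `datasets` / `problem_generator` and the value of `OPTIMIZER_SCOPE` are hypothetical placeholders, not names confirmed by the snippets.

# Assumed context for the snippets below (a sketch, not the exact upstream file).
import time
from collections import defaultdict

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (tf.placeholder, tf.Session, ...)

# Project-specific modules: `datasets` is expected to provide EMPTY_DATASET and
# dataset objects with .size, .data, .labels and a batch_indices() method;
# `problem_generator` is expected to provide init_fixed_variables().
# The package path is hypothetical.
from my_project.problems import datasets, problem_generator

# Variable scope holding the trainable optimizer's own variables.
OPTIMIZER_SCOPE = "optimizer"  # hypothetical value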
def run_wall_clock_test(optimizer,
                        problem,
                        num_steps,
                        dataset=datasets.EMPTY_DATASET,
                        seed=None,
                        logdir=None,
                        batch_size=None):
    """Runs optimization with the given parameters and return average iter time.

  Args:
    optimizer: The tf.train.Optimizer instance
    problem: The problem to optimize (a problem_generator.Problem)
    num_steps: The number of steps to run optimization for
    dataset: The dataset to train the problem against
    seed: The seed used for drawing the initial parameters, or a list of
      numpy arrays used to explicitly initialize the parameters
    logdir: A directory containing model checkpoints. If given, then the
            parameters of the optimizer are loaded from the latest checkpoint
            in this folder.
    batch_size: The number of samples per batch.

  Returns:
    The average time in seconds for a single optimization iteration.
  """
    if dataset is None:
        dataset = datasets.EMPTY_DATASET
        batch_size = dataset.size
    else:
        # default batch size is the entire dataset
        batch_size = dataset.size if batch_size is None else batch_size

    # define the parameters of the optimization problem
    if isinstance(seed, (list, tuple)):
        # seed is a list of arrays
        params = problem_generator.init_fixed_variables(seed)
    else:
        # seed is an int or None
        params = problem.init_variables(seed)

    data_placeholder = tf.placeholder(tf.float32)
    labels_placeholder = tf.placeholder(tf.int32)

    obj = problem.objective(params, data_placeholder, labels_placeholder)
    gradients = problem.gradients(obj, params)
    vars_to_preinitialize = params

    with tf.Session(graph=tf.get_default_graph()) as sess:
        # initialize the parameter scope variables; necessary for apply_gradients
        sess.run(tf.variables_initializer(vars_to_preinitialize))
        train_op = optimizer.apply_gradients(zip(gradients, params))
        if isinstance(train_op, (tuple, list)):
            # The trainable (learned) optimizer's apply_gradients returns a
            # tuple; regular tf.train optimizers return a single op.
            train_op = train_op[0]
        vars_to_restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope=OPTIMIZER_SCOPE)
        vars_to_initialize = list(
            set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) -
            set(vars_to_restore) - set(vars_to_preinitialize))
        # load or initialize optimizer variables
        if logdir is not None:
            restorer = tf.train.Saver(var_list=vars_to_restore)
            ckpt = tf.train.latest_checkpoint(logdir)
            restorer.restore(sess, ckpt)
        else:
            sess.run(tf.variables_initializer(vars_to_restore))
        # initialize all the other variables
        sess.run(tf.variables_initializer(vars_to_initialize))

        problem.init_fn(sess)

        # generate the minibatch indices
        batch_inds = dataset.batch_indices(num_steps, batch_size)

        iter_times = []
        for batch in batch_inds:
            # data to feed in
            feed = {
                data_placeholder: dataset.data[batch],
                labels_placeholder: dataset.labels[batch]
            }

            # run the optimization train operation
            start = time.time()
            sess.run([train_op], feed_dict=feed)
            iter_times.append(time.time() - start)

    return np.median(np.array(iter_times))
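A hedged usage sketch for the function above; `my_problem` and `my_dataset` are hypothetical placeholders for objects supplied by the surrounding project (a problem_generator.Problem and a dataset), not names defined in the snippet.

# Hypothetical call timing 100 SGD steps on a small problem.
sgd = tf.train.GradientDescentOptimizer(learning_rate=0.1)
median_step_time = run_wall_clock_test(
    optimizer=sgd,
    problem=my_problem,    # a problem_generator.Problem instance (placeholder)
    num_steps=100,
    dataset=my_dataset,    # omit to fall back to datasets.EMPTY_DATASET
    seed=0,
    batch_size=32)
print("median seconds per optimization step: %g" % median_step_time)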
Example No. 2
def test_optimizer(optimizer,
                   problem,
                   num_iter,
                   dataset=datasets.EMPTY_DATASET,
                   batch_size=None,
                   seed=None,
                   graph=None,
                   logdir=None,
                   record_every=None):
    """Tests an optimization algorithm on a given problem.

  Args:
    optimizer: Either a tf.train.Optimizer instance, or an Optimizer instance
               inheriting from trainable_optimizer.py
    problem: A Problem instance that defines an optimization problem to solve
    num_iter: The number of iterations of the optimizer to run
    dataset: The dataset to train the problem against
    batch_size: The number of samples per batch. If None (default), the
      batch size is set to the full batch (dataset.size)
    seed: A random seed used for drawing the initial parameters, or a list of
      numpy arrays used to explicitly initialize the parameters.
    graph: The tensorflow graph to execute (if None, uses the default graph)
    logdir: A directory containing model checkpoints. If given, then the
            parameters of the optimizer are loaded from the latest checkpoint
            in this folder.
    record_every: if an integer, stores the parameters, objective, and gradient
                  every recored_every iterations. If None, nothing is stored

  Returns:
    objective_values: A list of the objective values during optimization
    parameters: The parameters obtained after training
    records: A dictionary containing lists of the parameters and gradients
             during optimization saved every record_every iterations (empty if
             record_every is set to None)
  """

    if dataset is None:
        dataset = datasets.EMPTY_DATASET
        batch_size = dataset.size
    else:
        # default batch size is the entire dataset
        batch_size = dataset.size if batch_size is None else batch_size

    graph = tf.get_default_graph() if graph is None else graph
    with graph.as_default():

        # define the parameters of the optimization problem
        if isinstance(seed, (list, tuple)):
            # seed is a list of arrays
            params = problem_generator.init_fixed_variables(seed)
        else:
            # seed is an int or None
            params = problem.init_variables(seed)

        data_placeholder = tf.placeholder(tf.float32)
        labels_placeholder = tf.placeholder(tf.int32)

        # get the problem objective and gradient(s)
        obj = problem.objective(params, data_placeholder, labels_placeholder)
        gradients = problem.gradients(obj, params)

        vars_to_preinitialize = params

    with tf.Session(graph=graph) as sess:
        # initialize the parameter scope variables; necessary for apply_gradients
        sess.run(tf.variables_initializer(vars_to_preinitialize))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # create the train operation and training variables
        try:
            train_op, real_params = optimizer.apply_gradients(
                zip(gradients, params))
            obj = problem.objective(real_params, data_placeholder,
                                    labels_placeholder)
        except TypeError:
            # A standard optimizer (i.e. not the hierarchical-RNN trainable
            # optimizer) returns a single train op rather than a
            # (train_op, params) tuple, so the unpacking above raises TypeError.
            train_op = optimizer.apply_gradients(zip(gradients, params))

        vars_to_restore = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope=OPTIMIZER_SCOPE)
        vars_to_initialize = list(
            set(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)) -
            set(vars_to_restore) - set(vars_to_preinitialize))
        # load or initialize optimizer variables
        if logdir is not None:
            restorer = tf.train.Saver(var_list=vars_to_restore)
            ckpt = tf.train.latest_checkpoint(logdir)
            restorer.restore(sess, ckpt)
        else:
            sess.run(tf.variables_initializer(vars_to_restore))
        # initialize all the other variables
        sess.run(tf.variables_initializer(vars_to_initialize))

        problem.init_fn(sess)

        # generate the minibatch indices
        batch_inds = dataset.batch_indices(num_iter, batch_size)

        # run the train operation for n iterations and save the objectives
        records = defaultdict(list)
        objective_values = []
        for itr, batch in enumerate(batch_inds):

            # data to feed in
            feed = {
                data_placeholder: dataset.data[batch],
                labels_placeholder: dataset.labels[batch]
            }
            full_feed = {
                data_placeholder: dataset.data,
                labels_placeholder: dataset.labels
            }

            # record stuff
            if record_every is not None and (itr % record_every) == 0:

                def grad_value(g):
                    if isinstance(g, tf.IndexedSlices):
                        return g.values
                    else:
                        return g

                records_fetch = {}
                for p in params:
                    for key in optimizer.get_slot_names():
                        v = optimizer.get_slot(p, key)
                        records_fetch[p.name + "_" + key] = v
                gav_fetch = [(grad_value(g), v)
                             for g, v in zip(gradients, params)]

                _, gav_eval, records_eval = sess.run(
                    (obj, gav_fetch, records_fetch), feed_dict=feed)
                full_obj_eval = sess.run([obj], feed_dict=full_feed)

                records["objective"].append(full_obj_eval)
                records["grad_norm"].append(
                    [np.linalg.norm(g.ravel()) for g, _ in gav_eval])
                records["param_norm"].append(
                    [np.linalg.norm(v.ravel()) for _, v in gav_eval])
                records["grad"].append([g for g, _ in gav_eval])
                records["param"].append([v for _, v in gav_eval])
                records["iter"].append(itr)

                for k, v in records_eval.items():
                    records[k].append(v)

            # run the optimization train operation
            objective_values.append(
                sess.run([train_op, obj], feed_dict=feed)[1])

        # final parameters
        parameters = [sess.run(p) for p in params]
        coord.request_stop()
        coord.join(threads)

    return objective_values, parameters, records
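As with the wall-clock test, a hedged usage sketch; `my_problem` and `my_dataset` are again hypothetical placeholders for project-supplied objects.

# Hypothetical call running 500 Adam iterations and recording diagnostics
# every 10 iterations.
adam = tf.train.AdamOptimizer(learning_rate=1e-3)
objective_values, final_params, records = test_optimizer(
    optimizer=adam,
    problem=my_problem,
    num_iter=500,
    dataset=my_dataset,
    batch_size=64,
    seed=0,
    record_every=10)
# records["iter"] holds the recorded iteration indices; records["grad_norm"]
# and records["param_norm"] hold per-parameter norms at those iterations.
print("final objective: %g" % objective_values[-1])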