Example #1
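Note: these snippets are not self-contained; they assume PTBEnasChild and PTBEnasController (the ENAS child and controller model classes) and FLAGS (the parsed command-line flags, e.g. via tf.app.flags) are already in scope in the surrounding module.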
def BuildChild(x_train, x_valid, x_test):
    """Build the PTB ENAS child model, configured from FLAGS."""
    child_model = PTBEnasChild(
        x_train,
        x_valid,
        x_test,
        rnn_l2_reg=FLAGS.child_rnn_l2_reg,
        rnn_slowness_reg=FLAGS.child_rnn_slowness_reg,
        rhn_depth=FLAGS.child_rhn_depth,
        fixed_arc=FLAGS.child_fixed_arc,
        batch_size=FLAGS.batch_size,
        bptt_steps=FLAGS.child_bptt_steps,
        lstm_num_layers=FLAGS.child_num_layers,
        lstm_hidden_size=FLAGS.child_lstm_hidden_size,
        lstm_e_keep=FLAGS.child_lstm_e_keep,
        lstm_x_keep=FLAGS.child_lstm_x_keep,
        lstm_h_keep=FLAGS.child_lstm_h_keep,
        lstm_o_keep=FLAGS.child_lstm_o_keep,
        lstm_l_skip=FLAGS.child_lstm_l_skip,
        vocab_size=10000,
        lr_init=FLAGS.child_lr,
        lr_dec_start=FLAGS.child_lr_dec_start,
        lr_dec_every=FLAGS.child_lr_dec_every,
        lr_dec_rate=FLAGS.child_lr_dec_rate,
        lr_dec_min=FLAGS.child_lr_dec_min,
        lr_warmup_val=FLAGS.child_lr_warmup_val,
        lr_warmup_steps=FLAGS.child_lr_warmup_steps,
        l2_reg=FLAGS.child_l2_reg,
        optim_moving_average=FLAGS.child_optim_moving_average,
        clip_mode="global",
        grad_bound=FLAGS.child_grad_bound,
        optim_algo="sgd",
        sync_replicas=FLAGS.child_sync_replicas,
        num_aggregate=FLAGS.child_num_aggregate,
        num_replicas=FLAGS.child_num_replicas,
        temperature=FLAGS.child_temperature,
        name="ptb_enas_model")
    return child_model
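For context, a minimal usage sketch (an assumption, not part of the original example): build the graph, then run one training step. The child_model.loss and child_model.train_op attributes are referenced the same way in get_ops below.

import tensorflow as tf  # TF 1.x graph-mode API, as the code above assumes

g = tf.Graph()
with g.as_default():
    # x_train / x_valid / x_test: PTB word-id arrays from your own input pipeline.
    child_model = BuildChild(x_train, x_valid, x_test)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # One child training step; fetch the loss alongside the update op.
        loss, _ = sess.run([child_model.loss, child_model.train_op])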
Example #2
def get_ops(x_train, x_valid, x_test):
    """Create relevant models."""

    ops = {}

    if FLAGS.search_for == "enas":
        assert FLAGS.child_lstm_hidden_size % FLAGS.child_block_size == 0, (
            "--child_block_size has to divide child_lstm_hidden_size")

        if FLAGS.child_fixed_arc is not None:
            assert not FLAGS.controller_training, (
                "with --child_fixed_arc, cannot train controller")

        child_model = PTBEnasChild(
            x_train,
            x_valid,
            x_test,
            rnn_l2_reg=FLAGS.child_rnn_l2_reg,
            rnn_slowness_reg=FLAGS.child_rnn_slowness_reg,
            rhn_depth=FLAGS.child_rhn_depth,
            fixed_arc=FLAGS.child_fixed_arc,
            batch_size=FLAGS.batch_size,
            bptt_steps=FLAGS.child_bptt_steps,
            lstm_num_layers=FLAGS.child_num_layers,
            lstm_hidden_size=FLAGS.child_lstm_hidden_size,
            lstm_e_keep=FLAGS.child_lstm_e_keep,
            lstm_x_keep=FLAGS.child_lstm_x_keep,
            lstm_h_keep=FLAGS.child_lstm_h_keep,
            lstm_o_keep=FLAGS.child_lstm_o_keep,
            lstm_l_skip=FLAGS.child_lstm_l_skip,
            vocab_size=10000,
            lr_init=FLAGS.child_lr,
            lr_dec_start=FLAGS.child_lr_dec_start,
            lr_dec_every=FLAGS.child_lr_dec_every,
            lr_dec_rate=FLAGS.child_lr_dec_rate,
            lr_dec_min=FLAGS.child_lr_dec_min,
            lr_warmup_val=FLAGS.child_lr_warmup_val,
            lr_warmup_steps=FLAGS.child_lr_warmup_steps,
            l2_reg=FLAGS.child_l2_reg,
            optim_moving_average=FLAGS.child_optim_moving_average,
            clip_mode="global",
            grad_bound=FLAGS.child_grad_bound,
            optim_algo="sgd",
            sync_replicas=FLAGS.child_sync_replicas,
            num_aggregate=FLAGS.child_num_aggregate,
            num_replicas=FLAGS.child_num_replicas,
            temperature=FLAGS.child_temperature,
            name="ptb_enas_model")

        if FLAGS.child_fixed_arc is None:
            controller_model = PTBEnasController(
                rhn_depth=FLAGS.child_rhn_depth,
                lstm_size=100,
                lstm_num_layers=1,
                lstm_keep_prob=1.0,
                tanh_constant=FLAGS.controller_tanh_constant,
                temperature=FLAGS.controller_temperature,
                lr_init=FLAGS.controller_lr,
                lr_dec_start=0,
                lr_dec_every=1000000,  # never decrease learning rate
                l2_reg=FLAGS.controller_l2_reg,
                entropy_weight=FLAGS.controller_entropy_weight,
                bl_dec=FLAGS.controller_bl_dec,
                optim_algo="adam",
                sync_replicas=FLAGS.controller_sync_replicas,
                num_aggregate=FLAGS.controller_num_aggregate,
                num_replicas=FLAGS.controller_num_replicas)

            child_model.connect_controller(controller_model)
            controller_model.build_trainer(child_model)

            controller_ops = {
                "train_step": controller_model.train_step,
                "loss": controller_model.loss,
                "train_op": controller_model.train_op,
                "lr": controller_model.lr,
                "grad_norm": controller_model.grad_norm,
                "valid_ppl": controller_model.valid_ppl,
                "optimizer": controller_model.optimizer,
                "baseline": controller_model.baseline,
                "ppl": controller_model.ppl,
                "reward": controller_model.reward,
                "entropy": controller_model.sample_entropy,
                "sample_arc": controller_model.sample_arc,
            }
        else:
            child_model.connect_controller(None)
            controller_ops = None
    else:
        raise ValueError("Unknown search_for {}".format(FLAGS.search_for))

    child_ops = {
        "global_step": child_model.global_step,
        "loss": child_model.loss,
        "train_op": child_model.train_op,
        "train_ppl": child_model.train_ppl,
        "train_reset": child_model.train_reset,
        "valid_reset": child_model.valid_reset,
        "test_reset": child_model.test_reset,
        "lr": child_model.lr,
        "grad_norm": child_model.grad_norm,
        "optimizer": child_model.optimizer,
    }

    ops = {
        "child": child_ops,
        "controller": controller_ops,
        "num_train_batches": child_model.num_train_batches,
        "eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs,
        "eval_func": child_model.eval_once,
    }

    return ops
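A hedged sketch of how the returned ops dictionary might drive a training loop; the session type and the eval_func signature are assumptions in the spirit of the ENAS training script, not taken from this snippet:

import tensorflow as tf  # TF 1.x

g = tf.Graph()
with g.as_default():
    ops = get_ops(x_train, x_valid, x_test)  # data arrays from your pipeline
    with tf.train.SingularMonitoredSession() as sess:
        while True:
            # One child step; fetch diagnostics alongside the update op.
            loss, lr, grad_norm, _ = sess.run([
                ops["child"]["loss"],
                ops["child"]["lr"],
                ops["child"]["grad_norm"],
                ops["child"]["train_op"],
            ])
            global_step = sess.run(ops["child"]["global_step"])
            if global_step % ops["eval_every"] == 0:
                # Assumed signature: eval_func(session, eval_set_name).
                ops["eval_func"](sess, "valid")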