def BuildChild(x_train, x_valid, x_test):
  """Build the PTB child model from FLAGS."""
  child_model = PTBEnasChild(
      x_train,
      x_valid,
      x_test,
      rnn_l2_reg=FLAGS.child_rnn_l2_reg,
      rnn_slowness_reg=FLAGS.child_rnn_slowness_reg,
      rhn_depth=FLAGS.child_rhn_depth,
      fixed_arc=FLAGS.child_fixed_arc,
      batch_size=FLAGS.batch_size,
      bptt_steps=FLAGS.child_bptt_steps,
      lstm_num_layers=FLAGS.child_num_layers,
      lstm_hidden_size=FLAGS.child_lstm_hidden_size,
      lstm_e_keep=FLAGS.child_lstm_e_keep,
      lstm_x_keep=FLAGS.child_lstm_x_keep,
      lstm_h_keep=FLAGS.child_lstm_h_keep,
      lstm_o_keep=FLAGS.child_lstm_o_keep,
      lstm_l_skip=FLAGS.child_lstm_l_skip,
      vocab_size=10000,
      lr_init=FLAGS.child_lr,
      lr_dec_start=FLAGS.child_lr_dec_start,
      lr_dec_every=FLAGS.child_lr_dec_every,
      lr_dec_rate=FLAGS.child_lr_dec_rate,
      lr_dec_min=FLAGS.child_lr_dec_min,
      lr_warmup_val=FLAGS.child_lr_warmup_val,
      lr_warmup_steps=FLAGS.child_lr_warmup_steps,
      l2_reg=FLAGS.child_l2_reg,
      optim_moving_average=FLAGS.child_optim_moving_average,
      clip_mode="global",
      grad_bound=FLAGS.child_grad_bound,
      optim_algo="sgd",
      sync_replicas=FLAGS.child_sync_replicas,
      num_aggregate=FLAGS.child_num_aggregate,
      num_replicas=FLAGS.child_num_replicas,
      temperature=FLAGS.child_temperature,
      name="ptb_enas_model")
  return child_model
def get_ops(x_train, x_valid, x_test):
  """Create relevant models."""
  ops = {}
  if FLAGS.search_for == "enas":
    assert FLAGS.child_lstm_hidden_size % FLAGS.child_block_size == 0, (
        "--child_block_size has to divide child_lstm_hidden_size")
    if FLAGS.child_fixed_arc is not None:
      assert not FLAGS.controller_training, (
          "with --child_fixed_arc, cannot train controller")

    # BuildChild (above) constructs PTBEnasChild from the same FLAGS.
    child_model = BuildChild(x_train, x_valid, x_test)

    if FLAGS.child_fixed_arc is None:
      controller_model = PTBEnasController(
          rhn_depth=FLAGS.child_rhn_depth,
          lstm_size=100,
          lstm_num_layers=1,
          lstm_keep_prob=1.0,
          tanh_constant=FLAGS.controller_tanh_constant,
          temperature=FLAGS.controller_temperature,
          lr_init=FLAGS.controller_lr,
          lr_dec_start=0,
          lr_dec_every=1000000,  # never decrease learning rate
          l2_reg=FLAGS.controller_l2_reg,
          entropy_weight=FLAGS.controller_entropy_weight,
          bl_dec=FLAGS.controller_bl_dec,
          optim_algo="adam",
          sync_replicas=FLAGS.controller_sync_replicas,
          num_aggregate=FLAGS.controller_num_aggregate,
          num_replicas=FLAGS.controller_num_replicas)

      child_model.connect_controller(controller_model)
      controller_model.build_trainer(child_model)

      controller_ops = {
          "train_step": controller_model.train_step,
          "loss": controller_model.loss,
          "train_op": controller_model.train_op,
          "lr": controller_model.lr,
          "grad_norm": controller_model.grad_norm,
          "valid_ppl": controller_model.valid_ppl,
          "optimizer": controller_model.optimizer,
          "baseline": controller_model.baseline,
          "ppl": controller_model.ppl,
          "reward": controller_model.reward,
          "entropy": controller_model.sample_entropy,
          "sample_arc": controller_model.sample_arc,
      }
    else:
      child_model.connect_controller(None)
      controller_ops = None
  else:
    raise ValueError("Unknown search_for {}".format(FLAGS.search_for))

  child_ops = {
      "global_step": child_model.global_step,
      "loss": child_model.loss,
      "train_op": child_model.train_op,
      "train_ppl": child_model.train_ppl,
      "train_reset": child_model.train_reset,
      "valid_reset": child_model.valid_reset,
      "test_reset": child_model.test_reset,
      "lr": child_model.lr,
      "grad_norm": child_model.grad_norm,
      "optimizer": child_model.optimizer,
  }

  ops = {
      "child": child_ops,
      "controller": controller_ops,
      "num_train_batches": child_model.num_train_batches,
      "eval_every": child_model.num_train_batches * FLAGS.eval_every_epochs,
      "eval_func": child_model.eval_once,
  }

  return ops
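
# Illustrative only: a minimal sketch of how the "ops" dict returned by
# get_ops might be consumed in a TF1-style training loop. The session
# handling, the helper name, and the "valid" eval-set argument are
# assumptions for illustration, not part of this file; the full loop would
# also interleave controller training steps when controller_ops is not None.
def _example_child_train_loop(sess, ops):
  """Hypothetical: run child train steps, evaluating every eval_every steps."""
  child_ops = ops["child"]
  while True:
    # One child training step; fetch diagnostics alongside the train op.
    loss, lr, grad_norm, global_step, _ = sess.run([
        child_ops["loss"],
        child_ops["lr"],
        child_ops["grad_norm"],
        child_ops["global_step"],
        child_ops["train_op"],
    ])
    if global_step % ops["eval_every"] == 0:
      sess.run(child_ops["valid_reset"])  # rewind the validation data pointer
      ops["eval_func"](sess, "valid")     # assumed eval_once(sess, eval_set)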