# Imports needed by the snippets below (common_layers is tensor2tensor's).
import tensorflow as tf
from six.moves import xrange  # pylint: disable=redefined-builtin
from tensor2tensor.layers import common_layers


def model_fn_body(self, features):
  hparams = self._hparams
  # TODO(rshin): Give identity_module lower weight by default.
  multi_conv = multi_conv_module(
      kernel_sizes=[(3, 3), (5, 5), (7, 7)], seps=[0, 1])
  conv_modules = [multi_conv, identity_module]
  activation_modules = [
      identity_module, lambda x, _: tf.nn.relu(x),
      lambda x, _: tf.nn.elu(x), lambda x, _: tf.tanh(x)
  ]
  norm_modules = [identity_module, layernorm_module, noamnorm_module]
  binary_modules = [
      first_binary_module, second_binary_module, sum_binary_module,
      shakeshake_binary_module
  ]
  inputs = features["inputs"]

  def run_unary(x, name):
    """A single step of unary modules."""
    x_shape = x.get_shape()
    with tf.variable_scope(name):
      with tf.variable_scope("norm"):
        x = run_unary_modules(norm_modules, x, hparams)
        x.set_shape(x_shape)
      with tf.variable_scope("activation"):
        x = run_unary_modules(activation_modules, x, hparams)
        x.set_shape(x_shape)
      with tf.variable_scope("conv"):
        x = run_unary_modules(conv_modules, x, hparams)
        x.set_shape(x_shape)
    return tf.nn.dropout(x, 1.0 - hparams.dropout), batch_deviation(x)

  cur1, cur2, cur3, extra_loss = inputs, inputs, inputs, 0.0
  cur_shape = inputs.get_shape()
  for i in xrange(hparams.num_hidden_layers):
    with tf.variable_scope("layer_%d" % i):
      cur1, loss1 = run_unary(cur1, "unary1")
      cur2, loss2 = run_unary(cur2, "unary2")
      cur3, loss3 = run_unary(cur3, "unary3")
      extra_loss += (loss1 + loss2 + loss3) / float(hparams.num_hidden_layers)
      with tf.variable_scope("binary1"):
        next1 = run_binary_modules(binary_modules, cur1, cur2, hparams)
        next1.set_shape(cur_shape)
      with tf.variable_scope("binary2"):
        next2 = run_binary_modules(binary_modules, cur1, cur3, hparams)
        next2.set_shape(cur_shape)
      with tf.variable_scope("binary3"):
        next3 = run_binary_modules(binary_modules, cur2, cur3, hparams)
        next3.set_shape(cur_shape)
      cur1, cur2, cur3 = next1, next2, next3

  anneal = common_layers.inverse_exp_decay(hparams.anneal_until)
  extra_loss *= hparams.batch_deviation_loss_factor * anneal
  return cur1, extra_loss
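# `batch_deviation` is used above but not defined in this excerpt. A minimal
# sketch of a plausible definition, assuming the extra loss is meant to
# penalize the average per-feature standard deviation across the batch:
def batch_deviation(x):
  """Average deviation of the batch (sketch, not the canonical definition)."""
  x_mean = tf.reduce_mean(x, axis=[0], keep_dims=True)
  x_variance = tf.reduce_mean(tf.square(x - x_mean), axis=[0], keep_dims=True)
  return tf.reduce_mean(tf.sqrt(x_variance))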
def run_unary_modules_basic(modules, cur, hparams):
  """Run unary modules."""
  selection_var = tf.get_variable("selection", [len(modules)],
                                  initializer=tf.zeros_initializer())
  inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01)
  selected_weights = tf.nn.softmax(selection_var * inv_t)
  all_res = [modules[n](cur, hparams) for n in xrange(len(modules))]
  all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
  res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1])
  return tf.reduce_sum(res, axis=0)
def run_unary_modules_basic(modules, cur, hparams):
  """Run unary modules."""
  selection_weights = create_selection_weights(
      "selection",
      "softmax",
      shape=[len(modules)],
      inv_t=100.0 * common_layers.inverse_exp_decay(
          hparams.anneal_until, min_value=0.01))
  all_res = [modules[n](cur, hparams) for n in xrange(len(modules))]
  all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
  res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1])
  return tf.reduce_sum(res, axis=0)
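# The call sites in model_fn_body go through a plain `run_unary_modules`,
# which is not shown in this excerpt. A minimal dispatcher sketch, assuming
# small module lists run every module (basic) and larger lists fall back to
# sampling k of them; the threshold of 8 and k=4 are assumptions:
def run_unary_modules(modules, cur, hparams):
  if len(modules) < 8:
    return run_unary_modules_basic(modules, cur, hparams)
  return run_unary_modules_sample(modules, cur, hparams, k=4)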
def model_fn_body(self, features):
  hparams = self._hparams
  conv_modules = [
      conv_module(kw, kw, sep, div)
      for kw in [3, 5, 7]
      for sep in [0, 1]
      for div in [1]
  ] + [identity_module]
  activation_modules = [
      identity_module, lambda x, _: tf.nn.relu(x),
      lambda x, _: tf.nn.elu(x), lambda x, _: tf.tanh(x)
  ]
  norm_modules = [identity_module, layernorm_module, noamnorm_module]
  binary_modules = [
      first_binary_module, second_binary_module, sum_binary_module,
      shakeshake_binary_module
  ]
  inputs = features["inputs"]

  def run_unary(x, name):
    """A single step of unary modules."""
    x_shape = x.get_shape()
    with tf.variable_scope(name):
      with tf.variable_scope("norm"):
        x = run_unary_modules(norm_modules, x, hparams)
        x.set_shape(x_shape)
      with tf.variable_scope("activation"):
        x = run_unary_modules(activation_modules, x, hparams)
        x.set_shape(x_shape)
      with tf.variable_scope("conv"):
        x = run_unary_modules(conv_modules, x, hparams)
        x.set_shape(x_shape)
    return tf.nn.dropout(x, 1.0 - hparams.dropout), batch_deviation(x)

  cur1, cur2, extra_loss = inputs, inputs, 0.0
  cur_shape = inputs.get_shape()
  for i in xrange(hparams.num_hidden_layers):
    with tf.variable_scope("layer_%d" % i):
      cur1, loss1 = run_unary(cur1, "unary1")
      cur2, loss2 = run_unary(cur2, "unary2")
      extra_loss += (loss1 + loss2) / float(hparams.num_hidden_layers)
      with tf.variable_scope("binary1"):
        next1 = run_binary_modules(binary_modules, cur1, cur2, hparams)
        next1.set_shape(cur_shape)
      with tf.variable_scope("binary2"):
        next2 = run_binary_modules(binary_modules, cur1, cur2, hparams)
        next2.set_shape(cur_shape)
      cur1, cur2 = next1, next2

  anneal = common_layers.inverse_exp_decay(hparams.anneal_until)
  extra_loss *= hparams.batch_deviation_loss_factor * anneal
  return cur1, extra_loss
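# `identity_module` and `conv_module` are referenced above but not defined in
# this excerpt. Minimal sketches, assuming hparams.hidden_size output filters
# and tensor2tensor's common_layers.subseparable_conv for the separable case
# (the exact canonical definitions may differ):
def identity_module(x, unused_hparams):
  return x


def conv_module(kw, kh, sep, div):
  def convfn(x, hparams):
    return common_layers.subseparable_conv(
        x, hparams.hidden_size // div, (kw, kh),
        padding="SAME", separability=sep,
        name="conv_%d%d_sep%d_div%d" % (kw, kh, sep, div))
  return convfn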
def run_unary_modules_sample(modules, cur, hparams, k):
  """Run modules, sampling k."""
  selection_weights = create_selection_weights(
      "selection", ("softmax_topk", k),
      shape=[len(modules)],
      inv_t=100.0 * common_layers.inverse_exp_decay(
          hparams.anneal_until, min_value=0.01))
  all_res = [
      tf.cond(tf.less(selection_weights.normalized[n], 1e-6),
              lambda: tf.zeros_like(cur),
              lambda i=n: modules[i](cur, hparams))
      for n in xrange(len(modules))
  ]
  all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
  res = all_res * tf.reshape(selection_weights.normalized, [-1, 1, 1, 1, 1])
  return tf.reduce_sum(res, axis=0)
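# `create_selection_weights` is assumed to return an object whose `normalized`
# field is the temperature-sharpened selection distribution. A minimal sketch
# of the "softmax" and ("softmax_topk", k) modes used above, mirroring the
# inline sampling logic of the older variant below (names and structure are
# assumptions):
import collections

SelectionWeights = collections.namedtuple("SelectionWeights",
                                          ["raw", "normalized"])


def create_selection_weights(name, type_, shape, inv_t=1.0):
  raw = tf.get_variable(name, shape, initializer=tf.zeros_initializer())
  if type_ == "softmax":
    normalized = tf.nn.softmax(raw * inv_t)
  else:  # ("softmax_topk", k): renormalize over k sampled modules only.
    k = type_[1]
    selection = tf.squeeze(
        tf.multinomial(tf.expand_dims(raw, axis=0), k), axis=0)
    to_run = tf.reduce_sum(tf.one_hot(selection, shape[0]), axis=0)
    normalized = tf.nn.softmax(raw * inv_t - 1e9 * (1.0 - to_run))
  return SelectionWeights(raw=raw, normalized=normalized)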
def run_unary_modules_sample(modules, cur, hparams, k):
  """Run modules, sampling k."""
  selection_var = tf.get_variable("selection", [len(modules)],
                                  initializer=tf.zeros_initializer())
  selection = tf.multinomial(tf.expand_dims(selection_var, axis=0), k)
  selection = tf.squeeze(selection, axis=0)  # [k] selected classes.
  to_run = tf.one_hot(selection, len(modules))  # [k x nmodules] one-hot.
  to_run = tf.reduce_sum(to_run, axis=0)  # [nmodules], 0=not run, 1=run.
  all_res = [
      tf.cond(tf.less(to_run[n], 0.1),
              lambda: tf.zeros_like(cur),
              lambda i=n: modules[i](cur, hparams))
      for n in xrange(len(modules))
  ]
  inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01)
  selected_weights = tf.nn.softmax(
      selection_var * inv_t - 1e9 * (1.0 - to_run))
  all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
  res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1])
  return tf.reduce_sum(res, axis=0)
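# A hypothetical call site for the sampling variant, assuming 4-D activations
# [batch, height, width, depth] (the [-1, 1, 1, 1, 1] reshape above implies
# 4-D module outputs). The -1e9 * (1.0 - to_run) term pushes unsampled logits
# to effectively -inf, so the softmax renormalizes over the k sampled modules:
with tf.variable_scope("conv"):
  cur = run_unary_modules_sample(conv_modules, cur, hparams, k=2)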