def _build_train(self):
    """Build the training graph using a mean-squared-error loss.

    Runs the model on ``self.x_train`` and sets ``self.loss``,
    ``self.train_preds``, ``self.train_acc``, ``self.num_vars``,
    ``self.global_step`` and the four values returned by ``get_train_ops``
    (``self.train_op``, ``self.lr``, ``self.grad_norm``, ``self.optimizer``).
    """
    print("-" * 80)
    print("Build train graph")
    print(self.x_train)
    net_out = self._model(self.x_train, is_training=True)

    # CIFAR10-to-chess modification: the earlier sparse softmax
    # cross-entropy loss (and a categorical cross-entropy attempt) is
    # replaced by the MSE computed below.
    print("@@@@@@@@@@@@@@@@@@@@@@@@")
    print(net_out)
    print(self.y_train)
    print("@@@@@@@@@@@@@@@@@@@@@@@@")
    self.loss = tf.reduce_mean(tf.keras.losses.MSE(net_out, self.y_train))

    # Predictions are cast to float32 before being compared with
    # self.y_train (presumably float labels -- confirm against the input
    # pipeline).
    predicted = tf.cast(tf.argmax(net_out, axis=1), tf.float32)
    self.train_preds = predicted
    matches = tf.cast(tf.equal(predicted, self.y_train), tf.int32)
    self.train_acc = tf.reduce_sum(matches)

    # Only variables whose name starts with this model's name are trained.
    model_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(model_vars)
    print("Model has {} params".format(self.num_vars))

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(
        self.loss, model_vars, self.global_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results
def _build_train(self):
    """Build the training graph with a sparse softmax cross-entropy loss.

    When ``self.use_aux_heads`` is set, the loss on ``self.aux_logits`` is
    added to the optimised loss with weight 0.4. Sets ``self.loss``,
    optionally ``self.aux_loss``, ``self.train_preds``, ``self.train_acc``,
    ``self.num_vars`` and the four values returned by ``get_train_ops``.
    ``self.global_step`` is read here, not created.
    """
    print("-" * 80)
    print("Build train graph")
    net_out = self._model(self.x_train, is_training=True)

    main_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=net_out, labels=self.y_train)
    self.loss = tf.reduce_mean(main_xent)
    # self._weight_transfer_loss() is intentionally not called here.

    objective = self.loss
    if self.use_aux_heads:
        aux_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.aux_logits, labels=self.y_train)
        self.aux_loss = tf.reduce_mean(aux_xent)
        objective = self.loss + 0.4 * self.aux_loss

    predicted = tf.to_int32(tf.argmax(net_out, axis=1))
    self.train_preds = predicted
    matches = tf.to_int32(tf.equal(predicted, self.y_train))
    self.train_acc = tf.reduce_sum(matches)

    # Variables with "aux_head" in their name are left out of the list
    # handed to the optimiser.
    model_vars = []
    for v in tf.trainable_variables():
        if v.name.startswith(self.name) and "aux_head" not in v.name:
            model_vars.append(v)
    for v in model_vars:
        print(v)

    self.num_vars = count_model_params(model_vars)
    print("Model has {0} params".format(self.num_vars))

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(
        objective, model_vars, self.global_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results
def build_trainer(self):
    """Build the policy-gradient training ops for one masked sample.

    ``self.valid_acc`` is a scalar float placeholder used as the reward.
    A boolean mask placeholder of length ``self.batch_size`` selects the
    entry of each per-sample tensor that is trained on (the first entry the
    mask keeps is used).
    """
    self.valid_acc = tf.placeholder(dtype=tf.float32, shape=[])
    # NOTE(review): the mask placeholder is only held in a local name here;
    # confirm how callers feed it.
    mask = tf.placeholder(dtype=tf.bool, shape=[self.batch_size])

    def _first_selected(per_sample):
        # First element of `per_sample` that the mask keeps.
        return tf.boolean_mask(per_sample, mask)[0]

    self.cur_sample_entropy = _first_selected(self.sample_entropy)
    self.cur_sample_log_prob = _first_selected(self.sample_log_prob)
    self.cur_skip_count = _first_selected(self.skip_count)
    self.cur_skip_penaltys = _first_selected(self.skip_penaltys)

    reward = self.valid_acc

    denom = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
    self.skip_rate = tf.to_float(self.cur_skip_count) / denom

    if self.entropy_weight is not None:
        reward = reward + self.entropy_weight * self.cur_sample_entropy

    self.cur_sample_log_prob = tf.reduce_sum(self.cur_sample_log_prob)

    # The baseline is moved towards the reward by a factor of
    # (1 - bl_dec); the control dependency makes the reward tensor used in
    # the loss depend on that update.
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    step_towards_reward = (1 - self.bl_dec) * (self.baseline - reward)
    baseline_update = tf.assign_sub(self.baseline, step_towards_reward)
    with tf.control_dependencies([baseline_update]):
        reward = tf.identity(reward)

    self.loss = self.cur_sample_log_prob * (reward - self.baseline)
    if self.skip_weight is not None:
        self.loss = self.loss + self.skip_weight * self.cur_skip_penaltys

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    own_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    print("-" * 80)
    for v in own_vars:
        print(v)

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=False,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(self.loss, own_vars, self.train_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results
def build_trainer(self, child_model):
    """Build the policy-gradient training ops driven by a child model.

    The reward is ``child_model.valid_shuffle_acc`` divided by
    ``child_model.batch_size``, optionally increased by an entropy bonus.
    An exponentially-updated baseline is subtracted from it, and an
    optional skip penalty is added to the loss.

    Args:
        child_model: object providing ``build_valid_rl()``,
            ``valid_shuffle_acc`` and ``batch_size``.
    """
    child_model.build_valid_rl()

    correct = tf.cast(child_model.valid_shuffle_acc, tf.float32)
    total = tf.cast(child_model.batch_size, tf.float32)
    self.valid_acc = correct / total
    self.reward = self.valid_acc

    denom = tf.cast(
        self.num_layers * (self.num_layers - 1) / 2, tf.float32)
    self.skip_rate = tf.cast(self.skip_count, tf.float32) / denom

    if self.entropy_weight is not None:
        self.reward = self.reward + self.entropy_weight * self.sample_entropy

    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # The control dependency makes the reward used in the loss depend on
    # the baseline update.
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    delta = (1 - self.bl_dec) * (self.baseline - self.reward)
    baseline_update = tf.assign_sub(self.baseline, delta)
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    self.loss = self.sample_log_prob * (self.reward - self.baseline)
    if self.skip_weight is not None:
        self.loss = self.loss + self.skip_weight * self.skip_penaltys

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    own_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    print("-" * 80)
    for v in own_vars:
        print(v)

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(self.loss, own_vars, self.train_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results
def _build_train(self):
    """Build the training graph with a sum-of-squared-errors loss.

    The labels are one-hot encoded with a fixed depth of 10 and compared
    with the model output. ``get_train_ops`` is called with the extra
    ``bitsW``, ``bitsG`` and ``is_child=True`` arguments and is expected to
    return five values here, including the gradients.
    """
    print("-" * 80)
    print("Build train graph")

    output, layers = self._model(self.x_train, is_training=True)
    self.output = output
    self.layers = layers

    # Loss is the sum of squared errors against one-hot labels.
    targets = tf.cast(tf.one_hot(self.y_train, 10), tf.float32)
    with tf.name_scope('loss'):
        # TODO: change to reduce_mean?
        self.loss = 0.5 * tf.reduce_sum(tf.square(targets - output))

    predicted = tf.to_int32(tf.argmax(output, axis=1))
    self.train_preds = predicted
    matches = tf.to_int32(tf.equal(predicted, self.y_train))
    self.train_acc = tf.reduce_sum(matches)

    model_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(model_vars)
    print("Model has {} params".format(self.num_vars))

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
        bitsW=self.bitsW,
        bitsG=self.bitsG,
        is_child=True,
    )
    results = get_train_ops(
        self.loss, model_vars, self.global_step, **settings)
    (self.train_op, self.lr, self.grad_norm, self.grads,
     self.optimizer) = results
def _build_train(self):
    """Build the training graph for image regression with an L1 loss.

    The target is ``self.y_train`` rescaled by ``(y - 127) / 127``. The
    loss is the absolute difference between that target and the model
    output. Image and scalar summaries are registered and merged into
    ``self.summaries``. ``self.global_step`` is read here, not created.
    """
    print("-" * 80)
    print("Build train graph")
    prediction = self._model(self.x_train, is_training=True)

    scaled_target = (self.y_train - 127) / 127
    self.loss = tf.reduce_mean(
        tf.losses.absolute_difference(scaled_target, prediction))
    objective = self.loss

    # NOTE(review): psnr receives the unscaled y_train together with the
    # model output, which the loss compares against the rescaled target --
    # confirm that psnr expects this pairing.
    self.train_psnr = psnr(self.y_train, prediction)
    tf.summary.scalar('loss', self.loss)

    # Map the prediction and the input back with x * 127 + 127; the
    # prediction is also clipped to [0, 255].
    restored = tf.clip_by_value(prediction * 127 + 127, 0, 255)
    input_img = self.x_train*127 + 127
    bicubic_img = tf.image.resize_bicubic(input_img, [128, 128])

    image_summaries = (
        ("output", restored),
        ("target", self.y_train),
        ("input", input_img),
        ("bicubic", bicubic_img),
    )
    for tag, image in image_summaries:
        tf.summary.image(tag, tf.cast(image, tf.uint8))

    # Variables with "aux_head" in their name are not optimised.
    model_vars = []
    for v in tf.trainable_variables():
        if v.name.startswith(self.name) and "aux_head" not in v.name:
            model_vars.append(v)

    self.num_vars = count_model_params(model_vars)
    print("Model has {0} params".format(self.num_vars))

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(
        objective, model_vars, self.global_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results

    tf.summary.scalar('lr', self.lr)
    self.summaries = tf.summary.merge_all()
def _build_train(self):
    """Build the training graph for the recurrent model.

    ``self.loss`` is the summed log-prob tensor divided by the batch size;
    ``self.train_ppl`` is ``exp(mean(log_probs))``. Optional L2 and
    slowness regularisers on the hidden states are added to the optimised
    loss only, not to ``self.loss``. ``get_train_ops`` is called with
    ``get_grad_norms=True`` and is expected to return five values.
    """
    print("Build train graph")
    hidden, reset_op = self._model(self.x_train, True, False)
    self.train_reset = reset_op

    log_probs = self._get_log_probs(
        hidden, self.y_train, batch_size=self.batch_size, is_training=True)
    self.loss = tf.reduce_sum(log_probs) / tf.to_float(self.batch_size)
    self.train_ppl = tf.exp(tf.reduce_mean(log_probs))

    model_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(model_vars)
    print("-" * 80)
    print("Model has {} parameters".format(self.num_vars))

    objective = self.loss
    if self.rnn_l2_reg is not None:
        objective = objective + (
            self.rnn_l2_reg * tf.reduce_sum(hidden**2)
            / tf.to_float(self.batch_size))
    if self.rnn_slowness_reg is not None:
        # self.all_h_diff is not set in this method; it is read as-is.
        objective = objective + (
            self.rnn_slowness_reg * self.all_h_diff
            / tf.to_float(self.batch_size))

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_warmup_val=self.lr_warmup_val,
        lr_warmup_steps=self.lr_warmup_steps,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_dec_min=self.lr_dec_min,
        optim_algo=self.optim_algo,
        moving_average=self.optim_moving_average,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
        get_grad_norms=True,
    )
    results = get_train_ops(
        objective, model_vars, self.global_step, **settings)
    (self.train_op, self.lr, self.grad_norm, self.optimizer,
     self.grad_norms) = results
def _build_train(self):
    """Build the training graph with a sigmoid cross-entropy loss.

    Outputs are thresholded at 0.5. An example counts as correct when
    exactly 6 of its entries along axis 1 match the labels. Note that
    ``self.y_train`` is replaced by its int32 cast as a side effect.
    """
    print("-" * 80)
    print("Build train graph")
    net_out = self._model(self.x_train, is_training=True)

    xent = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=net_out, labels=self.y_train)
    self.loss = tf.reduce_mean(xent)

    probs = tf.nn.sigmoid(net_out)
    above_threshold = tf.greater_equal(probs, tf.constant(0.5))
    self.train_preds = tf.to_int32(above_threshold)
    self.y_train = tf.to_int32(self.y_train)

    agree = tf.equal(self.train_preds, self.y_train)
    self.soft_acc_count = tf.count_nonzero(agree, axis=1)
    # 6 is hard-coded; presumably the number of labels per example --
    # confirm.
    fully_correct = tf.to_int32(tf.equal(self.soft_acc_count, 6))
    self.train_acc = tf.reduce_sum(fully_correct)

    model_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(model_vars)
    print("Model has {} params".format(self.num_vars))

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(
        self.loss, model_vars, self.global_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results
def build_trainer(self, child_model):
    """Build the policy-gradient ops with a fed PSNR reward and baseline.

    Both ``self.valid_PSNR`` (the reward) and ``self.baseline`` are float
    placeholders. No baseline variable is updated inside the graph; the
    in-graph moving-average update is disabled in this variant.
    ``get_train_ops`` is expected to return five values, including the
    gradients. ``self.skip_rate`` is a constant 0.0.

    Args:
        child_model: object providing ``build_valid_rl()``.
    """
    child_model.build_valid_rl()

    self.valid_PSNR = tf.placeholder(dtype=tf.float32)
    self.reward = self.valid_PSNR
    if self.entropy_weight is not None:
        self.reward = self.reward + self.entropy_weight * self.sample_entropy

    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # The baseline is supplied through a placeholder rather than kept as a
    # variable updated with tf.assign_sub.
    self.baseline = tf.placeholder(dtype=tf.float32)
    self.loss = self.sample_log_prob * (self.reward - self.baseline)

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    own_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    print("-" * 80)
    for v in own_vars:
        print(v)

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(self.loss, own_vars, self.train_step, **settings)
    (self.train_op, self.lr, self.grad_norm, self.optimizer,
     self.grads) = results

    self.skip_rate = tf.constant(0.0, dtype=tf.float32)
def build_trainer(self, child_model):
    """Build the policy-gradient ops using ``child_model.accuracy`` as reward.

    The entropy bonus is applied whenever ``self.entropy_weight`` is
    truthy, so a weight of 0 or None disables it. ``self.skip_rate`` is a
    constant 0.0.

    Args:
        child_model: object providing ``build_valid_rl()`` and ``accuracy``.
    """
    child_model.build_valid_rl()

    self.reward = tf.to_float(child_model.accuracy)
    if self.entropy_weight:
        self.reward = self.reward + self.entropy_weight * self.sample_entropy

    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # The control dependency makes the reward used in the loss depend on
    # the baseline update.
    self.baseline = tf.Variable(
        0.0, dtype=tf.float32, trainable=False, name='baseline')
    delta = (1 - self.bl_dec) * (self.baseline - self.reward)
    baseline_update = tf.assign_sub(self.baseline, delta)
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    self.loss = self.sample_log_prob * (self.reward - self.baseline)

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name='train_step')

    own_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    for v in own_vars:
        print(v)

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(self.loss, own_vars, self.train_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results

    self.skip_rate = tf.constant(0.0, dtype=tf.float32)
def build_trainer(self, child_model):
    """Build the policy-gradient ops with a perplexity-based reward.

    ``child_model.rl_loss`` is cast to float, detached with
    ``tf.stop_gradient`` and clipped to at most 10.0. The reward is
    ``80 / exp(loss)``, optionally increased by an entropy bonus.

    Args:
        child_model: object providing ``rl_loss``.
    """
    # Actor: reward derived from the child's validation loss.
    clipped_loss = tf.minimum(
        tf.stop_gradient(tf.to_float(child_model.rl_loss)), 10.0)
    self.valid_loss = clipped_loss
    self.valid_ppl = tf.exp(clipped_loss)
    self.reward = 80.0 / self.valid_ppl

    if self.entropy_weight is not None:
        self.reward = self.reward + self.entropy_weight * self.sample_entropy

    # Baseline variant (no critic): the control dependency makes the reward
    # used in the loss depend on the baseline update.
    self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    delta = (1 - self.bl_dec) * (self.baseline - self.reward)
    baseline_update = tf.assign_sub(self.baseline, delta)
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    self.loss = self.sample_log_probs * (self.reward - self.baseline)

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    own_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(self.loss, own_vars, self.train_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results
def _build_train(self):
    """Build the training graph for the model chosen by ``self.use_model``.

    ``"SRCNN"`` and ``"RDN"`` select their dedicated builders; any other
    value falls back to ``self._model``. The loss is the mean squared error
    between ``self.y_train`` and the predictions. The replica-related
    arguments are not passed to ``get_train_ops`` in this variant.
    """
    print("Build train graph")

    if self.use_model == "SRCNN":
        build_fn = self._model_srcnn
    elif self.use_model == "RDN":
        build_fn = self._model_RDN
    else:
        build_fn = self._model
    self.train_preds = build_fn(self.x_train, True)

    self.loss = tf.losses.mean_squared_error(
        labels=self.y_train, predictions=self.train_preds)

    model_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(model_vars)
    print("-" * 80)
    for v in model_vars:
        print(v)

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")

    # sync_replicas / num_aggregate / num_replicas are deliberately not
    # forwarded.
    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_warmup_steps=self.lr_warmup_steps,
        lr_warmup_val=self.lr_warmup_val,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
    )
    results = get_train_ops(
        self.loss, model_vars, self.global_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results
def _build_train(self):
    """Build the training graph with a sparse softmax cross-entropy loss.

    Sets ``self.loss``, ``self.train_preds``, ``self.train_acc``,
    ``self.num_vars``, ``self.global_step`` and the four values returned by
    ``get_train_ops`` (``self.train_op``, ``self.lr``, ``self.grad_norm``,
    ``self.optimizer``).
    """
    # print is written as a call so the block parses on Python 3 as well;
    # with one argument the output is unchanged on Python 2.
    print("Build train graph")

    logits = self._model(self.x_train, True)
    log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(log_probs)

    # train_acc is the number of predictions equal to the labels (a sum,
    # not a ratio).
    self.train_preds = tf.argmax(logits, axis=1)
    self.train_preds = tf.to_int32(self.train_preds)
    self.train_acc = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.to_int32(self.train_acc)
    self.train_acc = tf.reduce_sum(self.train_acc)

    # Only variables whose name starts with this model's name are trained.
    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(tf_variables)
    print("-" * 80)
    for var in tf_variables:
        print(var)

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")
    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas)
def build_trainer(self, child_model):
    """Build the policy-gradient ops with an optional latency-aware reward.

    For each of the two entries of ``self.sample_arc``, the elements at odd
    positions ``1, 3, ...`` (one per cell index) are used to index a fixed
    latency lookup table, and the gathered values are summed. When
    ``self.multi_objective`` is not ``False`` the validation accuracy is
    multiplied by ``latency_sum ** alpha`` if the sum is below the
    threshold, and by ``latency_sum ** beta`` otherwise.

    Args:
        child_model: object providing ``build_valid_rl()``,
            ``valid_shuffle_acc`` and ``batch_size``.
    """
    child_model.build_valid_rl()

    # Latency value per operator id; indexed with tf.gather below.
    lookup = tf.Variable([9., 25., 9., 3., 1.])

    self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                      tf.to_float(child_model.batch_size))

    def _arc_latency(arc_idx):
        # Sum of lookup entries selected by the odd-position elements of
        # self.sample_arc[arc_idx].
        picked = tf.reshape(self.sample_arc[arc_idx][1], [1])
        for cell in range(1, self.num_cells):
            nxt = tf.reshape(self.sample_arc[arc_idx][cell * 2 + 1], [1])
            picked = tf.concat([picked, nxt], axis=0)
        operator_ids = tf.convert_to_tensor(picked, dtype=tf.int32)
        return tf.reduce_sum(tf.gather(lookup, operator_ids))

    latency_cell = _arc_latency(0)
    latency_redu = _arc_latency(1)
    latency_sum = tf.math.add(latency_cell, latency_redu)

    alpha = tf.to_float(0.)
    beta = tf.to_float(-1.)
    threshold = tf.to_float(140.)
    latency_val = tf.cond(
        tf.math.greater(threshold, latency_sum),
        lambda: tf.math.pow(latency_sum, alpha),
        lambda: tf.math.pow(latency_sum, beta))
    self.latency_sum = latency_sum

    if self.multi_objective == False:
        self.reward = self.valid_acc
    else:
        # Multi-objective reward: accuracy scaled by the latency term.
        self.reward = self.valid_acc * latency_val

    if self.entropy_weight is not None:
        self.reward = self.reward + self.entropy_weight * self.sample_entropy

    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # The control dependency makes the reward used in the loss depend on
    # the baseline update.
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    delta = (1 - self.bl_dec) * (self.baseline - self.reward)
    baseline_update = tf.assign_sub(self.baseline, delta)
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    self.loss = self.sample_log_prob * (self.reward - self.baseline)

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    own_vars = [
        v for v in tf.trainable_variables() if v.name.startswith(self.name)
    ]
    print("-" * 80)
    for v in own_vars:
        print(v)

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(self.loss, own_vars, self.train_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results

    self.skip_rate = tf.constant(0.0, dtype=tf.float32)
def build_trainer(self, child_model):
    """Build controller training ops whose loss is weighted by search counts.

    The loss is the matrix product of the normalised ``"N"`` values of
    ``self.root_node["children"]`` with the summed sample log-probability,
    plus an optional skip penalty. A critic loss and its train op are also
    built from ``self.all_h`` and ``self.w_critic``.

    Args:
        child_model: object providing ``build_valid_rl()``,
            ``valid_shuffle_acc`` and ``batch_size``.
    """
    child_model.build_valid_rl()
    self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                      tf.to_float(child_model.batch_size))
    self.reward = self.valid_acc

    # Critic: linear value function over the concatenated hidden states.
    all_h = tf.concat(self.all_h, axis=0)
    value_function = tf.matmul(all_h, self.w_critic)
    advantage = value_function - self.reward
    critic_loss = tf.reduce_sum(advantage**2)
    critic_train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="critic_train_step")
    # NOTE(review): critic_train_op is built but never grouped into
    # self.train_op or stored in this method -- confirm whether the critic
    # is meant to be trained.
    critic_train_op, _, _, _ = get_train_ops(
        critic_loss,
        [self.w_critic],
        critic_train_step,
        clip_mode=None,
        lr_init=1e-3,
        lr_dec_start=0,
        lr_dec_every=int(1e9),
        optim_algo="adam",
        sync_replicas=False)

    normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
    self.skip_rate = tf.to_float(self.skip_count) / normalize

    if self.entropy_weight is not None:
        self.reward += self.entropy_weight * self.sample_entropy

    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # The baseline is still updated, although the REINFORCE loss that used
    # it is replaced by the search-probability loss below.
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    baseline_update = tf.assign_sub(
        self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward))
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    # Build the counts as floats: an in-place "/=" on an integer array
    # floor-divides on Python 2 and raises a casting error on Python 3.
    search_probs = np.array(
        [child["N"] for child in self.root_node["children"]],
        dtype=np.float64)
    search_probs = search_probs / search_probs.sum()
    search_probs = np.expand_dims(search_probs, -1)
    search_probs = tf.convert_to_tensor(search_probs, dtype=tf.float32)

    # NOTE(review): after reduce_sum + expand_dims the log-prob tensor looks
    # rank-1, while tf.matmul expects rank >= 2 operands -- confirm shapes.
    self.sample_log_prob = tf.expand_dims(self.sample_log_prob, axis=0)
    self.loss = tf.matmul(search_probs, self.sample_log_prob)

    if self.skip_weight is not None:
        self.loss += self.skip_weight * self.skip_penaltys

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    # print is written as a call so the block parses on Python 3 as well.
    print("-" * 80)
    for var in tf_variables:
        print(var)

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.train_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas)
def build_trainer(self, child_model):
    """Build the policy-gradient ops and expose scaled cost terms.

    The reward is the validation accuracy, optionally increased by an
    entropy bonus. ``self.client_cost`` and ``self.linear_cost`` are the
    child's costs divided by ``10 ** (floor(log10(cost)) + 1)``. They are
    computed but not used in the loss; the cost-based reward penalties are
    all commented out. The skip penalty is added only when
    ``self.no_skip`` is falsy and ``self.skip_weight`` is not None.

    Args:
        child_model: object providing ``build_valid_rl()``,
            ``valid_shuffle_acc``, ``batch_size``, ``client_online_cost``
            and ``linear_cost``.
    """
    child_model.build_valid_rl()
    self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                      tf.to_float(child_model.batch_size))
    self.reward = self.valid_acc

    normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
    self.skip_rate = tf.to_float(self.skip_count) / normalize

    if self.entropy_weight is not None:
        self.reward += self.entropy_weight * self.sample_entropy

    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # The control dependency makes the reward used in the loss depend on
    # the baseline update.
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    baseline_update = tf.assign_sub(
        self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward))
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    # Scale client and linear cost by their decimal magnitude:
    # d = floor(log10(cost)) + 1, then cost / 10**d.
    d1 = tf.floor(
        tf.log(child_model.client_online_cost) / tf.log(10.0)) + 1
    self.client_cost = child_model.client_online_cost / 10**d1
    d2 = tf.floor(tf.log(child_model.linear_cost) / tf.log(10.0)) + 1
    self.linear_cost = child_model.linear_cost / 10**d2

    # Reward modifications that were tried and are currently disabled:
    # self.reward -= 10*self.skip_rate
    # self.reward -= child_model.linear_cost/100000000.0
    # self.reward -= 0.01*(child_model.client_online_cost/(64*24*10000) + 32.8)
    # client online cost
    # self.reward -= 3*0.0357*(child_model.client_online_cost*0.187382/3225600+13.86)
    # self.reward -= 3*0.0094*(child_model.client_online_cost*0.187382/3225600+52.393)
    # self.reward -= 3*0.0025*(child_model.client_online_cost*0.187382/3225600+197.73)
    # linear cost
    # self.reward -= 3*self.linear_cost

    self.loss = self.sample_log_prob * (self.reward - self.baseline)

    # The skip penalty is applied only when skips are enabled.
    if not self.no_skip:
        if self.skip_weight is not None:
            self.loss += self.skip_weight * self.skip_penaltys

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    # print is written as a call so the block parses on Python 3 as well.
    print("-" * 80)
    for var in tf_variables:
        print(var)

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.train_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas)
def build_trainer(self, child_model):
    """Build the controller training ops, with an optional critic.

    With ``self.use_critic`` the baseline is the mean of a linear value
    function over ``self.all_h`` and the critic's train op is grouped into
    ``self.train_op``. Otherwise an exponentially-updated baseline variable
    is used. Variables with ``"w_critic"`` in their name are excluded from
    the actor's variable list.

    Args:
        child_model: object providing ``build_valid_rl()``,
            ``valid_shuffle_acc`` and ``batch_size``.
    """
    # Actor reward: validation accuracy of the child model.
    child_model.build_valid_rl()
    self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                      tf.to_float(child_model.batch_size))
    self.reward = self.valid_acc

    if self.use_critic:
        # Critic branch.
        hidden = tf.concat(self.all_h, axis=0)
        value_function = tf.matmul(hidden, self.w_critic)
        advantage = value_function - self.reward
        critic_loss = tf.reduce_sum(advantage ** 2)
        self.baseline = tf.reduce_mean(value_function)
        self.loss = -tf.reduce_mean(self.sample_log_probs * advantage)

        critic_train_step = tf.Variable(
            0, dtype=tf.int32, trainable=False, name="critic_train_step")
        critic_settings = dict(
            clip_mode=None,
            lr_init=1e-3,
            lr_dec_start=0,
            lr_dec_every=int(1e9),
            optim_algo="adam",
            sync_replicas=False,
        )
        critic_train_op, _, _, _ = get_train_ops(
            critic_loss, [self.w_critic], critic_train_step,
            **critic_settings)
    else:
        # Baseline branch: the control dependency makes the reward used in
        # the loss depend on the baseline update.
        self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        delta = (1 - self.bl_dec) * (self.baseline - self.reward)
        baseline_update = tf.assign_sub(self.baseline, delta)
        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)
        self.loss = self.sample_log_probs * (self.reward - self.baseline)

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    actor_vars = []
    for v in tf.trainable_variables():
        if v.name.startswith(self.name) and "w_critic" not in v.name:
            actor_vars.append(v)

    print("-" * 80)
    for v in actor_vars:
        print(v)

    settings = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    results = get_train_ops(
        self.loss, actor_vars, self.train_step, **settings)
    self.train_op, self.lr, self.grad_norm, self.optimizer = results

    if self.use_critic:
        self.train_op = tf.group(self.train_op, critic_train_op)
class PTBEnasController(object):
    """LSTM controller that samples a recurrent-cell architecture.

    The constructor stores the hyper-parameters, creates the controller
    variables under the ``name`` variable scope and builds the sampling
    ops. ``build_trainer`` must be called separately to create the
    policy-gradient training ops.
    """

    def __init__(self,
                 rhn_depth=5,
                 lstm_size=32,
                 lstm_num_layers=2,
                 lstm_keep_prob=1.0,
                 tanh_constant=None,
                 temperature=None,
                 num_funcs=2,
                 lr_init=1e-3,
                 lr_dec_start=0,
                 lr_dec_every=100,
                 lr_dec_rate=0.9,
                 l2_reg=0,
                 entropy_weight=None,
                 clip_mode=None,
                 grad_bound=None,
                 bl_dec=0.999,
                 optim_algo="adam",
                 sync_replicas=False,
                 num_aggregate=None,
                 num_replicas=None,
                 name="controller"):
        """Store hyper-parameters, then create variables and sampler ops."""
        print("-" * 80)
        print("Building PTBEnasController")

        self.rhn_depth = rhn_depth
        self.lstm_size = lstm_size
        self.lstm_num_layers = lstm_num_layers
        self.lstm_keep_prob = lstm_keep_prob
        self.tanh_constant = tanh_constant
        self.temperature = temperature
        self.num_funcs = num_funcs
        self.lr_init = lr_init
        self.lr_dec_start = lr_dec_start
        self.lr_dec_every = lr_dec_every
        self.lr_dec_rate = lr_dec_rate
        self.l2_reg = l2_reg
        self.entropy_weight = entropy_weight
        self.clip_mode = clip_mode
        self.grad_bound = grad_bound
        self.bl_dec = bl_dec
        self.optim_algo = optim_algo
        self.sync_replicas = sync_replicas
        self.num_aggregate = num_aggregate
        self.num_replicas = num_replicas
        self.name = name

        self._create_params()
        self._build_sampler()

    def _create_params(self):
        """Create the LSTM, embedding, softmax and attention variables."""
        # Initialise parameters with a uniform initializer in [-0.1, 0.1].
        initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1)
        with tf.variable_scope(self.name, initializer=initializer):
            with tf.variable_scope("lstm"):
                self.w_lstm = []
                # range (not xrange) so the class also runs on Python 3.
                for layer_id in range(self.lstm_num_layers):
                    with tf.variable_scope("layer_{}".format(layer_id)):
                        w = tf.get_variable(
                            "w", [2 * self.lstm_size, 4 * self.lstm_size])
                        self.w_lstm.append(w)

            num_funcs = self.num_funcs
            with tf.variable_scope("embedding"):
                self.g_emb = tf.get_variable("g_emb", [1, self.lstm_size])
                self.w_emb = tf.get_variable(
                    "w", [num_funcs, self.lstm_size])

            with tf.variable_scope("softmax"):
                self.w_soft = tf.get_variable(
                    "w", [self.lstm_size, num_funcs])

            with tf.variable_scope("attention"):
                self.attn_w_1 = tf.get_variable(
                    "w_1", [self.lstm_size, self.lstm_size])
                self.attn_w_2 = tf.get_variable(
                    "w_2", [self.lstm_size, self.lstm_size])
                self.attn_v = tf.get_variable("v", [self.lstm_size, 1])

    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops."""
        arc_seq = []
        sample_log_probs = []
        sample_entropy = []
        all_h = []
        all_h_w = []

        # sampler ops
        inputs = self.g_emb
        prev_c, prev_h = [], []
        # prev_c and prev_h each hold one zero tensor of shape
        # [1, lstm_size] per LSTM layer.
        for _ in range(self.lstm_num_layers):
            prev_c.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))
            prev_h.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))

        for layer_id in range(self.rhn_depth):
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            all_h.append(next_h[-1])
            all_h_w.append(tf.matmul(next_h[-1], self.attn_w_1))

            if layer_id > 0:
                # Attention over the previous layers picks which earlier
                # layer this one connects to.
                query = tf.matmul(next_h[-1], self.attn_w_2)
                query = query + tf.concat(all_h_w[:-1], axis=0)
                query = tf.tanh(query)
                logits = tf.matmul(query, self.attn_v)
                logits = tf.reshape(logits, [1, layer_id])

                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)
                # Subtract the squared index distance / 6 from each logit,
                # lowering the logits of more distant layers.
                diff = tf.to_float(layer_id - tf.range(0, layer_id)) ** 2
                logits -= tf.reshape(diff, [1, layer_id]) / 6.0

                skip_index = tf.multinomial(logits, 1)
                skip_index = tf.to_int32(skip_index)
                skip_index = tf.reshape(skip_index, [1])
                arc_seq.append(skip_index)

                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=skip_index)
                sample_log_probs.append(log_prob)

                entropy = log_prob * tf.exp(-log_prob)
                sample_entropy.append(tf.stop_gradient(entropy))

                inputs = tf.nn.embedding_lookup(
                    tf.concat(all_h[:-1], axis=0), skip_index)
                inputs /= (0.1 + tf.to_float(layer_id - skip_index))
            else:
                inputs = self.g_emb

            # Second LSTM step of the layer: sample the activation function.
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            logits = tf.matmul(next_h[-1], self.w_soft)
            if self.temperature is not None:
                logits /= self.temperature
            if self.tanh_constant is not None:
                logits = self.tanh_constant * tf.tanh(logits)
            func = tf.multinomial(logits, 1)
            func = tf.to_int32(func)
            func = tf.reshape(func, [1])
            arc_seq.append(func)
            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=func)
            sample_log_probs.append(log_prob)
            entropy = log_prob * tf.exp(-log_prob)
            sample_entropy.append(tf.stop_gradient(entropy))
            inputs = tf.nn.embedding_lookup(self.w_emb, func)

        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = arc_seq

        self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
        self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

        sample_entropy = tf.concat(sample_entropy, axis=0)
        self.sample_entropy = tf.reduce_sum(sample_entropy)

        self.all_h = all_h

    def build_trainer(self, child_model):
        """Build the reward computation and the controller's train op.

        Args:
            child_model: object providing ``rl_loss``, from which the
                controller's reward is derived.
        """
        # actor
        # The controller's reward comes from the child model's rl_loss,
        # converted to float.
        self.valid_loss = tf.to_float(child_model.rl_loss)
        # tf.stop_gradient blocks back-propagation through valid_loss.
        self.valid_loss = tf.stop_gradient(self.valid_loss)
        # Perplexity: ppl = e^valid_loss.
        self.valid_ppl = tf.exp(self.valid_loss)
        # reward = 80 / ppl
        self.reward = 80.0 / self.valid_ppl

        # Optional bonus proportional to the sampling entropy.
        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        # or baseline
        self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(
            self.baseline, (1 - self.bl_dec) * (self.baseline - self.reward))

        # Compute baseline_update first, then the reward.
        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)
        # loss = sample_log_probs * (reward - baseline)
        self.loss = self.sample_log_probs * (self.reward - self.baseline)

        # Step variable handed to get_train_ops.
        self.train_step = tf.Variable(
            0, dtype=tf.int32, trainable=False, name="train_step")
        # tf_variables holds every trainable variable whose name starts
        # with self.name ("controller" by default).
        tf_variables = [var for var in tf.trainable_variables()
                        if var.name.startswith(self.name)]

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)