def _train_loop(self, optimization_op, loss_op, batches, hooks, max_epochs, summaries,
                summary_writer, **kwargs):
    logger.info("Start training...")
    for i in range(1, max_epochs + 1):
        for j, batch in enumerate(batches):
            feed_dict = self.model_module.convert_to_feed_dict(batch)
            if summaries is not None:
                step, sums, current_loss, _ = self.session.run(
                    [tf.train.get_global_step(), summaries, loss_op, optimization_op],
                    feed_dict=feed_dict)
                summary_writer.add_summary(sums, step)
            else:
                current_loss, _ = self.session.run(
                    [loss_op, optimization_op], feed_dict=feed_dict)
            for hook in hooks:
                hook.at_iteration_end(i, current_loss, set_name='train')
        # calling post-epoch hooks
        for hook in hooks:
            hook.at_epoch_end(i)
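# A minimal sketch of the hook interface consumed by _train_loop above: the loop calls
# at_iteration_end(epoch, loss, set_name='train') after every batch and at_epoch_end(epoch)
# after every epoch. The class name and its internals are hypothetical and only illustrate
# the two method signatures used by the loop; they are not part of the original codebase.
import logging

class LossLoggingHook:
    """Hypothetical hook that averages and logs the per-batch training loss each epoch."""

    def __init__(self):
        self._losses = []

    def at_iteration_end(self, epoch, loss, set_name='train'):
        # Collect the per-batch loss value reported by the training loop.
        self._losses.append(loss)

    def at_epoch_end(self, epoch):
        if self._losses:
            logging.info("Epoch %d, mean train loss: %.4f"
                         % (epoch, sum(self._losses) / len(self._losses)))
        self._losses = []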
def _setup_training(self, batch_size, clip, optimizer, training_set, summary_writer, l2,
                    clip_op, **kwargs):
    global_step = tf.train.create_global_step()
    if not self._is_setup:
        # First setup shared resources, e.g., vocabulary. This depends on the input module.
        logger.info("Setting up model...")
        self.setup_from_data(training_set, is_training=True)
    logger.info("Preparing training data...")
    batches = self.input_module.batch_generator(training_set, batch_size, is_eval=False)
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    loss = self.model_module.tensors[Ports.loss]
    summaries = None
    if summary_writer is not None:
        summaries = tf.summary.merge_all()
    if l2:
        loss += tf.add_n([tf.nn.l2_loss(v) for v in self.model_module.train_variables]) * l2
    if clip:
        gradients = optimizer.compute_gradients(loss)
        if clip_op == tf.clip_by_value:
            gradients = [(tf.clip_by_value(grad, clip[0], clip[1]), var)
                         for grad, var in gradients if grad is not None]
        elif clip_op == tf.clip_by_norm:
            gradients = [(tf.clip_by_norm(grad, clip), var)
                         for grad, var in gradients if grad is not None]
        min_op = optimizer.apply_gradients(gradients, global_step)
    else:
        min_op = optimizer.minimize(loss, global_step)

    variable_size = lambda v: reduce(
        lambda x, y: x * y, v.get_shape().as_list()) if v.get_shape() else 1
    num_params = sum(variable_size(v) for v in self.model_module.train_variables)
    logger.info("Number of parameters: %d" % num_params)

    # initialize non-model variables like learning rate, optimizer vars ...
    self.session.run([v.initializer for v in tf.global_variables()
                      if v not in self.model_module.variables])

    return batches, loss, min_op, summaries
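# A small, self-contained sketch of the parameter count performed above: variable_size
# multiplies the entries of a variable's static shape, and 'reduce' must come from functools
# in Python 3. The variable names and shapes below are illustrative only.
from functools import reduce

import tensorflow as tf

w = tf.get_variable("example_w", shape=[300, 128])   # hypothetical weight matrix
b = tf.get_variable("example_b", shape=[128])        # hypothetical bias vector

variable_size = lambda v: reduce(
    lambda x, y: x * y, v.get_shape().as_list()) if v.get_shape() else 1

num_params = sum(variable_size(v) for v in [w, b])   # 300 * 128 + 128 = 38528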
def _setup_training(self, batch_size, clip, optimizer, training_set, summary_writer, l2,
                    clip_op, **kwargs):
    global_step = tf.train.create_global_step()
    if not self._is_setup:
        # First setup shared resources, e.g., vocabulary. This depends on the input module.
        logger.info("Setting up model...")
        self.setup_from_data(training_set, is_training=True)
    logger.info("Preparing training data...")
    batches = self.input_module.batch_generator(training_set, batch_size, is_eval=False)
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    loss = self.model_module.tensors[Ports.loss]
    summaries = None
    if summary_writer is not None:
        summaries = tf.summary.merge_all()
    if l2:
        loss += tf.add_n([tf.nn.l2_loss(v) for v in self.model_module.train_variables]) * l2
    if clip:
        gradients = optimizer.compute_gradients(loss)
        if clip_op == tf.clip_by_value:
            gradients = [(tf.clip_by_value(grad, clip[0], clip[1]), var)
                         for grad, var in gradients if grad is not None]
        elif clip_op == tf.clip_by_norm:
            gradients = [(tf.clip_by_norm(grad, clip), var)
                         for grad, var in gradients if grad is not None]
        min_op = optimizer.apply_gradients(gradients, global_step)
    else:
        min_op = optimizer.minimize(loss, global_step)

    variable_size = lambda v: reduce(
        lambda x, y: x * y, v.get_shape().as_list()) if v.get_shape() else 1
    num_params = sum(variable_size(v) for v in self.model_module.train_variables)
    logger.info("Number of parameters: %d" % num_params)

    try:
        param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
        logging.info('total_params: %d\n' % param_stats.total_parameters)
    except Exception as err:
        logging.error(err)

    try:
        # base param statistics
        dnn_model_trainable_variables = self.model_module.train_variables
        variable_size = lambda v: reduce(
            lambda x, y: x * y, v.get_shape().as_list()) if v.get_shape() else 1
        logging.info("Trainable params:")
        var_with_size = [(v.name, variable_size(v)) for v in dnn_model_trainable_variables]
        var_with_size.sort(key=lambda v: v[0])
        for v in var_with_size:
            logging.info("%s: %s" % (v[0], v[1]))
        num_params = sum(variable_size(v) for v in dnn_model_trainable_variables)
        logger.info("Total Number of parameters: %d" % num_params)

        # Full param statistics
        param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
        logging.info('total_params: %d\n' % param_stats.total_parameters)
    except Exception as err:
        logging.error(err)

    # initialize non-model variables like learning rate, optimizer vars ...
    self.session.run([v.initializer for v in tf.global_variables()
                      if v not in self.model_module.variables])

    return batches, loss, min_op, summaries
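# A hedged sketch of how the two methods above could be wired together by a caller. The
# method name 'train' and its default argument values are assumptions for illustration and
# are not taken from the original code; only the _setup_training and _train_loop signatures
# are grounded in the methods shown above.
def train(self, optimizer, training_set, batch_size=32, max_epochs=10, l2=0.0,
          clip=None, clip_op=tf.clip_by_norm, hooks=(), summary_writer=None, **kwargs):
    # Build the input pipeline, loss, and training op, then run the epoch/batch loop.
    batches, loss, min_op, summaries = self._setup_training(
        batch_size, clip, optimizer, training_set, summary_writer, l2, clip_op, **kwargs)
    self._train_loop(min_op, loss, batches, list(hooks), max_epochs, summaries,
                     summary_writer, **kwargs)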