Example #1
    def _train_loop(self, optimization_op, loss_op, batches, hooks, max_epochs,
                    summaries, summary_writer, **kwargs):
        logger.info("Start training...")
        for i in range(1, max_epochs + 1):
            for j, batch in enumerate(batches):
                feed_dict = self.model_module.convert_to_feed_dict(batch)

                if summaries is not None:
                    step, sums, current_loss, _ = self.session.run(
                        [
                            tf.train.get_global_step(), summaries, loss_op,
                            optimization_op
                        ],
                        feed_dict=feed_dict)
                    summary_writer.add_summary(sums, step)
                else:
                    current_loss, _ = self.session.run(
                        [loss_op, optimization_op], feed_dict=feed_dict)

                for hook in hooks:
                    hook.at_iteration_end(i, current_loss, set_name='train')

            # calling post-epoch hooks
            for hook in hooks:
                hook.at_epoch_end(i)
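The loop only assumes that each hook exposes at_iteration_end(epoch, loss, set_name=...) and at_epoch_end(epoch). A minimal sketch of a compatible hook, assuming only that interface (the class name, constructor argument and logging behaviour are illustrative, not part of the snippet above):

import logging

logger = logging.getLogger(__name__)


class LossLoggingHook:
    """Hypothetical hook: reports the average training loss every few iterations."""

    def __init__(self, log_every=100):
        self._log_every = log_every
        self._iteration = 0
        self._accumulated_loss = 0.0

    def at_iteration_end(self, epoch, loss, set_name='train'):
        # called once per processed batch with the current loss value
        self._iteration += 1
        self._accumulated_loss += loss
        if self._iteration % self._log_every == 0:
            logger.info("[%s] epoch %d, iteration %d, avg loss %.4f",
                        set_name, epoch, self._iteration,
                        self._accumulated_loss / self._log_every)
            self._accumulated_loss = 0.0

    def at_epoch_end(self, epoch):
        # called once after all batches of an epoch have been processed
        logger.info("Finished epoch %d", epoch)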
Example #2
    def _setup_training(self, batch_size, clip, optimizer, training_set,
                        summary_writer, l2, clip_op, **kwargs):
        global_step = tf.train.create_global_step()
        if not self._is_setup:
            # First setup shared resources, e.g., vocabulary. This depends on the input module.
            logger.info("Setting up model...")
            self.setup_from_data(training_set, is_training=True)
        logger.info("Preparing training data...")
        batches = self.input_module.batch_generator(training_set,
                                                    batch_size,
                                                    is_eval=False)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
        loss = self.model_module.tensors[Ports.loss]
        summaries = None
        if summary_writer is not None:
            summaries = tf.summary.merge_all()
        if l2:
            loss += tf.add_n(
                [tf.nn.l2_loss(v)
                 for v in self.model_module.train_variables]) * l2
        if clip:
            gradients = optimizer.compute_gradients(loss)
            # skip variables for which no gradient was computed (grad is None)
            if clip_op == tf.clip_by_value:
                gradients = [(tf.clip_by_value(grad, clip[0], clip[1]), var)
                             for grad, var in gradients if grad is not None]
            elif clip_op == tf.clip_by_norm:
                gradients = [(tf.clip_by_norm(grad, clip), var)
                             for grad, var in gradients if grad is not None]
            min_op = optimizer.apply_gradients(gradients, global_step)
        else:
            min_op = optimizer.minimize(loss, global_step)

        variable_size = lambda v: reduce(lambda x, y: x * y,
                                         v.get_shape().as_list()
                                         ) if v.get_shape() else 1
        num_params = sum(
            variable_size(v) for v in self.model_module.train_variables)
        logger.info("Number of parameters: %d" % num_params)

        # initialize non model variables like learning rate, optimizer vars ...
        self.session.run([
            v.initializer for v in tf.global_variables()
            if v not in self.model_module.variables
        ])
        return batches, loss, min_op, summaries
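The tuple returned here matches what _train_loop in Example #1 consumes. A sketch of how the two methods might be wired together in a single training entry point, assuming the method name train and its default arguments (both are assumptions, not taken from the snippets):

    def train(self, optimizer, training_set, batch_size=32, max_epochs=10,
              hooks=(), l2=0.0, clip=None, clip_op=None, summary_writer=None,
              **kwargs):
        # build the graph pieces and the batch generator once, then iterate
        batches, loss_op, min_op, summaries = self._setup_training(
            batch_size, clip, optimizer, training_set, summary_writer, l2,
            clip_op, **kwargs)
        self._train_loop(min_op, loss_op, batches, list(hooks), max_epochs,
                         summaries, summary_writer, **kwargs)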
Example #3
    def _setup_training(self, batch_size, clip, optimizer, training_set,
                        summary_writer, l2, clip_op, **kwargs):
        global_step = tf.train.create_global_step()
        if not self._is_setup:
            # First setup shared resources, e.g., vocabulary. This depends on the input module.
            logger.info("Setting up model...")
            self.setup_from_data(training_set, is_training=True)
        logger.info("Preparing training data...")
        batches = self.input_module.batch_generator(training_set,
                                                    batch_size,
                                                    is_eval=False)
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
        loss = self.model_module.tensors[Ports.loss]
        summaries = None
        if summary_writer is not None:
            summaries = tf.summary.merge_all()
        if l2:
            loss += tf.add_n(
                [tf.nn.l2_loss(v)
                 for v in self.model_module.train_variables]) * l2
        if clip:
            gradients = optimizer.compute_gradients(loss)
            # skip variables for which no gradient was computed (grad is None)
            if clip_op == tf.clip_by_value:
                gradients = [(tf.clip_by_value(grad, clip[0], clip[1]), var)
                             for grad, var in gradients if grad is not None]
            elif clip_op == tf.clip_by_norm:
                gradients = [(tf.clip_by_norm(grad, clip), var)
                             for grad, var in gradients if grad is not None]
            min_op = optimizer.apply_gradients(gradients, global_step)
        else:
            min_op = optimizer.minimize(loss, global_step)

        variable_size = lambda v: reduce(lambda x, y: x * y,
                                         v.get_shape().as_list()
                                         ) if v.get_shape() else 1
        num_params = sum(
            variable_size(v) for v in self.model_module.train_variables)
        logger.info("Number of parameters: %d" % num_params)

        try:
            param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
                tf.get_default_graph(),
                tfprof_options=tf.contrib.tfprof.model_analyzer.
                TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
            logging.info('total_params: %d\n' % param_stats.total_parameters)
        except Exception as err:
            logging.error(err)

        try:
            # base param statistics
            dnn_model_trainable_variables = self.model_module.train_variables
            variable_size = lambda v: reduce(lambda x, y: x * y,
                                             v.get_shape().as_list()
                                             ) if v.get_shape() else 1

            logging.info("Trainable params:")
            var_with_size = [(v.name, variable_size(v))
                             for v in dnn_model_trainable_variables]
            var_with_size.sort(key=lambda v: v[0])  # sort by variable name
            for v in var_with_size:
                logging.info("%s: %s" % (v[0], v[1]))

            num_params = sum(
                variable_size(v) for v in dnn_model_trainable_variables)
            logger.info("Total Number of parameters: %d" % num_params)

            # Full param statistics
            param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
                tf.get_default_graph(),
                tfprof_options=tf.contrib.tfprof.model_analyzer.
                TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
            logging.info('total_params: %d\n' % param_stats.total_parameters)
        except Exception as err:
            logging.error(err)

        # initialize non model variables like learning rate, optimizer vars ...
        self.session.run([
            v.initializer for v in tf.global_variables()
            if v not in self.model_module.variables
        ])
        return batches, loss, min_op, summaries
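Both setup variants count parameters by multiplying the dimensions of each trainable variable's static shape. The same computation as a standalone helper, assuming Python 3 (where reduce lives in functools) and variables with fully defined shapes; the function name is illustrative:

from functools import reduce


def count_parameters(variables):
    """Sum the number of elements over all variables with a known static shape."""
    total = 0
    for v in variables:
        dims = v.get_shape().as_list()
        # scalars have an empty dimension list and contribute a single parameter
        total += reduce(lambda x, y: x * y, dims) if dims else 1
    return total

In a TF1 graph, count_parameters(tf.trainable_variables()) gives the equivalent count over all trainable variables, analogous to the "Number of parameters" figure logged above.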