Example #1
    def __init__(self, generator, sample_id):
        self.global_step = generator.global_step
        self.training = generator.training
        self.batch_input = generator.batch_input
        self.vocab_size = generator.vocab_size
        self.hparams = generator.hparams
        self.time_major = self.hparams.time_major
        self.sample_id = tf.transpose(sample_id)
        self.reverse_vocab_table = generator.reverse_vocab_table

        # Create two copies of the discriminator, one for real pairs and one for fake pairs;
        # they share the same underlying variables.
        with tf.name_scope("real_discriminator"):
            with tf.variable_scope("discriminator"):
                self.predict_real = self._build_disc(
                    self.batch_input.original_source,
                    self.batch_input.original_target)

        with tf.name_scope("fake_discriminator"):
            with tf.variable_scope("discriminator", reuse=True):
                self.predict_fake = self._build_disc(
                    self.batch_input.original_source, self.sample_id)

        with tf.name_scope("discriminator_loss"):
            zeros = tf.zeros(tf.shape(self.predict_real[:, 0]), dtype=tf.int32)
            loss_real = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.predict_real, labels=zeros))
            self.accuracy_real = tf.metrics.accuracy(
                zeros, tf.argmax(self.predict_real, 1))

            ones = tf.ones(tf.shape(self.predict_fake[:, 0]), dtype=tf.int32)
            loss_fake = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.predict_fake, labels=ones))
            self.accuracy_fake = tf.metrics.accuracy(
                ones, tf.argmax(self.predict_fake, 1))

            self.loss = loss_real + loss_fake
            self.gan_loss = -tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.predict_fake, labels=ones))

        with tf.name_scope("discriminator_train"):
            discrim_tvars = [
                var for var in tf.trainable_variables()
                if var.name.startswith("discriminator")
            ]
            discrim_optim = tf.train.AdamOptimizer()
            gradients = tf.gradients(self.loss, discrim_tvars)
            clipped_gradients, _ = model_helper.gradient_clip(
                gradients, max_gradient_norm=self.hparams.max_gradient_norm)

            self.update = discrim_optim.apply_gradients(
                zip(clipped_gradients, discrim_tvars))
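
The labeling convention above drives both losses: real source/target pairs are labeled class 0, generated pairs class 1, and the generator's adversarial loss is simply the negated fake-pair loss. A minimal, self-contained TF 1.x sketch of that computation (the placeholder names are illustrative, not taken from the class above):

import tensorflow as tf

# Two-way logits produced by a discriminator (illustrative placeholders).
predict_real = tf.placeholder(tf.float32, [None, 2], name="predict_real")
predict_fake = tf.placeholder(tf.float32, [None, 2], name="predict_fake")

# Real pairs are class 0, generated (fake) pairs are class 1.
zeros = tf.zeros(tf.shape(predict_real)[:1], dtype=tf.int32)
ones = tf.ones(tf.shape(predict_fake)[:1], dtype=tf.int32)

loss_real = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=predict_real, labels=zeros))
loss_fake = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=predict_fake, labels=ones))

disc_loss = loss_real + loss_fake  # the discriminator minimizes this
gan_loss = -loss_fake              # the generator minimizes this instead
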
Example #2
    def __init__(self, training, tokenized_data, batch_input, scope=None):
        """
        Create the model.

        Args:
            training: A boolean value to indicate whether this model will be used for training.
            tokenized_data: The data object containing all information required for the model.
            batch_input: The batched input data (source/target tensors and their sequence lengths).
            scope: scope of the model.
        """
        self.training = training
        self.batch_input = batch_input
        self.vocab_table = tokenized_data.vocab_table
        self.vocab_size = tokenized_data.vocab_size
        self.reverse_vocab_table = tokenized_data.reverse_vocab_table

        hparams = tokenized_data.hparams
        self.hparams = hparams

        self.num_layers = hparams.num_layers
        self.time_major = hparams.time_major

        # Initializer
        initializer = model_helper.get_initializer(hparams.init_op,
                                                   hparams.random_seed,
                                                   hparams.init_weight)
        tf.get_variable_scope().set_initializer(initializer)

        # Embeddings
        self.embedding = (model_helper.create_embbeding(
            vocab_size=self.vocab_size,
            embed_size=hparams.num_units,
            scope=scope))
        # The batch size may vary from batch to batch due to bucketing and/or reaching
        # the end of the training set, so it is computed dynamically here.
        self.batch_size = tf.size(self.batch_input.source_sequence_length)

        # Projection
        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("decoder/output_projection"):
                self.output_layer = layers_core.Dense(self.vocab_size,
                                                      use_bias=False,
                                                      name="output_projection")

        # Training or inference graph
        print("# Building graph for the model ...")
        res = self.build_graph(hparams, scope=scope)

        if training:
            self.train_loss = res[1]
            self.word_count = tf.reduce_sum(self.batch_input.source_sequence_length) + \
                              tf.reduce_sum(self.batch_input.target_sequence_length)
            # Count the number of predicted words for computing perplexity.
            self.predict_count = tf.reduce_sum(
                self.batch_input.target_sequence_length)
        else:
            self.infer_logits, _, self.final_context_state, self.sample_id = res
            self.sample_words = self.reverse_vocab_table.lookup(
                tf.to_int64(self.sample_id))

        self.global_step = tf.Variable(0, trainable=False)

        params = tf.trainable_variables()

        # Gradients update operation for training the model.
        if training:
            self.learning_rate = tf.placeholder(tf.float32,
                                                shape=[],
                                                name='learning_rate')
            opt = tf.train.AdamOptimizer(self.learning_rate)

            gradients = tf.gradients(self.train_loss, params)

            clipped_gradients, gradient_norm_summary = model_helper.gradient_clip(
                gradients, max_gradient_norm=hparams.max_gradient_norm)

            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

            # Summary
            self.train_summary = tf.summary.merge([
                tf.summary.scalar("learning_rate", self.learning_rate),
                tf.summary.scalar("train_loss", self.train_loss),
            ] + gradient_norm_summary)
        else:
            self.infer_summary = tf.no_op()

        # Saver
        self.saver = tf.train.Saver(tf.global_variables())

        # Print trainable variables
        if training:
            print("# Trainable variables:")
            for param in params:
                print("  {}, {}, {}".format(param.name, str(param.get_shape()),
                                            param.op.device))
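
model_helper.gradient_clip is an external helper that is not shown in these examples. Judging from how it is called, here is a minimal sketch of what such a helper typically looks like, built on tf.clip_by_global_norm (this is an assumption about its internals, not the project's actual code):

import tensorflow as tf

def gradient_clip(gradients, max_gradient_norm):
    """Clip gradients by global norm and return norm summaries (assumed sketch)."""
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    gradient_norm_summary = [
        tf.summary.scalar("grad_norm", gradient_norm),
        tf.summary.scalar("clipped_gradient", tf.global_norm(clipped_gradients)),
    ]
    return clipped_gradients, gradient_norm_summary
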
Example #3
    def __init__(self, training, tokenized_data, batch_input, scope=None):
        """
        Create the model.

        Args:
            training: A boolean value to indicate whether this model will be used for training.
            tokenized_data: The data object containing all information required for the model.
            batch_input: The batched input data (source/target tensors and their sequence lengths).
            scope: scope of the model.
        """
        self.training = training
        self.batch_input = batch_input
        self.vocab_list = tokenized_data.vocab_list
        self.vocab_table = tokenized_data.vocab_table
        self.vocab_size = tokenized_data.vocab_size
        self.reverse_vocab_table = tokenized_data.reverse_vocab_table

        hparams = tokenized_data.hparams
        self.hparams = hparams

        self.num_layers = hparams.num_layers
        self.time_major = hparams.time_major

        # Initializer
        initializer = model_helper.get_initializer(
            hparams.init_op, hparams.random_seed, hparams.init_weight)
        tf.get_variable_scope().set_initializer(initializer)

        # Embeddings
        self.embedding = model_helper.create_embedding(vocab_size=self.vocab_size,
                                                        embed_size=hparams.num_units,
                                                        trainable=hparams.train_embeddings,
                                                        scope=scope)

        if training and hparams.pretrained_embeddings:
            from settings import PROJECT_ROOT
            pretrained_embeddings_file = os.path.join(PROJECT_ROOT, 'Data', 'Corpus', hparams.pretrained_embeddings)
            self.pretrained = model_helper.populate_embedding(self.embedding,
                                                                self.vocab_list,
                                                                pretrained_embeddings_file)
        else:
            self.pretrained = None

        # The batch size may vary from batch to batch due to bucketing and/or reaching
        # the end of the training set, so it is computed dynamically here.
        self.batch_size = tf.size(self.batch_input.source_sequence_length)

        # Projection
        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("decoder/output_projection"):
                self.output_layer = layers_core.Dense(
                    self.vocab_size, use_bias=False, name="output_projection")

        self.global_step = tf.Variable(0, trainable=False)

        # Training or inference graph
        print("# Building graph for the model ...")
        res = self.build_graph(hparams, scope=scope)

        if training:
            self.train_loss = res[1]
            self.word_count = tf.reduce_sum(self.batch_input.source_sequence_length) + \
                              tf.reduce_sum(self.batch_input.target_sequence_length)
            # Count the number of predicted words for computing perplexity.
            self.predict_count = tf.reduce_sum(self.batch_input.target_sequence_length)
            self.sample_id = res[-2]
            self.greedy_sample_id = res[-1]
        else:
            self.infer_logits, _, self.final_context_state, self.sample_id, self.greedy_sample_id = res
            self.sample_words = self.reverse_vocab_table.lookup(tf.to_int64(self.sample_id))

        gen_tvars = [var for var in tf.trainable_variables() if var.name.startswith("dynamic_seq2seq")]

        # Gradients update operation for training the model.
        if training:
            #with tf.control_dependencies([]):

            self.learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
            opt = tf.train.AdamOptimizer(self.learning_rate)

            #depends = [self.disc.update] if self.disc else []
            #with tf.control_dependencies(depends):
            gradients = tf.gradients(self.train_loss, gen_tvars)

            clipped_gradients, _ = model_helper.gradient_clip(
                gradients, max_gradient_norm=hparams.max_gradient_norm)

            self.train_discriminator = tf.placeholder(tf.bool, shape=[], name='train_discriminator')

            update_gen = opt.apply_gradients(zip(clipped_gradients, gen_tvars), global_step=self.global_step)
            #update_gen = tf.cond(tf.logical_not(self.train_discriminator), lambda: update_gen, lambda: tf.no_op())

            # Needs review: the intent is to update the discriminator only when its
            # accuracy has fallen significantly.
            update_disc = tf.cond(self.train_discriminator, lambda: self.disc.update, lambda: tf.no_op())

            self.update = tf.group(self.disc.loss, self.disc.accuracy_real[1], self.disc.accuracy_fake[1],
                                   update_disc, self.train_loss, update_gen)

            scalars = [
                tf.summary.scalar("learning_rate", self.learning_rate),
                tf.summary.scalar("train_loss", self.train_loss),
            ]
            if self.disc:
                scalars += self.disc.metrics()

            # Summary
            self.train_summary = tf.summary.merge(scalars)
        else:
            self.infer_summary = tf.no_op()

        # Saver
        if self.hparams.only_restore_gan:
            variables = [v for v in tf.global_variables() if not v.name.startswith('discriminator')]
        else:
            variables = tf.global_variables()
        self.saver = tf.train.Saver(variables, max_to_keep=2)

        # Print trainable variables
        if training:
            print("# Trainable variables:")
            for param in tf.trainable_variables():
                print("  {}, {}, {}".format(param.name, str(param.get_shape()), param.op.device))
Example #4
    def __init__(self, training, tokenized_data, batch_input, scope=None):
        """
        Create the model.

        Args:
            training: A boolean value to indicate whether this model will be used for training.
            tokenized_data: The data object containing all information required for the model.
            batch_input: The batched input data (source/target tensors and their sequence lengths).
            scope: scope of the model.
        """
        self.training = training
        self.batch_input = batch_input
        self.vocab_table = tokenized_data.vocab_table
        self.vocab_size = tokenized_data.vocab_size
        self.reverse_vocab_table = tokenized_data.reverse_vocab_table

        hparams = tokenized_data.hparams
        self.hparams = hparams

        self.num_layers = hparams.num_layers
        self.time_major = hparams.time_major

        # Initializer
        initializer = model_helper.get_initializer(hparams.init_op,
                                                   hparams.random_seed,
                                                   hparams.init_weight)
        tf.get_variable_scope().set_initializer(initializer)

        # Embeddings
        self.embedding = (model_helper.create_embbeding(
            vocab_size=self.vocab_size,
            embed_size=hparams.num_units,
            scope=scope))
        # The batch size may vary from batch to batch due to bucketing and/or reaching
        # the end of the training set, so it is computed dynamically here.
        self.batch_size = tf.size(self.batch_input.source_sequence_length)

        # Projection
        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("decoder/output_projection"):
                self.output_layer = layers_core.Dense(self.vocab_size,
                                                      use_bias=False,
                                                      name="output_projection")

        # Training or inference graph
        print('\n\n{} Building graph for the model ...{}\n'.format(
            colorama.Fore.GREEN, colorama.Fore.RESET))
        print("***************************************")
        res = self.build_graph(hparams, scope=scope)

        if training:
            self.train_loss = res[1]
            self.word_count = tf.reduce_sum(self.batch_input.source_sequence_length) + \
                              tf.reduce_sum(self.batch_input.target_sequence_length)
            # Count the number of predicted words for computing perplexity.
            self.predict_count = tf.reduce_sum(
                self.batch_input.target_sequence_length)
        else:
            self.infer_logits, _, self.final_context_state, self.sample_id = res
            self.sample_words = self.reverse_vocab_table.lookup(
                tf.to_int64(self.sample_id))

        self.global_step = tf.Variable(0, trainable=False)

        params = tf.trainable_variables()

        # Gradients update operation for training the model.
        if training:
            self.learning_rate = tf.placeholder(tf.float32,
                                                shape=[],
                                                name='learning_rate')
            opt = tf.train.AdamOptimizer(self.learning_rate)

            # Added by Nuruzzaman on 18/05/2018.
            # Time at least one epoch with this flag set to True, then set it to
            # False (or remove it) and time another epoch; the difference can be
            # surprisingly large.
            colocate_gradients_with_ops = True

            gradients = tf.gradients(
                self.train_loss, params,
                colocate_gradients_with_ops=colocate_gradients_with_ops)

            clipped_gradients, gradient_norm_summary = model_helper.gradient_clip(
                gradients, max_gradient_norm=hparams.max_gradient_norm)

            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

            # Summary
            self.train_summary = tf.summary.merge([
                tf.summary.scalar("learning_rate", self.learning_rate),
                tf.summary.scalar("train_loss", self.train_loss),
            ] + gradient_norm_summary)
        else:
            self.infer_summary = tf.no_op()

        # Saver
        self.saver = tf.train.Saver(tf.global_variables())
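
A hedged usage sketch for the saver built above; model, sess, and the checkpoint directory are illustrative assumptions rather than names from the original code.

# Save a checkpoint during training (the path is illustrative).
model.saver.save(sess, "result/basic", global_step=model.global_step)

# Later, restore the most recent checkpoint before running inference.
ckpt = tf.train.latest_checkpoint("result")
if ckpt:
    model.saver.restore(sess, ckpt)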