Example #1
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        use_dropout = training and (args.output_keep_prob < 1.0 or args.input_keep_prob < 1.0)
        cell_fn = self.select_cell_fn(args.model)
        cells = self.create_cell_stack('hidden', cell_fn, args, use_dropout=use_dropout)
        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        softmax_w = self.create_var('rnnlm', 'softmax_w', [args.rnn_size, args.vocab_size])
        softmax_b = self.create_var('rnnlm', 'softmax_b', [args.vocab_size])

        embedding = self.create_var('rnnlm', 'embedding', [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # Dropout on the embedded inputs; note this uses output_keep_prob as the
        # keep probability (input_keep_prob may be what was intended here).
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

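        # When sampling, this loop function feeds the argmax of the previous
        # step's logits back in as the next decoder input.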
        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=loop if not training else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])


        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
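        # Per-token cross-entropy over the flattened batch, weighted uniformly
        # (a weight of 1.0 for every batch_size * seq_length position).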
        loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
Example #2
 def cal_loss(out, labels):
     with tf.name_scope("loss"):
         labels = tf.reshape(labels, [-1])
         loss = seq2seq.sequence_loss_by_example(
             [out], [labels], [tf.ones_like(labels, dtype=tf.float32)])
         loss = tf.reduce_mean(loss)
     return loss
    def __init__(self, config, is_training=False):
        self.config = config
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.hidden_size = hidden_size = config.hidden_size
        self.num_layers = 1
        vocab_size = config.vocab_size
        self.max_grad_norm = config.max_grad_norm
        self.use_lstm = config.use_lstm

        # Placeholders for inputs.
        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])
        # set_shape() returns None, so build the tensor first, refine its shape,
        # and only then assign it.
        initial_state = array_ops.zeros(tf.stack([self.batch_size, self.num_steps]),
                                        dtype=tf.float32)
        initial_state.set_shape([None, self.num_steps])
        self.initial_state = initial_state

        embedding = tf.get_variable('embedding', [self.config.vocab_size, self.config.hidden_size])

        # Set up ACT cell and inner rnn-type cell for use inside the ACT cell.
        with tf.variable_scope("rnn"):
            if self.use_lstm:
                inner_cell = BasicLSTMCell(self.config.hidden_size)
            else:
                inner_cell = GRUCell(self.config.hidden_size)

        with tf.variable_scope("ACT"):

            act = ACTCell(self.config.hidden_size, inner_cell, config.epsilon,
                          max_computation=config.max_computation, batch_size=self.batch_size)

        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

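        # Split [batch_size, num_steps, hidden_size] into a list of num_steps
        # tensors of shape [batch_size, hidden_size], as expected by static_rnn.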
        inputs = [tf.squeeze(single_input, [1]) for single_input in tf.split(inputs, self.config.num_steps, 1)]

        self.outputs, final_state = static_rnn(act, inputs, dtype = tf.float32)

        # Softmax to get probability distribution over vocab.
        output = tf.reshape(tf.concat(self.outputs, 1), [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b   # dim (numsteps*batchsize, vocabsize)

        loss = sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([batch_size * num_steps])],
                vocab_size)

        # Add up loss and retrieve batch-normalised ponder cost: sum N + sum Remainder.
        ponder_cost = act.calculate_ponder_cost(time_penalty=self.config.ponder_time_penalty)
        self.cost = (tf.reduce_sum(loss) / batch_size) + ponder_cost
        self.final_state = self.outputs[-1]

        if is_training:
            self.lr = tf.Variable(0.0, trainable=False)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.max_grad_norm)
            optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #4
    def build_graph(self):
        config = self.configuration
        self.reader = utils.DataReader(seq_len=config.seq_length,
                                       batch_size=config.batch_size,
                                       data_filename=config.data_filename)

        self.cell = FWRNNCell(num_units=config.rnn_size)

        self.input_data = tf.placeholder(tf.int32, [None, config.input_length])
        self.answers = tf.placeholder(tf.int32, [None, 1])
        self.initial_state = self.cell.zero_state(
            tf.shape(self.answers)[0], tf.float32)
        self.fw_initial = self.cell.fw_zero(
            tf.shape(self.answers)[0], tf.float32)

        with tf.variable_scope("emb_input"):
            embedding = tf.get_variable(
                "emb", [config.size_chars, config.embedding_size])
            inputs = tf.split(
                tf.nn.embedding_lookup(embedding, self.input_data),
                config.input_length, 1)
            inputs = [tf.squeeze(input, [1]) for input in inputs]

        with tf.variable_scope("rnn_desig"):
            state = (self.initial_state, self.fw_initial)
            output = None

            for i, input in enumerate(inputs):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output, state = self.cell(input, state)

        with tf.variable_scope("softmax"):
            softmax_w = tf.get_variable("softmax_w",
                                        [config.rnn_size, config.size_chars])
            softmax_b = tf.get_variable("softmax_b", [config.size_chars])
            self.logits = tf.matmul(output, softmax_w) + softmax_b
            self.p = tf.nn.softmax(self.logits)
            self.output = tf.cast(tf.reshape(tf.argmax(self.p, 1), [-1, 1]),
                                  tf.int32)
            self.accuracy = tf.reduce_mean(
                tf.cast(tf.equal(self.output, self.answers), tf.float32))

            loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits], [tf.reshape(self.answers, [-1])],
                [tf.ones([config.batch_size])], config.size_chars)

        self.cost = tf.reduce_mean(loss)
        self.end_state = state

        train_vars = tf.trainable_variables()
        gradients, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, train_vars), config.grad_clip)
        optimizer = tf.train.AdamOptimizer()
        self.train_op = optimizer.apply_gradients(zip(gradients, train_vars))

        self.summary_accuracy = tf.summary.scalar('accuracy', self.accuracy)
        tf.summary.scalar('cost', self.cost)
        self.summary_all = tf.summary.merge_all()
Example #5
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        additional_cell_args = {}
        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'gridlstm':
            cell_fn = grid_rnn.Grid2LSTMCell
            additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0})
        elif args.model == 'gridgru':
            cell_fn = grid_rnn.Grid2GRUCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size, **additional_cell_args)

        self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

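        # Unroll the decoder over the inputs; during inference the loop function
        # replaces teacher forcing with the model's own previous prediction.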
        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                  loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [tf.reshape(self.targets, [-1])],
                                                [tf.ones([args.batch_size * args.seq_length])],
                                                args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(list(zip(grads, tvars)))
Example #6
    def __init__(self,
                 vocabulary_size,
                 batch_size,
                 sequence_length,
                 hidden_layer_size,
                 cells_size,
                 gradient_clip=5.,
                 training=True):

        cells = [rnn.LSTMCell(hidden_layer_size) for _ in range(cells_size)]
        self.cell = rnn.MultiRNNCell(cells)

        self.input_data = tf.placeholder(tf.int32, [batch_size, sequence_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, sequence_length])
        self.initial_state = self.cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
            softmax_layer = tf.get_variable("softmax_layer", [hidden_layer_size, vocabulary_size])
            softmax_bias = tf.get_variable("softmax_bias", [vocabulary_size])

        with tf.variable_scope("embedding", reuse=tf.AUTO_REUSE):
            embedding = tf.get_variable("embedding", [vocabulary_size, hidden_layer_size])
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        inputs = tf.split(inputs, sequence_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(previous, _):
            previous = tf.matmul(previous, softmax_layer) + softmax_bias
            previous_symbol = tf.stop_gradient(tf.argmax(previous, 1))
            return tf.nn.embedding_lookup(embedding, previous_symbol)

        with tf.variable_scope("rnn", reuse=tf.AUTO_REUSE):
            outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, self.cell, loop_function=loop if not training else None)
            output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_layer_size])

        self.logits = tf.matmul(output, softmax_layer) + softmax_bias
        self.probabilities = tf.nn.softmax(self.logits)

        loss = legacy_seq2seq.sequence_loss_by_example([self.logits], [tf.reshape(self.targets, [-1])], [tf.ones([batch_size * sequence_length])])

        with tf.name_scope("cost"):
            self.cost = tf.reduce_sum(loss) / batch_size / sequence_length
        self.final_state = last_state
        self.learning_rate = tf.Variable(0.0, trainable=False)
        trainable_vars = tf.trainable_variables()

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainable_vars), gradient_clip)

        with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            self.train_op = optimizer.apply_gradients(zip(grads, trainable_vars))

        tf.summary.histogram("logits", self.logits)
        tf.summary.histogram("probabilitiess", self.probabilities)
        tf.summary.histogram("loss", loss)
        tf.summary.scalar("cost", self.cost)
        tf.summary.scalar("learning_rate", self.learning_rate)
Example #7
 def compute_cost(self):
     losses = sequence_loss_by_example(
         [tf.reshape(self.pred, [-1])], [tf.reshape(self.ys, [-1])],
         [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
         average_across_timesteps=True,
         softmax_loss_function=self.msr_error)
     self.cost = tf.div(tf.reduce_sum(losses),
                        tf.cast(self.batch_size, tf.float32))
    def __init__(self, data, model='lstm', infer=False):
        self.rnn_size = 128
        self.n_layers = 2

        if infer:
            self.batch_size = 1
        else:
            self.batch_size = data.batch_size

        if model == 'rnn':
            cell_rnn = tf.nn.rnn_cell.BasicRNNCell
        elif model == 'gru':
            cell_rnn = tf.nn.rnn_cell.GRUCell
        elif model == 'lstm':
            cell_rnn = tf.nn.rnn_cell.LSTMCell
        cell = cell_rnn(
            self.rnn_size,
            name='basic_lstm_cell',
        )
        self.cell = tf.nn.rnn_cell.MultiRNNCell([cell] * self.n_layers)

        self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
        self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [self.rnn_size, data.words_size])
            softmax_b = tf.get_variable("softmax_b", [data.words_size])
            # with tf.device("/gpu:1"):
            embedding = tf.get_variable("embedding",
                                        [data.words_size, self.rnn_size])
            inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

        # tf.nn.dynamic_rnn can handle inputs whose shapes differ between runs,
        # whereas tf.nn.rnn requires the input shape to be fixed.
        outputs, final_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')
        self.output = tf.reshape(outputs, [-1, self.rnn_size])
        self.logits = tf.matmul(self.output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.final_state = final_state
        pred = tf.reshape(self.y_tf, [-1])

        # seq2seq loss
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [pred], [tf.ones_like(pred, dtype=tf.float32)])

        self.cost = tf.reduce_mean(loss)
        self.learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          clip_norm=5)

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #9
    def __init__(self, args, data, infer=False):
        if infer:
            args.batch_size = 1
            args.seq_length = 1
        with tf.name_scope('inputs'):
            self.input_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
            self.target_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])

        with tf.name_scope('model'):
            self.cell = MultiRNNCell([
                tf.nn.rnn_cell.DropoutWrapper(BasicLSTMCell(args.state_size),
                                              output_keep_prob=0.7)
            ] * args.num_layers)
            self.initial_state = self.cell.zero_state(args.batch_size,
                                                      tf.float32)
            with tf.variable_scope('rnnlm'):
                w = tf.get_variable('softmax_w',
                                    [args.state_size, data.vocab_size])
                b = tf.get_variable('softmax_b', [data.vocab_size])
                with tf.device("/cpu:0"):
                    embedding = tf.get_variable(
                        'embedding', [data.vocab_size, args.state_size])
                    self.embedding = embedding
                    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
                    self.in_data = self.input_data
                    self.inputs = inputs
            outputs, last_state = tf.nn.dynamic_rnn(
                self.cell, inputs, initial_state=self.initial_state)

        with tf.name_scope('loss'):
            output = tf.reshape(outputs, [-1, args.state_size])

            self.logits = tf.matmul(output, w) + b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state

            targets = tf.reshape(self.target_data, [-1])
            loss = seq2seq.sequence_loss_by_example(
                [self.logits], [targets],
                [tf.ones_like(targets, dtype=tf.float32)])
            self.cost = tf.reduce_sum(loss) / args.batch_size
            tf.summary.scalar('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.summary.scalar('learning_rate', self.lr)

            optimizer = tf.train.AdamOptimizer(self.lr)
            tvars = tf.trainable_variables()
            grads = tf.gradients(self.cost, tvars)
            for g in grads:
                tf.summary.histogram(g.name, g)
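            # Clip the global gradient norm to stabilize training.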
            grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)

            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
            self.merged_op = tf.summary.merge_all()
Example #10
    def init_ops(self):
        self._build_placeholder()

        # Get loss and prediction operations
        with tf.variable_scope(self.name) as scope:

            # Reuse variables for validation
            if self.reuse_params:
                scope.reuse_variables()

            # Build model
            network = self.build_model(input_var=self.input_var)

            # Softmax linear
            name = "l{}_softmax_linear".format(self.layer_idx)
            network = fc(name=name,
                         input_var=network,
                         n_hiddens=self.n_classes,
                         bias=0.0,
                         wd=0)
            self.activations.append((name, network))
            self.layer_idx += 1

            # Outputs of softmax linear are logits
            self.logits = network

            ######### Compute loss #########

            # Weighted cross-entropy loss for a sequence of logits (per example)
            ''' the old code
            loss = tf.nn.seq2seq.sequence_loss_by_example(
                [self.logits],
                [self.target_var],
                [tf.ones([self.batch_size * self.seq_length])],
                name="sequence_loss_by_example"
            )
            '''
            loss = legacy_seq2seq.sequence_loss_by_example(
                logits=[self.logits],
                targets=[self.target_var],
                weights=[tf.ones([self.batch_size * self.seq_length])])
            loss = tf.reduce_sum(loss) / self.batch_size
            # Regularization loss
            regular_loss = tf.add_n(tf.get_collection("losses",
                                                      scope=scope.name + "\/"),
                                    name="regular_loss")

            # print " "
            # print "Params to compute regularization loss:"
            # for p in tf.get_collection("losses", scope=scope.name + "\/"):
            #     print p.name
            # print " "

            # Total loss
            self.loss_op = tf.add(loss, regular_loss)

            # Predictions
            self.pred_op = tf.argmax(self.logits, 1)
Example #11
def generate_sequence_output(
        num_encoder_symbols,
        encoder_outputs,
        encoder_state,
        targets,
        sequence_length,
        num_decoder_symbols,  # vocab_size
        weights,
        buckets,
        softmax_loss_function=None,
        per_example_loss=False,
        name=None,
        use_attention=False):

    if len(targets) < buckets[-1][1]:  # decoder_output_length
        raise ValueError("Length of targets (%d) must be at least that of last"
                         "bucket (%d)." % (len(targets), buckets[-1][1]))

    # 4-1. Define all input
    all_inputs = encoder_outputs + targets + weights
    with tf.name_scope(name, "model_with_buckets", all_inputs):
        with tf.variable_scope("decoder_sequence_output", reuse=None):

            # 4-2. Get attention_encoder_outputs(=logits) and attention_weights
            logits, attention_weights = attention_RNN(
                encoder_outputs,
                encoder_state,
                num_decoder_symbols,
                sequence_length,
                use_attention=use_attention)

            if per_example_loss:
                assert len(logits) == len(targets)

                # 4-3. Reshape to calculate loss.
                bucket_target = [
                    tf.reshape(tf.to_int64(x), [-1]) for x in targets
                ]

                # 4-4. Get loss function
                crossent = sequence_loss_by_example(
                    logits,
                    bucket_target,
                    weights,
                    softmax_loss_function=softmax_loss_function)
            else:
                assert len(logits) == len(targets)
                bucket_target = [
                    tf.reshape(tf.to_int64(x), [-1]) for x in targets
                ]
                crossent = sequence_loss(
                    logits,
                    bucket_target,
                    weights,
                    softmax_loss_function=softmax_loss_function)

    return logits, crossent
Example #12
    def loss_op(self):

        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [self.pred],
            [tf.ones_like(self.pred, dtype=tf.float64)],
        )

        self.cost = tf.reduce_mean(loss)
Example #13
def model(cell_state_size, rnn_cells_depth, batch_size, batch_len,
          number_of_tokens, reuse):

    input_placeholder = tf.placeholder(tf.int32,
                                       shape=(None, batch_len),
                                       name="input")
    target_placeholder = tf.placeholder(tf.int32,
                                        shape=(None, batch_len),
                                        name="target")
    # make dictionary for letters (60, 128)

    with tf.variable_scope("rnn") as scope:
        if reuse:
            scope.reuse_variables()

        cell = tf.nn.rnn_cell.BasicLSTMCell(cell_state_size)
        #cell = tf.nn.rnn_cell.BasicRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.IntersectionRNNCell(cell_state_size)
        #cell = tf.contrib.rnn.LSTMCell(cell_state_size)
        #cell = tf.contrib.rnn.TimeFreqLSTMCell(cell_state_size)
        rnn_cell = tf.nn.rnn_cell.MultiRNNCell([cell] * rnn_cells_depth)

        W = tf.get_variable("W", shape=(128, number_of_tokens))
        b = tf.get_variable("b", shape=(number_of_tokens))

        embedding = tf.get_variable("embedding", [number_of_tokens, 128])
        # (60, 50, 128)
        rnn_input = tf.nn.embedding_lookup(embedding, input_placeholder)
        # 50 of (60, 1, 128)
        rnn_input = tf.split(rnn_input, batch_len, axis=1)
        rnn_input = [tf.squeeze(rni, [1]) for rni in rnn_input]

        # map input from id numbers to rnn states
        decoder_initial_state = rnn_cell.zero_state(batch_size, tf.float32)
        # outputs list of 50 - (60,128)
        outputs, last_state = seq2seq.rnn_decoder(rnn_input,
                                                  decoder_initial_state,
                                                  rnn_cell,
                                                  scope="rnn")
    # (60, -1)
    outputs = tf.concat(outputs, 1)
    # (-1, 128) ie a list of letters
    outputs = tf.reshape(outputs, [-1, 128])

    # (3000, number_of_tokens)
    logits = tf.matmul(outputs, W) + b
    #probs = tf.nn.softmax(logits, 1, name="probs")
    probs = tf.nn.softmax(logits, -1, name="probs")

    loss = seq2seq.sequence_loss_by_example(
        [logits], [tf.reshape(target_placeholder, [-1])],
        [tf.ones([batch_size * batch_len])], number_of_tokens)
    return ([
        loss, probs, decoder_initial_state, input_placeholder,
        target_placeholder, last_state, logits
    ])
Example #14
    def __init__(self, data, model='lstm', infer=False, 
                layers_size=128, num_layers=2):
        self.rnn_size = layers_size
        self.n_layers = num_layers
        print('build model')

        if infer:
            self.batch_size = 1
        else:
            self.batch_size = data.batch_size

        if model == 'rnn':
            cell_rnn = rnn.BasicRNNCell
        elif model == 'gru':
            cell_rnn = rnn.GRUCell
        elif model == 'lstm':
            cell_rnn = rnn.BasicLSTMCell

        cell = cell_rnn(self.rnn_size, state_is_tuple=False)
        self.cell = rnn.MultiRNNCell([cell] * self.n_layers, state_is_tuple=False)

        self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
        self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [self.rnn_size, data.words_size])
            softmax_b = tf.get_variable("softmax_b", [data.words_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable(
                    "embedding", [data.words_size, self.rnn_size])
                inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

        outputs, final_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')

        self.output = tf.reshape(outputs, [-1, self.rnn_size])
        self.logits = tf.matmul(self.output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.pre = tf.argmax(self.probs, 1)
        self.final_state = final_state
        pred = tf.reshape(self.y_tf, [-1])
        # seq2seq
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [pred],
                                                [tf.ones_like(pred, dtype=tf.float32)],)

        self.cost = tf.reduce_mean(loss)
        self.learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        print('build model done')
Example #15
    def __init__(self, args, embedding):
        self.args = args

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name='STAND_input')
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name='STAND_targets')
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
        self.embedding = embedding
        with tf.variable_scope('STAND'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            inputs = tf.split(axis=1, num_or_size_splits=args.seq_length, value=tf.nn.embedding_lookup(self.embedding, self.input_data))
            # Build a list (not a lazy map iterator) so rnn_decoder can consume it.
            inputs = [tf.nn.l2_normalize(tf.squeeze(input_, [1]), 1) for input_ in inputs]

        def loop(prev, i):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.l2_normalize(tf.nn.embedding_lookup(embedding, prev_symbol), 1)

        o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=None, scope='STAND')
        with tf.variable_scope('STAND', reuse=True) as scope:
            sf_o, _ = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop, scope=scope)
        output = tf.reshape(tf.concat(axis=1, values=o), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        sf_output = tf.reshape(tf.concat(axis=1, values=sf_o), [-1, args.rnn_size])
        self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
        self.self_feed_probs = tf.nn.softmax(self_feed_logits)

        loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])],
                args.vocab_size)
        self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                args.grad_clip)
        for g, v in zip(grads, tvars):
            print(v.name)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #16
    def build(self):
        self.inputs = tf.placeholder(tf.int32, [self.batch_size, None])
        self.targets = tf.placeholder(tf.int32, [self.batch_size, None])
        self.keep_prob = tf.placeholder(tf.float32)
        self.seq_len = tf.placeholder(tf.int32, [self.batch_size])
        self.learning_rate = tf.placeholder(tf.float64)

        with tf.variable_scope('rnn'):
            w = tf.get_variable("softmax_w",
                                [self.num_units, self.data.words_size])
            b = tf.get_variable("softmax_b", [self.data.words_size])

            embedding = tf.get_variable("embedding",
                                        [self.data.words_size, self.num_units])
            inputs = tf.nn.embedding_lookup(embedding, self.inputs)

        self.cell = tf.nn.rnn_cell.MultiRNNCell(
            [self.unit() for _ in range(self.num_layer)])
        self.init_state = self.cell.zero_state(self.batch_size,
                                               dtype=tf.float32)
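        # dynamic_rnn consumes the whole [batch, time, units] tensor and respects
        # per-example lengths via the seq_len placeholder.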
        output, self.final_state = tf.nn.dynamic_rnn(
            self.cell,
            inputs=inputs,
            sequence_length=self.seq_len,
            initial_state=self.init_state,
            scope='rnn')
        with tf.name_scope('fc'):
            y = tf.reshape(output, [-1, self.num_units])
            logits = tf.matmul(y, w) + b

        with tf.name_scope('softmax'):
            prob = tf.nn.softmax(logits)

        self.prob = tf.reshape(prob, [self.batch_size, -1])
        pre = tf.argmax(prob, 1)
        self.pre = tf.reshape(pre, [self.batch_size, -1])

        targets = tf.reshape(self.targets, [-1])
        with tf.name_scope('loss'):
            loss = seq2seq.sequence_loss_by_example(
                [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)])
            self.loss = tf.reduce_mean(loss)

        with tf.name_scope('summary'):
            tf.summary.scalar('loss', self.loss)
            self.merged_summary = tf.summary.merge_all()

        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              5)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #17
 def __init__(self,args, mode='TRAIN'):
     '''Create the model.
     
     Args:
         args: parsed arguments
         mode: TRAIN | EVAL | INFER
     '''
     # When sample, the batch and seq length = 1
     if mode == 'INFER':
         args.batch_size = 1
         args.seq_length = 1
     cell = rnn.BasicLSTMCell(args.rnn_size,state_is_tuple = True)
     self.cell = cell = rnn.MultiRNNCell([cell]*args.num_layers, state_is_tuple = True)
     
     # Build the inputs and outputs placeholders
     self.input_data = tf.placeholder(tf.int32,[args.batch_size,args.seq_length])
     self.targets = tf.placeholder(tf.int32,[args.batch_size,args.seq_length])
     self.initial_state = cell.zero_state(args.batch_size,dtype = tf.float32)
     
     with tf.name_scope('rnn_cells'):
         # final w
         softmax_w = tf.get_variable('softmax_w',[args.rnn_size,args.vocab_size])
         # final bias
         softmax_b = tf.get_variable('softmax_b',[args.vocab_size])
         with tf.device('/cpu:0'):
             embedding = tf.get_variable('embedding',[args.vocab_size,args.rnn_size],
                                         dtype = tf.float32)
             inputs = tf.split(tf.nn.embedding_lookup(embedding,self.input_data),
                               args.seq_length,1)
             inputs = [tf.squeeze(input_,[1]) for input_ in inputs]
     
     def loop(prev, _):
         prev = tf.matmul(prev,softmax_w) + softmax_b
         prev_symbol = tf.stop_gradient(tf.argmax(prev,1))
         return tf.nn.embedding_lookup(embedding,prev_symbol)
     
     ## Using legacy_seq2seq#####################################
      outputs, last_state = legacy_seq2seq.rnn_decoder(
              inputs, self.initial_state, cell,
              loop_function=loop if mode == 'INFER' else None,
              scope='rnn_cells')
     output = tf.reshape(tf.concat(outputs,1),[-1,args.rnn_size])
     self.logits = tf.matmul(output,softmax_w) + softmax_b
     self.probs = tf.nn.softmax(self.logits)
     loss = legacy_seq2seq.sequence_loss_by_example(
             logits = [self.logits],targets = [tf.reshape(self.targets,[-1])],
             weights = [tf.ones([args.batch_size*args.seq_length])])
      self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
     self.final_state = last_state
     self.lr = tf.Variable(0.0, trainable = False)
     tvars = tf.trainable_variables()
     grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost,tvars),args.grad_clip)
     optimizer = tf.train.AdamOptimizer(self.lr)
     self.train_op = optimizer.apply_gradients(zip(grads,tvars))
Example #18
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        def lstm_cell(lstm_size):
            return tf.contrib.rnn.BasicLSTMCell(lstm_size)

        cells = []
        for i in range(args.num_layers):
            cells.append(lstm_cell(args.lstm_size))

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.output_data = tf.placeholder(tf.int32,
                                          [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('lstm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.lstm_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.lstm_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        inputs = tf.split(
            inputs, args.seq_length,
            1)  # splits the input into subtensor sequences dimension 1
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell)
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.lstm_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
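        # Greedy prediction: the most likely token at each position, reshaped
        # back to [batch_size, seq_length].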
        self.predicted_output = tf.reshape(tf.argmax(self.probs, 1),
                                           [args.batch_size, args.seq_length])

        ## loss definition
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.output_data, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.eta = tf.Variable(0.0, trainable=False)
        self.optimizer = tf.train.AdamOptimizer(self.eta).minimize(self.cost)
Example #19
def generate_sequence_output(num_encoder_symbols,
                             encoder_outputs,
                             encoder_state,
                             sequence_length,
                             num_decoder_symbols,
                             targets,
                             weights,
                             buckets,
                             softmax_loss_function=None,
                             per_example_loss=False,
                             name=None,
                             use_attention=False):
    if len(targets) and len(targets) < buckets[-1][1]:
        raise ValueError("Length of targets (%d) must be at least that of last"
                         "bucket (%d)." % (len(targets), buckets[-1][1]))

    all_inputs = encoder_outputs + targets + weights
    with tf.name_scope(name, "model_with_buckets", all_inputs):
        with tf.variable_scope("decoder_sequence_output", reuse=None):
            logits, attention_weights = attention_RNN(
                encoder_outputs,
                encoder_state,
                num_decoder_symbols,
                sequence_length,
                use_attention=use_attention)

            if len(targets):
                if per_example_loss:
                    assert len(logits) == len(targets)
                    # We need to make each target an int64 tensor and set its shape.
                    bucket_target = [
                        tf.reshape(tf.to_int64(x), [-1]) for x in targets
                    ]
                    crossent = sequence_loss_by_example(
                        logits,
                        bucket_target,
                        weights,
                        softmax_loss_function=softmax_loss_function)
                else:
                    assert len(logits) == len(targets)
                    bucket_target = [
                        tf.reshape(tf.to_int64(x), [-1]) for x in targets
                    ]
                    crossent = sequence_loss(
                        logits,
                        bucket_target,
                        weights,
                        softmax_loss_function=softmax_loss_function)
            else:
                crossent = None

    return logits, crossent
Example #20
    def __init__(self, infer=False):
        if infer:
            CONFIG["batch_size"] = 1
            CONFIG["seq_length"] = 1
        with tf.name_scope('inputs'):
            self.input_data = tf.placeholder(
                tf.int32, [CONFIG["batch_size"], CONFIG["seq_length"]])
            self.target_data = tf.placeholder(
                tf.int32, [CONFIG["batch_size"], CONFIG["seq_length"]])

        with tf.name_scope('model'):
            self.cell = rnn.GRUCell(CONFIG["hidden_size"])
            self.cell = rnn.MultiRNNCell([self.cell] * CONFIG["num_layers"])
            self.initial_state = self.cell.zero_state(CONFIG["batch_size"],
                                                      tf.float32)
            with tf.variable_scope('rnnlm'):
                softmax_w = tf.get_variable(
                    'softmax_w', [CONFIG["hidden_size"], data.vocabulary_size])
                softmax_b = tf.get_variable('softmax_b',
                                            [data.vocabulary_size])
                embedding = tf.get_variable(
                    'embedding', [data.vocabulary_size, CONFIG["hidden_size"]])
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)
            outputs, last_state = tf.nn.dynamic_rnn(
                self.cell, inputs, initial_state=self.initial_state)

        with tf.name_scope('loss'):
            output = tf.reshape(outputs, [-1, CONFIG["hidden_size"]])

            self.logits = tf.matmul(output, softmax_w) + softmax_b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state

            targets = tf.reshape(self.target_data, [-1])
            loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits], [targets],
                [tf.ones_like(targets, dtype=tf.float32)])
            self.cost = tf.reduce_sum(loss) / CONFIG["batch_size"]
            tf.summary.scalar('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.summary.scalar('learning_rate', self.lr)
            optimizer = tf.train.AdamOptimizer(self.lr)
            tvars = tf.trainable_variables()
            grads = tf.gradients(self.cost, tvars)
            for g in grads:
                tf.summary.histogram(g.name, g)
            grads, _ = tf.clip_by_global_norm(grads, CONFIG["grad_clip"])

            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
            self.merged_op = tf.summary.merge_all()
Example #21
    def __init__(self, data, model='lstm', infer=False):
        self.rnn_size = 128
        self.n_layers = 2

        if infer:
            self.batch_size = 1
        else:
            self.batch_size = data.batch_size

        if model == 'rnn':
            cell_rnn = rnn.BasicRNNCell
        elif model == 'gru':
            cell_rnn = rnn.GRUCell
        elif model == 'lstm':
            cell_rnn = rnn.BasicLSTMCell

        cell = cell_rnn(self.rnn_size, state_is_tuple=False)
        self.cell = rnn.MultiRNNCell([cell] * self.n_layers, state_is_tuple=False)

        self.x_tf = tf.placeholder(tf.int32, [self.batch_size, None])
        self.y_tf = tf.placeholder(tf.int32, [self.batch_size, None])

        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [self.rnn_size, data.words_size])
            softmax_b = tf.get_variable("softmax_b", [data.words_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable(
                    "embedding", [data.words_size, self.rnn_size])
                inputs = tf.nn.embedding_lookup(embedding, self.x_tf)

        outputs, final_state = tf.nn.dynamic_rnn(
            self.cell, inputs, initial_state=self.initial_state, scope='rnnlm')

        self.output = tf.reshape(outputs, [-1, self.rnn_size])
        self.logits = tf.matmul(self.output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.final_state = final_state
        pred = tf.reshape(self.y_tf, [-1])
        # seq2seq
        loss = seq2seq.sequence_loss_by_example([self.logits],
                                                [pred],
                                                [tf.ones_like(pred, dtype=tf.float32)],)

        self.cost = tf.reduce_mean(loss)
        self.learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #22
    def __init__(self, args, training=True):
        """Initialize RNN model"""
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        cell_fn = rnn.GRUCell
        cells = [cell_fn(args.rnn_size) for _ in range(args.num_layers)]

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)
        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnn'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                         loop_function=loop if not training else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])])

        with tf.name_scope('loss'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length

        self.final_state = last_state
        self.learning_rate = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)

        with tf.name_scope('optimization'):
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #23
    def loss(self, batch_x, batch_y):
        self.inference(batch_x, training=True)
        batch_size = int(batch_x.shape[0])
        seq_length = int(batch_x.shape[1])

        seq_loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logit], [tf.reshape(batch_y, [-1])],
            [tf.ones([batch_size * seq_length])])

        with tf.name_scope('loss'):
            self.loss = tf.reduce_sum(seq_loss) / batch_size / seq_length

        return self.loss
Example #24
    def __init__(self, args, data):
        with tf.name_scope("inputs"):
            self.input_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
            self.target_data = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])

        with tf.name_scope("model"):
            self.cell = rnn.MultiRNNCell([
                rnn.BasicLSTMCell(args.state_size)
                for _ in range(args.num_layer)
            ])
            self.initial_state = self.cell.zero_state(args.batch_size,
                                                      tf.float32)
            with tf.variable_scope("rnnlm"):
                w = tf.get_variable('softmax_w',
                                    [args.state_size, data.vocab_size])
                b = tf.get_variable('softmax_b', [data.vocab_size])
                with tf.device('/cpu:0'):
                    embedding = tf.get_variable(
                        'embedding', [data.vocab_size, args.state_size])
                    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
                outputs, last_state = tf.nn.dynamic_rnn(
                    self.cell, inputs, initial_state=self.initial_state)

        with tf.name_scope("loss"):
            output = tf.reshape(outputs, [-1, args.state_size])
            self.logits = tf.matmul(output, w) + b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state

            targets = tf.reshape(self.target_data, [-1])
            loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits], [targets],
                [tf.ones_like(targets, dtype=tf.float32)])
            self.cost = tf.reduce_sum(loss) / args.batch_size
            scalar_summary('loss', self.cost)

        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            scalar_summary('learning_rate', self.lr)

            optimizer = tf.train.AdamOptimizer(self.lr)
            train_vars = tf.trainable_variables()
            grads = tf.gradients(self.cost, train_vars)
            for g in grads:
                histogram_summary(g.name, g)
            grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
            self.train_op = optimizer.apply_gradients(zip(grads, train_vars))
            self.merged_op = merge_all_summaries()
Example #25
 def compute_cost(self):
     from tensorflow.contrib import legacy_seq2seq
     losses = legacy_seq2seq.sequence_loss_by_example(
         [tf.reshape(self.pred, [-1], name='reshape_pred')],
         [tf.reshape(self.ys, [-1], name='reshape_target')],
         [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
         average_across_timesteps=True,
         softmax_loss_function=self.ms_error,
         name='losses')
     with tf.name_scope('average_cost'):
         self.cost = tf.div(tf.reduce_sum(losses, name='losses_sum'),
                            self.batch_size,
                            name='average_cost')
         tf.summary.scalar('cost', self.cost)
Example #26
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size, state_is_tuple=False)

        self.cell = cell = rnn.MultiRNNCell([cell] * args.num_layers,
                                            state_is_tuple=False)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, None])
        # the length of input sequence is variable.
        self.targets = tf.placeholder(tf.int32, [args.batch_size, None])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                inputs = tf.nn.embedding_lookup(embedding, self.input_data)

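        # Run the stacked cell over the variable-length input with dynamic_rnn.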
        outputs, last_state = tf.nn.dynamic_rnn(
            cell, inputs, initial_state=self.initial_state, scope='rnnlm')
        output = tf.reshape(outputs, [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        targets = tf.reshape(self.targets, [-1])
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [targets],
            [tf.ones_like(targets, dtype=tf.float32)], args.vocab_size)
        self.cost = tf.reduce_mean(loss)
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #27
def train_neural_network():
    logits, last_state, _, _, _ = neural_network()
    targets = tf.reshape(output_targets, [-1])
    from tensorflow.contrib import legacy_seq2seq
    loss = legacy_seq2seq.sequence_loss_by_example(
        [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)],
        len(words))
    cost = tf.reduce_mean(loss)
    learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

    Session_config = tf.ConfigProto(allow_soft_placement=True)
    Session_config.gpu_options.allow_growth = True

    trainds = DataSet(len(poetrys_vector))

    with tf.Session(config=Session_config) as sess:
        with tf.device('/gpu:2'):
            sess.run(tf.global_variables_initializer())

            saver = tf.train.Saver(tf.global_variables())
            last_epoch = load_model(sess, saver, 'model/')

            for epoch in range(last_epoch + 1, 100):
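                # Exponentially decay the learning rate each epoch: 0.002 * 0.97**epoch.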
                sess.run(tf.assign(learning_rate, 0.002 * (0.97**epoch)))
                #sess.run(tf.assign(learning_rate, 0.01))

                all_loss = 0.0
                for batch in range(n_chunk):
                    x, y = trainds.next_batch(batch_size)
                    train_loss, _, _ = sess.run([cost, last_state, train_op],
                                                feed_dict={
                                                    input_data: x,
                                                    output_targets: y
                                                })

                    all_loss += train_loss

                    if batch % 50 == 1:
                        # print(epoch, batch, 0.01, train_loss)
                        print(epoch, batch, 0.002 * (0.97**epoch), train_loss)

                saver.save(sess, 'model/poetry.module', global_step=epoch)
                print(epoch, ' Loss: ', all_loss * 1.0 / n_chunk)
Exemplo n.º 28
0
    def __init__(self, args, text, test=False):
        if test:
            args.batch_size = 1
            args.seq_length = 1
        with tf.name_scope('inputs'):
            self.input_text = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
            self.target_text = tf.placeholder(
                tf.int32, [args.batch_size, args.seq_length])
        with tf.name_scope('model'):
            # LSTM cell; state_size is the size of the hidden layer.
            # Build a fresh cell per layer: reusing one cell object across
            # layers would share weights (and fails in newer TF versions).
            self.cells = rnn.MultiRNNCell(
                [rnn.BasicLSTMCell(args.state_size)
                 for _ in range(args.num_layers)])
            self.initial_state = self.cells.zero_state(args.batch_size,
                                                       tf.float32)
            with tf.variable_scope('rnnlm'):
                w = tf.get_variable('softmax_w',
                                    [args.state_size, text.vocabulary_size])
                b = tf.get_variable('softmax_b', [text.vocabulary_size])
                with tf.device('/cpu:0'):
                    embedding = tf.get_variable(
                        'embedding', [text.vocabulary_size, args.state_size])
                    inputs = tf.nn.embedding_lookup(embedding, self.input_text)
            outputs, last_state = tf.nn.dynamic_rnn(
                self.cells, inputs, initial_state=self.initial_state)
        with tf.name_scope('loss'):
            output = tf.reshape(outputs, [-1, args.state_size])
            self.logits = tf.matmul(output, w) + b
            self.probs = tf.nn.softmax(self.logits)
            self.last_state = last_state
            targets = tf.reshape(self.target_text, [-1])
            loss = seq2seq.sequence_loss_by_example(
                [self.logits], [targets],
                [tf.ones_like(targets, dtype=tf.float32)])
            self.loss_avg = tf.reduce_sum(loss) / args.batch_size
            tf.summary.scalar('loss', self.loss_avg)
        with tf.name_scope('optimize'):
            self.lr = tf.placeholder(tf.float32, [])
            tf.summary.scalar('learning_rate', self.lr)
            optimizer = tf.train.AdamOptimizer(self.lr)
            tvars = tf.trainable_variables()
            grads = tf.gradients(self.loss_avg, tvars)
            for g in grads:
                tf.summary.histogram(g.name, g)
            grads, _ = tf.clip_by_global_norm(grads, args.grad_clip)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
            self.merged_op = tf.summary.merge_all()
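
A rough sketch of how the learning-rate placeholder and merged summaries above could be fed during training; CharRNNModel, args, text, batches, and the 'logs' directory are assumed names, not from the source:

# Hypothetical training loop; only the model.* attributes appear in the example above.
import tensorflow as tf

model = CharRNNModel(args, text)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter('logs', sess.graph)
    for step, (x, y) in enumerate(batches):
        feed = {model.input_text: x, model.target_text: y, model.lr: 0.002}
        loss, summary, _ = sess.run(
            [model.loss_avg, model.merged_op, model.train_op], feed_dict=feed)
        writer.add_summary(summary, step)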
Exemplo n.º 29
0
    def loss(self, out):
        with tf.name_scope("loss"):
            targets = tf.reshape(self.y, [-1])
            out = tf.reshape(out, [-1, self.num_class])
            loss = seq2seq.sequence_loss_by_example(
                [out], [targets], [tf.ones_like(targets, dtype=tf.float32)])
            self.loss = tf.reduce_mean(loss)
            self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(
                self.loss, global_step=self.global_step)

        with tf.name_scope("output"):
            out = tf.nn.softmax(out)
            self.prob = tf.reshape(out, [-1, self.maxlen, self.num_class], name='prob')
            out_max = tf.argmax(self.prob, -1, output_type=tf.int32)
            self.predictions = tf.reshape(out_max, [-1, self.maxlen], name='predictions')

        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, self.y)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
Exemplo n.º 30
0
    def pre_processing(self, args, cell, training):
        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)
        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

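        # At sampling time (training=False) the decoder feeds the embedding of
        # its previous argmax prediction back in as the next input.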
        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                         loop_function=loop if not training else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        # output layer
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
Exemplo n.º 31
0
def generate_sequence_output(num_encoder_symbols,
                             encoder_outputs, 
                             encoder_state, 
                             targets,
                             sequence_length, 
                             num_decoder_symbols, 
                             weights,
                             buckets, 
                             softmax_loss_function=None,
                             per_example_loss=False, 
                             name=None, 
                             use_attention=False):
  if len(targets) < buckets[-1][1]:
    raise ValueError("Length of targets (%d) must be at least that of last "
                     "bucket (%d)." % (len(targets), buckets[-1][1]))

  all_inputs = encoder_outputs + targets + weights
  with tf.name_scope(name, "model_with_buckets", all_inputs):
    with tf.variable_scope("decoder_sequence_output", reuse=None):
      logits, attention_weights = attention_RNN(encoder_outputs, 
                                                encoder_state,
                                                num_decoder_symbols,
                                                sequence_length,
                                                use_attention=use_attention)
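      # per_example_loss=True returns one loss value per sequence in the batch;
      # otherwise sequence_loss reduces everything to a single scalar.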
      if per_example_loss:
        assert len(logits) == len(targets)
        # We need to make each target an int64 tensor and set its shape.
        bucket_target = [tf.reshape(tf.to_int64(x), [-1]) for x in targets]
        crossent = sequence_loss_by_example(
              logits, bucket_target, weights,
              softmax_loss_function=softmax_loss_function)
      else:
        assert len(logits) == len(targets)
        bucket_target = [tf.reshape(tf.to_int64(x), [-1]) for x in targets]
        crossent = sequence_loss(
              logits, bucket_target, weights,
              softmax_loss_function=softmax_loss_function)

  return logits, crossent
Exemplo n.º 32
0
def train_neural_network():
    logits, last_state, _, _, _ = neural_network()
    targets = tf.reshape(output_targets, [-1])  # [batch_size*28*28,]
    # loss = tf.nn.seq2seq.sequence_loss_by_example([logits], [targets], [tf.ones_like(targets, dtype=tf.float32)],
    #                                               len(words))
    loss = legacy_seq2seq.sequence_loss_by_example(
        [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)])

    # loss = legacy_seq2seq.sequence_loss([logits], [targets], [tf.ones_like(targets, dtype=tf.float32)])

    cost = tf.reduce_mean(loss)
    learning_rate = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        n_chunk = len(mnist.train.images) // batch_size
        for epoch in range(epochs):
            sess.run(tf.assign(learning_rate, 0.002 * (0.97**epoch)))
            # n = 0
            for batch in range(21):  # range(n_chunk):
                x_batches, _ = mnist.train.next_batch(batch_size)
                # y_batches = np.round(x_batches).astype(np.uint8)
                train_loss, _, _ = sess.run(
                    [cost, last_state, train_op],
                    feed_dict={
                        input_data: x_batches.reshape([-1, 28, 28, 1]),
                        output_targets: x_batches
                    })
                # n += 1
                if batch % 20 == 0:
                    print(epoch, batch, train_loss)
            if epoch % 1 == 0:
                saver.save(sess, logdir + 'model.ckpt', global_step=epoch)
Exemplo n.º 33
0
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            if training and (args.output_keep_prob < 1.0
                             or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(
                    cell,
                    input_keep_prob=args.input_keep_prob,
                    output_keep_prob=args.output_keep_prob)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # dropout beta testing: double check which one should affect next line
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if not training else None,
            scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
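
For sampling, a model like the one above is typically rebuilt with training=False (batch_size = seq_length = 1) and characters are drawn one at a time from self.probs. A rough sketch; Model, args, vocab (char to id), chars (id to char), and the checkpoint path are assumptions:

# Hypothetical sampling loop; only the model.* attributes come from the example.
import numpy as np
import tensorflow as tf

model = Model(args, training=False)
with tf.Session() as sess:
    tf.train.Saver().restore(sess, 'save/model.ckpt')  # assumed checkpoint path
    state = sess.run(model.initial_state)
    x = np.array([[vocab[' ']]], dtype=np.int32)       # seed character
    result = []
    for _ in range(500):
        feed = {model.input_data: x, model.initial_state: state}
        probs, state = sess.run([model.probs, model.final_state], feed_dict=feed)
        p = probs[0] / probs[0].sum()                  # guard against float drift
        idx = np.random.choice(len(p), p=p)
        result.append(chars[idx])
        x = np.array([[idx]], dtype=np.int32)
    print(''.join(result))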
Exemplo n.º 34
0
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            if training and (args.output_keep_prob < 1.0 or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(cell,
                                          input_keep_prob=args.input_keep_prob,
                                          output_keep_prob=args.output_keep_prob)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells, state_is_tuple=True)

        self.input_data = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(
            tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])

        embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # dropout beta testing: double check which one should affect next line
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        inputs = tf.split(inputs, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=loop if not training else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])

        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
                [self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([args.batch_size * args.seq_length])])
        with tf.name_scope('cost'):
            self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # instrument tensorboard
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('loss', loss)
        tf.summary.scalar('train_loss', self.cost)
Exemplo n.º 35
0
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cells = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            cells.append(cell)

        self.cell = cell = rnn.MultiRNNCell(cells)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
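        # Non-trainable counters kept in the graph so they are saved with
        # checkpoints (e.g. to resume training from the same batch and epoch).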
        self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False, dtype=tf.int32)
        self.inc_batch_pointer_op = tf.assign(self.batch_pointer, self.batch_pointer + 1)
        self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)
        self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
        tf.summary.scalar("time_batch", self.batch_time)

        def variable_summaries(var):
            """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                #with tf.name_scope('stddev'):
                #   stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
                #tf.summary.scalar('stddev', stddev)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))
                #tf.summary.histogram('histogram', var)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            variable_summaries(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            variable_summaries(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        tf.summary.scalar("cost", self.cost)
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
Exemplo n.º 36
0
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        additional_cell_args = {}
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
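            # state_is_tuple=False keeps c and h concatenated in a single
            # tensor (the older, deprecated state layout).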
            additional_cell_args.update({'state_is_tuple': False})
        elif args.model == 'gridlstm':
            cell_fn = grid_rnn.Grid2LSTMCell
            additional_cell_args.update({'use_peepholes': True, 'forget_bias': 1.0,
                                         'state_is_tuple': False, 'output_is_tuple': False})
        elif args.model == 'gridgru':
            cell_fn = grid_rnn.Grid2GRUCell
            additional_cell_args.update({'state_is_tuple': False, 'output_is_tuple': False})
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Build one cell per layer instead of reusing a single cell object,
        # which would otherwise share weights across layers.
        cells = [cell_fn(args.rnn_size, **additional_cell_args)
                 for _ in range(args.num_layers)]

        self.cell = cell = rnn_cell.MultiRNNCell(cells, state_is_tuple=False)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data),
                                  num_or_size_splits=args.seq_length, axis=1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell,
                                                  loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, axis=1), [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])])
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))