Example #1
from __future__ import print_function  # the code below uses Python 2 idioms (xrange)

import time

import numpy as np
import tensorflow as tf  # written against the TF 1.x graph API

# The remaining helpers (Dataset, get_char_weights, encode_char,
# get_lstm_weights, lstm, get_mlp_weights, mlp, get_arc_weights, arc_equation,
# get_rel_weights, rel_equation, get_joint_weights, joint_equation,
# sequence_loss, predict_arcs_rels) are assumed to come from this project's
# own modules; their import lines are omitted here rather than guessed.


class Parsing_Model_Joint(object):
    def add_placeholders(self):
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            if feature == 'chars':
                self.inputs_placeholder_dict[feature] = tf.placeholder(
                    tf.int32, shape=[None, None, None])
            else:
                self.inputs_placeholder_dict[feature] = tf.placeholder(
                    tf.int32, shape=[None, None])

        self.keep_prob = tf.placeholder(tf.float32)
        self.input_keep_prob = tf.placeholder(tf.float32)
        self.hidden_prob = tf.placeholder(tf.float32)
        self.mlp_prob = tf.placeholder(tf.float32)

    def add_word_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('word_embedding') as scope:
                embedding = tf.get_variable(
                    'word_embedding_mat',
                    self.loader.word_embeddings.shape,
                    initializer=tf.constant_initializer(
                        self.loader.word_embeddings))

            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['words']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0, 2])  # [seq_length, batch_size, embedding_dim]
        return inputs

    def add_jackknife_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('jk_embedding') as scope:
                embedding = tf.get_variable(
                    'jk_embedding_mat',
                    [self.loader.nb_jk + 1, self.opts.jk_dim])  # +1 for padding
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['jk']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0, 2])  # [seq_length, batch_size, embedding_dim]
        return inputs

    def add_stag_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('stag_embedding') as scope:
                embedding = tf.get_variable(
                    'stag_embedding_mat',
                    [self.loader.nb_stags, self.opts.stag_dim])  # NB: no +1 padding row here, unlike the other lookup tables
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['stags']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0, 2])  # [seq_length, batch_size, embedding_dim]
            #tf.add_to_collection('stag_embedding', embedding)
        return inputs

    def add_char_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding') as scope:
                embedding = tf.get_variable(
                    'char_embedding_mat',
                    [self.loader.nb_chars + 1, self.opts.chars_dim])  # +1 for padding

            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['chars']
            )  ## [batch_size, seq_len-1, word_len, embedding_dim]
            ## -1 because we don't have ROOT
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len-1, batch_size, word_len, embedding_dim]
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            inputs = encode_char(
                inputs, weights)  ## [seq_len-1, batch_size, nb_filters]
            shape = tf.shape(inputs)
            ## add 0 vectors for <-root->
            inputs = tf.concat([tf.zeros([1, shape[1], shape[2]]), inputs], 0)
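            ## the char CNN never sees the artificial <-root-> token, so the
            ## zero row prepended above realigns the char features with the
            ## word-level sequence, which does include the root position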
        return inputs

    def add_lstm(self, inputs, i, name, backward=False):
        prev_init = tf.zeros([2, tf.shape(inputs)[1],
                              self.opts.units])  # [2, batch_size, num_units]
        #prev_init = tf.zeros([2, 100, self.opts.units])  # [2, batch_size, num_units]
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            inputs_dim = self.opts.units * 2  ## concat after each layer
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                                   inputs_dim, self.opts.units,
                                   tf.shape(inputs)[1], self.hidden_prob)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(
                self.weight, [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
            cell_hidden = tf.scan(
                lambda prev, x: lstm(prev, x, weights, backward=backward),
                [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights),
                                  inputs, prev_init)
        #cell_hidden [seq_len, 2, batch_size, units]
        h = tf.unstack(cell_hidden, 2, axis=1)[1]  # [seq_len, batch_size, units]
        return h
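    # add_lstm unrolls one LSTM layer with tf.scan, carrying the stacked
    # [cell; hidden] pair as the scan state. The backward pass consumes the
    # reversed sequence, which starts on zero padding, so the reversed
    # non-padding mask is fed alongside the inputs to let the (project-local)
    # lstm() helper reset its state at sentence boundaries.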

    def add_dropout(self, inputs, keep_prob):
        ## inputs [seq_len, batch_size, inputs_dims/units]
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp * x, inputs)
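    # add_dropout samples ONE dropout mask of shape [batch_size, dim] and
    # reuses it at every time step via tf.map_fn (variational / "locked"
    # dropout); tf.nn.dropout also rescales the kept units by 1/keep_prob.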

    def add_projection(self, inputs):
        with tf.variable_scope('Projection') as scope:
            proj_U = tf.get_variable('weight',
                                     [self.outputs_dim, self.loader.nb_tags])
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U) + proj_b
            return outputs

    def add_loss_op(self, output, gold):
        cross_entropy = sequence_loss(output, gold, self.weight)
        loss = tf.reduce_sum(cross_entropy)
        return loss
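    # sequence_loss is assumed to be a project-local helper computing
    # per-token cross-entropy over [batch_size, seq_len, nb_classes] logits,
    # masked by self.weight so padding and <-root-> positions add nothing.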

    def add_accuracy(self, output, gold):
        predictions = tf.cast(tf.argmax(output, 2),
                              tf.int32)  ## [batch_size, seq_len]
        correct_predictions = self.weight * tf.cast(
            tf.equal(predictions, gold), tf.float32)
        accuracy = tf.reduce_sum(tf.cast(correct_predictions,
                                         tf.float32)) / tf.reduce_sum(
                                             tf.cast(self.weight, tf.float32))
        return predictions, accuracy

    def add_probs(self, output):
        self.probs = tf.nn.softmax(output)

    def add_train_op(self, loss):
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)
        return train_op

    def get_features(self):
        self.features = ['words', 'arcs', 'rels']
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if (self.opts.stag_dim > 0) or (self.opts.model
                                        == 'Parsing_Model_Joint'):
            self.features.append('stags')
        if self.opts.chars_dim > 0:
            self.features.append('chars')

    def add_biaffine(self, inputs):
        ## inputs [seq_len, batch_size, units]
        ## first define four different MLPs
        arc_roles = ['arc-dep', 'arc-head']
        rel_roles = ['rel-dep', 'rel-head']
        joint_roles = ['stag']
        vectors = {}
        for arc_role in arc_roles:
            for i in xrange(self.opts.mlp_num_layers):
                if i == 0:
                    inputs_dim = self.outputs_dim
                    vector_mlp = inputs
                else:
                    inputs_dim = self.opts.arc_mlp_units
                weights = get_mlp_weights('{}_MLP_Layer{}'.format(arc_role, i),
                                          inputs_dim, self.opts.arc_mlp_units)
                vector_mlp = self.add_dropout(
                    tf.map_fn(lambda x: mlp(x, weights), vector_mlp),
                    self.mlp_prob)
                ## [seq_len, batch_size, 2*mlp_units]
            vectors[arc_role] = vector_mlp
        weights = get_arc_weights('arc', self.opts.arc_mlp_units)
        arc_output = arc_equation(
            vectors['arc-head'], vectors['arc-dep'], weights
        )  # [batch_size, seq_len, seq_len] dim 1: deps, dim 2: heads
        #        arc_predictions = get_arcs(arc_output, self.test_opts) # [batch_size, seq_len]
        arc_predictions = tf.argmax(arc_output, 2)  # [batch_size, seq_len]
        for rel_role in rel_roles:
            for i in xrange(self.opts.mlp_num_layers):
                if i == 0:
                    inputs_dim = self.outputs_dim
                    vector_mlp = inputs
                else:
                    inputs_dim = self.opts.rel_mlp_units
                weights = get_mlp_weights('{}_MLP_Layer{}'.format(rel_role, i),
                                          inputs_dim, self.opts.rel_mlp_units)
                vector_mlp = self.add_dropout(
                    tf.map_fn(lambda x: mlp(x, weights), vector_mlp),
                    self.mlp_prob)
                ## [seq_len, batch_size, 2*mlp_units]
            vectors[rel_role] = vector_mlp
        weights = get_rel_weights('rel', self.opts.rel_mlp_units,
                                  self.loader.nb_rels)
        rel_output, rel_scores = rel_equation(
            vectors['rel-head'], vectors['rel-dep'], weights,
            arc_predictions)  #[batch_size, seq_len, nb_rels]
        ## joint stagging
        for joint_role in joint_roles:
            for i in xrange(self.opts.mlp_num_layers):
                if i == 0:
                    inputs_dim = self.outputs_dim
                    vector_mlp = inputs
                else:
                    inputs_dim = self.opts.joint_mlp_units
                weights = get_mlp_weights(
                    '{}_MLP_Layer{}'.format(joint_role, i), inputs_dim,
                    self.opts.joint_mlp_units)
                vector_mlp = self.add_dropout(
                    tf.map_fn(lambda x: mlp(x, weights), vector_mlp),
                    self.mlp_prob)
                ## [seq_len, batch_size, 2*mlp_units]
            vectors[joint_role] = vector_mlp
        weights = get_joint_weights('stag', self.opts.joint_mlp_units,
                                    self.loader.nb_stags)
        self.stag_embeddings = tf.transpose(weights['W-joint'], [1, 0])
        joint_output = joint_equation(
            vectors['stag'], weights)  # [batch_size, seq_len, nb_stags]
        return arc_output, rel_output, rel_scores, joint_output
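    # The scoring follows biaffine attention in the style of Dozat & Manning
    # (2017): with MLP outputs d_i (dependent) and h_j (head), arc_equation is
    # assumed to compute roughly score(i, j) = d_i^T U h_j + u^T h_j for every
    # token pair, and rel_equation scores labels only at the predicted heads.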

    def __init__(self, opts, test_opts=None):

        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = 100
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.jk_dim + self.opts.stag_dim + self.opts.nb_filters
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.jk_dim:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.stag_dim > 0:
            inputs_list.append(self.add_stag_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        inputs_shape = tf.shape(self.inputs_placeholder_dict['words'])
        ## no need to worry about the heads of <-root-> and zero-pads
        ## Let's get those non-padding places so we can reinitialize hidden states after each padding in the backward path
        ### because the backward path starts with zero pads.
        self.weight = tf.cast(
            tf.not_equal(self.inputs_placeholder_dict['words'],
                         tf.zeros(inputs_shape, tf.int32)),
            tf.float32)  ## [batch_size, seq_len]
        for i in xrange(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(
                self.add_lstm(inputs_tensor, i, 'Forward'),
                self.keep_prob)  ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(
                    self.add_lstm(tf.reverse(inputs_tensor, [0]), i,
                                  'Backward', True),
                    self.keep_prob)  ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([
                    forward_outputs_tensor,
                    tf.reverse(backward_outputs_tensor, [0])
                ], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        self.weight = self.weight * tf.cast(
            tf.not_equal(
                self.inputs_placeholder_dict['words'],
                tf.ones(inputs_shape, tf.int32) *
                self.loader.word_index['<-root->']),
            tf.float32)  ## [batch_size, seq_len]
        lstm_outputs = inputs_tensor  ## [seq_len, batch_size, outputs_dim]

        self.arc_outputs, rel_outputs, self.rel_scores, joint_output = self.add_biaffine(
            lstm_outputs)
        #        projected_outputs = tf.map_fn(lambda x: self.add_projection(x), lstm_outputs) #[seq_len, batch_size, nb_tags]
        #        projected_outputs = tf.transpose(projected_outputs, perm=[1, 0, 2]) # [batch_size, seq_len, nb_tags]
        self.loss = (
            self.add_loss_op(self.arc_outputs, self.inputs_placeholder_dict['arcs'])
            + self.add_loss_op(rel_outputs, self.inputs_placeholder_dict['rels'])
            + self.add_loss_op(joint_output, self.inputs_placeholder_dict['stags']))
        self.add_probs(joint_output)
        self.predicted_arcs, self.UAS = self.add_accuracy(
            self.arc_outputs, self.inputs_placeholder_dict['arcs'])
        self.predicted_rels, self.rel_acc = self.add_accuracy(
            rel_outputs, self.inputs_placeholder_dict['rels'])
        self.predicted_stags, self.stag_acc = self.add_accuracy(
            joint_output, self.inputs_placeholder_dict['stags'])
        self.train_op = self.add_train_op(self.loss)

    def run_batch(self, session, testmode=False):
        if not testmode:
            feed = {}
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[feat]] = self.loader.inputs_train_batch[feat]
            feed[self.keep_prob] = self.opts.dropout_p
            feed[self.hidden_prob] = self.opts.hidden_p
            feed[self.input_keep_prob] = self.opts.input_dp
            feed[self.mlp_prob] = self.opts.mlp_prob
            train_op = self.train_op
            _, loss, UAS, rel_acc, stag_acc = session.run(
                [train_op, self.loss, self.UAS, self.rel_acc, self.stag_acc],
                feed_dict=feed)
            return loss, UAS, rel_acc, stag_acc
        else:
            feed = {}
            predictions_batch = {}
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[feat]] = self.loader.inputs_test_batch[feat]
            feed[self.keep_prob] = 1.0
            feed[self.hidden_prob] = 1.0
            feed[self.input_keep_prob] = 1.0
            feed[self.mlp_prob] = 1.0
            #            loss, accuracy, predictions, weight = session.run([self.loss, self.accuracy, self.predictions, self.weight], feed_dict=feed)
            loss, predicted_arcs, predicted_rels, UAS, weight, arc_outputs, rel_scores, stag_acc, predicted_stags, probs = session.run(
                [
                    self.loss, self.predicted_arcs, self.predicted_rels,
                    self.UAS, self.weight, self.arc_outputs, self.rel_scores,
                    self.stag_acc, self.predicted_stags, self.probs
                ],
                feed_dict=feed)
            weight = weight.astype(bool)
            predicted_arcs_greedy = predicted_arcs[weight]
            predicted_rels_greedy = predicted_rels[weight]
            predicted_stags = predicted_stags[weight]
            predictions_batch['arcs_greedy'] = predicted_arcs_greedy
            predictions_batch['rels_greedy'] = predicted_rels_greedy
            predictions_batch['stags'] = predicted_stags
            non_padding = weight.astype(bool)
            non_padding[:, 0] = True  ## take the dummy root nodes
            predicted_arcs, predicted_rels = predict_arcs_rels(
                arc_outputs, rel_scores, non_padding)
            predictions_batch['arcs'] = predicted_arcs
            predictions_batch['rels'] = predicted_rels
            probs = probs[weight]
            #            print(predicted_greedy_arcs.shape)
            #            print(predicted_arcs.shape)
            #print(arc_outputs.shape)
            return loss, predictions_batch, UAS, probs

    def run_epoch(self, session, testmode=False):

        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            epoch_incomplete = next_batch(self.batch_size)
            while epoch_incomplete:
                loss, UAS, rel_acc, stag_acc = self.run_batch(session)
                print(
                    '{}/{}, loss {:.4f}, Raw UAS {:.4f}, Rel Acc {:.4f}, Stag Acc {:.4f}'
                    .format(self.loader._index_in_epoch,
                            self.loader.nb_train_samples, loss, UAS, rel_acc,
                            stag_acc),
                    end='\r')
                epoch_incomplete = next_batch(self.batch_size)
            print('\nEpoch Training Time {}'.format(time.time() -
                                                    epoch_start_time))
            return loss, UAS
        else:
            next_test_batch = self.loader.next_test_batch
            test_incomplete = next_test_batch(self.batch_size)
            output_types = [
                'arcs', 'rels', 'arcs_greedy', 'rels_greedy', 'stags'
            ]
            predictions = {output_type: [] for output_type in output_types}
            probs = []
            while test_incomplete:
                loss, predictions_batch, UAS, probs_batch = self.run_batch(
                    session, True)

                for name, pred in predictions_batch.items():
                    predictions[name].append(pred)
                #print('Testmode {}/{}, loss {}, accuracy {}'.format(self.loader._index_in_test, self.loader.nb_validation_samples, loss, accuracy), end = '\r')
                probs.append(probs_batch)
                print('Test mode {}/{}, Raw UAS {:.4f}'.format(
                    self.loader._index_in_test,
                    self.loader.nb_validation_samples, UAS),
                      end='\r')
                test_incomplete = next_test_batch(self.batch_size)
            for name, pred in predictions.items():
                predictions[name] = np.hstack(pred)
            if self.test_opts is not None:
                self.loader.output_arcs(predictions['arcs'],
                                        self.test_opts.predicted_arcs_file)
                self.loader.output_rels(predictions['rels'],
                                        self.test_opts.predicted_rels_file)
                self.loader.output_arcs(
                    predictions['arcs_greedy'],
                    self.test_opts.predicted_arcs_file_greedy)
                self.loader.output_rels(
                    predictions['rels_greedy'],
                    self.test_opts.predicted_rels_file_greedy)
                self.loader.output_stags(predictions['stags'],
                                         self.test_opts.predicted_stags_file)
                if self.test_opts.save_probs:
                    self.loader.output_probs(np.vstack(probs))
                if self.test_opts.get_weight:
                    stag_embeddings = session.run(self.stag_embeddings)
                    self.loader.output_weight(stag_embeddings)

            scores = self.loader.get_scores(predictions, self.opts,
                                            self.test_opts)
            #scores['UAS'] = np.mean(predictions['arcs'][self.loader.punc] == self.loader.gold_arcs[self.loader.punc])
            #scores['UAS_greedy'] = np.mean(predictions['arcs_greedy'][self.loader.punc] == self.loader.gold_arcs[self.loader.punc])
            return scores
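
# --- Illustration (not from the original code) ------------------------------
# A minimal NumPy sketch of the biaffine arc scoring that arc_equation is
# assumed to implement, shown for a single unbatched sentence. All names and
# shapes below are hypothetical; the real helper also handles batching and
# runs as TF ops inside the graph.
import numpy as np

def biaffine_arc_scores(dep, head, U, u):
    """dep, head: [seq_len, units]; U: [units, units]; u: [units].
    Returns [seq_len, seq_len] scores; rows = dependents, columns = heads."""
    # score(i, j) = dep_i^T U head_j + u^T head_j
    return dep.dot(U).dot(head.T) + head.dot(u)

rng = np.random.RandomState(0)
seq_len, units = 5, 8
dep, head = rng.randn(seq_len, units), rng.randn(seq_len, units)
U, u = rng.randn(units, units), rng.randn(units)
scores = biaffine_arc_scores(dep, head, U, u)
print(scores.shape, scores.argmax(axis=1))  # (5, 5) and a greedy head per token
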
Example #2
from __future__ import print_function  # the code below uses Python 2 idioms (xrange)

import time

import numpy as np
import tensorflow as tf  # TF 1.x graph API

# Project-local helpers (Dataset, get_char_weights, encode_char,
# get_lstm_weights, lstm, sequence_loss) are assumed to be importable from
# this repository's own modules; their exact import paths are not shown.


class Stagging_Model_Concat_Hw(object):
    def add_placeholders(self):
        #self.inputs_placeholder_list = [tf.placeholder(tf.int32, shape = [None, None]) for _ in xrange(2+self.opts.suffix+self.opts.num+self.opts.cap+self.opts.jackknife)] # 2 for text_sequences and tag_sequences, necessary no matter what
        #self.inputs_placeholder_list = [tf.placeholder(tf.int32, shape = [None, None]) for _ in xrange(6)] # 2 for text_sequences and tag_sequences, necessary no matter what
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            if feature == 'chars':
                self.inputs_placeholder_dict[feature] = tf.placeholder(tf.int32, shape = [None, None, None])
            else:
                self.inputs_placeholder_dict[feature] = tf.placeholder(tf.int32, shape = [None, None])

        self.keep_prob = tf.placeholder(tf.float32)  
        self.input_keep_prob = tf.placeholder(tf.float32)  
        self.hidden_prob = tf.placeholder(tf.float32)  

    def add_word_embedding(self): 
        with tf.device('/cpu:0'):
            with tf.variable_scope('word_embedding') as scope:
                embedding = tf.get_variable('word_embedding_mat', self.loader.word_embeddings.shape, initializer=tf.constant_initializer(self.loader.word_embeddings))

            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['words']) ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, embedding_dim]
        return inputs 

    def add_suffix_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('suffix_embedding') as scope:
                embedding = tf.get_variable('suffix_embedding_mat', [self.loader.nb_suffixes+1, self.opts.suffix_dim]) # +1 for padding

            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['suffix']) ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, embedding_dim]
        return inputs 

    def add_cap(self):
        inputs = tf.cast(tf.expand_dims(self.inputs_placeholder_dict['cap'], -1), tf.float32)
        inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, 1]
        return inputs # [seq_length, batch_size, 1]

    def add_num(self):
        inputs = tf.cast(tf.expand_dims(self.inputs_placeholder_dict['num'], -1), tf.float32)
        inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, 1]
        return inputs # [seq_length, batch_size, 1]
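    # cap and num are single 0/1 indicator features per token (capitalization
    # and numeral flags), each adding one input dimension; this is why
    # __init__ adds self.opts.cap + self.opts.num to inputs_dim.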

    def add_char_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding') as scope:
                embedding = tf.get_variable('char_embedding_mat', [self.loader.nb_chars+1, self.opts.chars_dim]) # +1 for padding

            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['chars']) ## [batch_size, seq_len, word_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len, batch_size, word_len, embedding_dim]
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            inputs = encode_char(inputs, weights) ## [seq_len, batch_size, nb_filters]
        return inputs 

    def add_jackknife_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('jk_embedding') as scope:
                embedding = tf.get_variable('jk_embedding_mat', [self.loader.nb_jk+1, self.opts.jk_dim]) # +1 for padding
            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['jk']) ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, embedding_dim]
        return inputs 

    def add_lstm(self, inputs, i, name, backward=False):
        prev_init = tf.zeros([2, tf.shape(inputs)[1], self.opts.units])  # [2, batch_size, num_units]
        #prev_init = tf.zeros([2, 100, self.opts.units])  # [2, batch_size, num_units]
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            inputs_dim = self.opts.units*2 ## concat after each layer
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units, tf.shape(inputs)[1], self.hidden_prob)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(self.weight, [1, 0]) ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights, backward=backward), [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights), inputs, prev_init)
        # cell_hidden [seq_len, 2, batch_size, units]
        h = tf.unstack(cell_hidden, 2, axis=1)[1] #[seq_len, batch_size, units]
        return h

    def add_dropout(self, inputs, keep_prob):
        ## inputs [seq_len, batch_size, inputs_dims/units]
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp*x, inputs)

    def add_projection(self, inputs, reuse=False, name=None): 
        if name is None:
            name = 'Projection'
        with tf.variable_scope(name) as scope:
            if reuse:
                scope.reuse_variables()
            proj_U = tf.get_variable('weight', [self.outputs_dim, self.loader.nb_tags]) 
            tf.add_to_collection('stag_embedding', proj_U)
            self.proj_U = proj_U
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U)+proj_b 
        return outputs
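    # proj_U maps hidden states to tag logits, so its columns double as output
    # tag embeddings; it is kept on self.proj_U (and in the 'stag_embedding'
    # collection) so run_epoch can export it when test_opts.get_weight is set.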

    def add_loss_op(self, output):
        cross_entropy = sequence_loss(output, self.inputs_placeholder_dict['tags'], self.weight)
        tf.add_to_collection('total loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total loss'))
        return loss

    def add_accuracy(self, output):
        self.predictions = tf.cast(tf.argmax(output, 2), tf.int32) ## [batch_size, seq_len]
        correct_predictions = self.weight*tf.cast(tf.equal(self.predictions, self.inputs_placeholder_dict['tags']), tf.float32)
        self.accuracy = tf.reduce_sum(tf.cast(correct_predictions, tf.float32))/tf.reduce_sum(tf.cast(self.weight, tf.float32))

    def add_probs(self, output):
        self.probs = tf.nn.softmax(output)

    def add_train_op(self, loss):
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)
        return train_op

    def get_features(self):
        self.features = ['words', 'tags']
        if self.opts.suffix_dim > 0:
            self.features.append('suffix')
        if self.opts.cap:
            self.features.append('cap')
        if self.opts.num:
            self.features.append('num')
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if self.opts.chars_dim > 0:
            self.features.append('chars')
    
    def __init__(self, opts, test_opts=None):
       
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim + self.opts.nb_filters
        self.outputs_dim = (1+self.opts.bi)*self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list, 2) ## [seq_len, batch_size, inputs_dim]
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        self.weight = tf.cast(tf.not_equal(self.inputs_placeholder_dict['words'], tf.zeros(tf.shape(self.inputs_placeholder_dict['words']), tf.int32)), tf.float32) ## [batch_size, seq_len]
        for i in xrange(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(self.add_lstm(inputs_tensor, i, 'Forward'), self.keep_prob) ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(self.add_lstm(tf.reverse(inputs_tensor, [0]), i, 'Backward', True), self.keep_prob) ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([forward_outputs_tensor, tf.reverse(backward_outputs_tensor, [0])], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        lstm_outputs = inputs_tensor ## [seq_len, batch_size, outputs_dim]
        projected_outputs = tf.map_fn(lambda x: self.add_projection(x), lstm_outputs) #[seq_len, batch_size, nb_tags]
        projected_outputs = tf.transpose(projected_outputs, perm=[1, 0, 2]) # [batch_size, seq_len, nb_tags]
        self.loss = self.add_loss_op(projected_outputs)
        self.train_op = self.add_train_op(self.loss)
        self.add_accuracy(projected_outputs)
        self.add_probs(projected_outputs)

    def run_batch(self, session, testmode = False):
        if not testmode:
            feed = {}
            #for placeholder, data in zip(self.inputs_placeholder_list, self.loader.inputs_train_batch):
            #    feed[placeholder] = data
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[feat]] = self.loader.inputs_train_batch[feat]
            feed[self.keep_prob] = self.opts.dropout_p
            feed[self.hidden_prob] = self.opts.hidden_p
            feed[self.input_keep_prob] = self.opts.input_dp
            train_op = self.train_op
            _, loss, accuracy = session.run([train_op, self.loss, self.accuracy], feed_dict=feed)
            return loss, accuracy
        else:
            feed = {}
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[feat]] = self.loader.inputs_test_batch[feat]
            feed[self.keep_prob] = 1.0
            feed[self.hidden_prob] = 1.0
            feed[self.input_keep_prob] = 1.0
            loss, accuracy, predictions, weight, probs = session.run([self.loss, self.accuracy, self.predictions, self.weight, self.probs], feed_dict=feed)
            weight = weight.astype(bool)
            predictions = predictions[weight]
            probs = probs[weight]
            return loss, accuracy, predictions, probs

    def run_epoch(self, session, testmode = False):

        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            epoch_incomplete = next_batch(self.batch_size)
            while epoch_incomplete:
                loss, accuracy = self.run_batch(session)
                print('{}/{}, loss {:.4f}, accuracy {:.4f}'.format(self.loader._index_in_epoch, self.loader.nb_train_samples, loss, accuracy), end = '\r')
                epoch_incomplete = next_batch(self.batch_size)
            print('\nEpoch Training Time {}'.format(time.time() - epoch_start_time))
            return loss, accuracy
        else: 
            next_test_batch = self.loader.next_test_batch
            test_incomplete = next_test_batch(self.batch_size)
            predictions = []
            probs = []
            while test_incomplete:
                loss, accuracy, predictions_batch, probs_batch = self.run_batch(session, True)
                predictions.append(predictions_batch)
                probs.append(probs_batch)
                #print('Testmode {}/{}, loss {}, accuracy {}'.format(self.loader._index_in_test, self.loader.nb_validation_samples, loss, accuracy), end = '\r')
                print('Test mode {}/{}'.format(self.loader._index_in_test, self.loader.nb_validation_samples), end = '\r')
                test_incomplete = next_test_batch(self.batch_size)
            predictions = np.hstack(predictions)
            probs = np.vstack(probs)
            if self.test_opts is not None:
                self.loader.output_stags(predictions, self.test_opts.save_tags)
                if self.test_opts.save_probs:
                    self.loader.output_probs(probs)
                if self.test_opts.get_weight:
                    stag_embeddings = session.run(self.proj_U)
                    self.loader.output_weight(stag_embeddings)
                        
            accuracy = np.mean(predictions == self.loader.test_gold)
            return accuracy
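
# --- Illustration (not from the original code) ------------------------------
# A small NumPy sketch of how the padding mask (self.weight) is built and used
# for masked accuracy in both models above. The word ids are made up; id 0 is
# assumed to be the padding index, as in the models above.
import numpy as np

words = np.array([[3, 5, 7, 0, 0],   # two sentences, zero-padded to length 5
                  [2, 9, 0, 0, 0]])
weight = (words != 0).astype(np.float32)      # 1.0 on real tokens, 0.0 on pads
predictions = np.array([[1, 4, 2, 0, 0],
                        [6, 9, 3, 1, 2]])
gold = np.array([[1, 4, 9, 8, 8],
                 [6, 2, 8, 8, 8]])
correct = weight * (predictions == gold)      # padding never counts
print(correct.sum() / weight.sum())           # 3 correct / 5 real tokens = 0.6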