Example #1
    def __init__(self, cl_logits_input_dim=None):
        self.global_step = tf.train.get_or_create_global_step()
        self.vocab_freqs = _get_vocab_freqs()

        # Cache VatxtInput objects
        self.cl_inputs = None
        self.lm_inputs = None

        # Cache intermediate Tensors that are reused
        self.tensors = {}

        # Construct layers which are reused in constructing the LM and
        # Classification graphs. Instantiating them all once here ensures that
        # variable reuse works correctly.
        self.layers = {}
        self.layers['embedding'] = layers_lib.Embedding(
            FLAGS.vocab_size, FLAGS.embedding_dims, FLAGS.normalize_embeddings,
            self.vocab_freqs, FLAGS.keep_prob_emb)
        self.layers['lstm'] = layers_lib.LSTM(FLAGS.rnn_cell_size,
                                              FLAGS.rnn_num_layers,
                                              FLAGS.keep_prob_lstm_out)
        self.layers['lm_loss'] = layers_lib.SoftmaxLoss(
            FLAGS.vocab_size,
            FLAGS.num_candidate_samples,
            self.vocab_freqs,
            name='LM_loss')

        cl_logits_input_dim = cl_logits_input_dim or FLAGS.rnn_cell_size
        self.layers['cl_logits'] = layers_lib.cl_logits_subgraph(
            [FLAGS.cl_hidden_size] * FLAGS.cl_num_layers, cl_logits_input_dim,
            FLAGS.num_classes, FLAGS.keep_prob_cl_hidden)
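
The comment in this constructor notes that instantiating every layer once is what makes variable reuse work. As a minimal, hypothetical sketch (the token-id tensors are assumptions, and layers_lib layers are assumed to be Keras-style callables): calling the same layer object twice shares one set of weights, while constructing a second Embedding would create a second weight matrix.

    # Hedged sketch: one Embedding instance serves both graphs.
    embedding = self.layers['embedding']
    lm_embedded = embedding(lm_token_ids)  # first call creates the variables
    cl_embedded = embedding(cl_token_ids)  # second call reuses the same variables
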
Example #2
    def _init_embed(self):
        self.char_embed = tf.Variable(initial_value=tf.truncated_normal(
            [
                len(self.model_args.characterEmbed.character2id),
                self.args.char_rnn_size
            ],
            stddev=0.01),
                                      trainable=True,
                                      name="char_embeddings",
                                      dtype=tf.float32)

        self.word_embed_matrix = tf.Variable(initial_value=self.word_embed_pl,
                                             trainable=False)

        self.layers = {}
        self.layers['BiLSTM'] = layers_lib.BiLSTM(self.args.rnn_size)
        self.layers['LSTM'] = layers_lib.LSTM(
            self.args.char_rnn_size)  # char RNN size was set arbitrarily

        self.label_embedding = tf.get_variable('label_embeddings',
                                               initializer=tf.truncated_normal(
                                                   [500, self.args.class_size],
                                                   stddev=0.01),
                                               trainable=True)

        self.input_data_embed = tf.nn.embedding_lookup(self.word_embed_matrix,
                                                       self.input_data)
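
Note that word_embed_matrix is initialized from a placeholder (word_embed_pl) with trainable=False, a common TF1 pattern for loading large pretrained embeddings without baking them into the graph as a constant. A hedged sketch of how the matrix would then be filled in at session start (sess, model, and pretrained_vectors are assumed names, not from the snippet):

    # Feed the pretrained vectors once; the frozen Variable is initialized
    # from the placeholder by the regular initializer op.
    sess.run(tf.global_variables_initializer(),
             feed_dict={model.word_embed_pl: pretrained_vectors})
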
Example #3
    def __init__(self):
        super(VatxtBidirModel, self).__init__(
            cl_logits_input_dim=FLAGS.rnn_cell_size * 2)

        # Reverse LSTM and LM loss for bidirectional models
        self.layers['lstm_reverse'] = layers_lib.LSTM(FLAGS.rnn_cell_size,
                                                      FLAGS.rnn_num_layers,
                                                      FLAGS.keep_prob_lstm_out,
                                                      name='LSTM_Reverse')
        self.layers['lm_loss_reverse'] = layers_lib.SoftmaxLoss(
            FLAGS.vocab_size,
            FLAGS.num_candidate_samples,
            self.vocab_freqs,
            name='LM_loss_reverse')
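
The doubled cl_logits_input_dim passed to the parent constructor reflects that the classifier consumes the forward and reverse LSTM outputs concatenated along the feature axis. A one-line sketch (fwd_output and rev_output are hypothetical tensors, each with FLAGS.rnn_cell_size features):

    # Concatenation yields 2 * rnn_cell_size features per timestep.
    cl_input = tf.concat([fwd_output, rev_output], axis=-1)
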
Example #4
    def __init__(self, cl_logits_input_dim=None):
        self.layers = {}
        self.initialize_vocab()
        self.layers['embedding'] = layers_lib.Embedding(
            self.vocab_size, FLAGS.embedding_dims, FLAGS.normalize_embeddings,
            self.vocab_freqs, FLAGS.keep_prob_emb)
        self.layers['embedding_1'] = layers_lib.Embedding(
            self.vocab_size,
            FLAGS.embedding_dims,
            FLAGS.normalize_embeddings,
            self.vocab_freqs,
            FLAGS.keep_prob_emb,
            name='embedding_1')

        self.layers['lstm'] = layers_lib.LSTM(FLAGS.rnn_cell_size,
                                              FLAGS.rnn_num_layers)
        self.layers['lstm_1'] = layers_lib.BiLSTM(FLAGS.rnn_cell_size,
                                                  FLAGS.rnn_num_layers,
                                                  name="Bilstm")
        action_type = 5 if FLAGS.action == 'all' else 4
        self.layers['action_select'] = layers_lib.Actionselect(
            action_type, FLAGS.keep_prob_dense, name='action_output')
        self.layers['cl_logits'] = layers_lib.Project_layer(
            FLAGS.num_classes, FLAGS.keep_prob_dense, name='project_layer')
Example #5
    def __init__(self, n_inputs: int, n_outputs: int, state_size: int, n_layers: int):
        super().__init__()

        self.lstm = layers.LSTM(n_inputs, state_size, n_layers)
        self.output_projection = layers.Linear(state_size, n_outputs)
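
For context, a hedged sketch of the forward pass this constructor implies (the call signature of the custom layers.LSTM is an assumption; many such wrappers return the outputs together with the recurrent state):

    def forward(self, x, state=None):
        # Run the stacked LSTM, then project each output to n_outputs.
        outputs, state = self.lstm(x, state)
        return self.output_projection(outputs), state
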
Example #6
    def build(self):
        if self.options.debug:
            theano.config.compute_test_value = "warn"

        ''' Create Theano Variables '''
        features = T.ftensor3('features')  # (m, f, c)
        features.tag.test_value = self.test_values[0]
        features_mask = T.fmatrix('features_mask')  # (m, f)
        features_mask.tag.test_value = self.test_values[1]

        context_b = T.lmatrix('context_b')  # (m, f)
        context_b.tag.test_value = self.test_values[2]
        context_b_mask = T.fmatrix('context_b_mask')  # (m, f)
        context_b_mask.tag.test_value = self.test_values[3]

        context_a = T.lmatrix('context_a')  # (m, f)
        context_a.tag.test_value = self.test_values[4]
        context_a_mask = T.fmatrix('context_a_mask')  # (m, f)
        context_a_mask.tag.test_value = self.test_values[5]

        label = T.lvector('label')  # (m,)
        label.tag.test_value = self.test_values[6]


        ''' Initialize model parameters '''
        self.params = []
        self.Wemb = theano.shared(
            common.norm_weight(self.options.n_words,
                               self.options.dim_word), name='Wemb')
        if self.options.train_emb:
            self.params.append(self.Wemb)

        self.lstm_vid = layers.LSTM(options=self.options,
                                    num_inputs=self.options.input_dim,
                                    num_hiddens=self.options.hdims,
                                    prefix="lstm_vid")
        self.lstm_vid.init_tparams()
        if not self.options.text_only:
            self.params += self.lstm_vid.params

        self.lstm_ctxb = layers.LSTM(options=self.options,
                                     num_inputs=self.options.dim_word,
                                     num_hiddens=self.options.hdims,
                                     prefix="ctxb_lstm")
        self.lstm_ctxb.init_tparams()
        self.params += self.lstm_ctxb.params

        self.lstm_ctxa = layers.LSTM(options=self.options,
                                     num_inputs=self.options.dim_word,
                                     num_hiddens=self.options.hdims,
                                     prefix="ctxa_lstm")
        self.lstm_ctxa.init_tparams()
        self.params += self.lstm_ctxa.params

        ''' Get population statistics '''
        popstats = OrderedDict()
        popstats.update(self.lstm_vid.get_popstat())
        popstats.update(self.lstm_ctxb.get_popstat())
        popstats.update(self.lstm_ctxa.get_popstat())
        popstats['ff_x_mean'] = T.fvector('x_mean_ff')
        popstats['ff_x_var'] = T.fvector('x_var_ff')


        ''' Classification '''
        n_words = self.options.n_words_out
        if self.options.text_only:
            self.W, self.b = init_tparams_fc(nin=2*self.options.hdims,
                                             nout=n_words, prefix='logit')
        else:
            self.W, self.b = init_tparams_fc(nin=3*self.options.hdims,
                                             nout=n_words, prefix='logit')
        self.params += [self.W, self.b]

        ''' Construct Theano graph '''
        [cost, ce, err, y_hat] = self.get_fprop(features, features_mask,
                                                context_b, context_b_mask,
                                                context_a, context_a_mask,
                                                label)

        ''' Compute gradients '''
        # [grad_dummy_h_vid, grad_dummy_c_vid, grad_dummy_xW_hU_b_vid] = T.grad(
        #     cost, [dummy_h_vid, dummy_c_vid, dummy_xW_hU_b_vid])
        grads = T.grad(cost, wrt=self.params)
        if self.options.clip_c > 0.:
            g2 = 0.
            for g in grads:
                g2 += (g**2).sum()
            new_grads = []
            for g in grads:
                new_grads.append(
                    T.switch(g2 > (self.options.clip_c**2),
                             g / T.sqrt(g2) * self.options.clip_c, g))
            grads = new_grads
        print('start compiling theano fns')
        t0 = time.time()

        inputs = [features, features_mask,
                  context_b, context_b_mask,
                  context_a, context_a_mask,
                  label]
        print('compile train fns')
        self.f_grad_shared, self.f_update = eval(self.options.optimizer)(
            T.scalar(name='lr'), self.params, grads,
            inputs, cost,
            extra=[ce, err])
        self.f_train = theano.function(inputs, [cost, ce, err, y_hat],
                                       name='f_train',
                                       on_unused_input='warn')
        ### Write y from y_hat and dictionary

        ''' Batch Norm population graph '''
        print('get estimate for inference')
        symbatchstats, estimators = get_stat_estimator([ce])
        sample_stat_inputs = []
        self.f_stat_estimator = None
        if len(estimators) > 0:
            self.f_stat_estimator = theano.function(
                [features, features_mask,
                 context_a, context_a_mask,
                 context_b, context_b_mask], estimators,
                on_unused_input='warn')
            self.options.use_popstats = True
            for v in symbatchstats:
                print(v.tag.bn_label)
                sample_stat_inputs.append(popstats[v.tag.bn_label])
            # Get inference graph
            [cost, ce, err, y_hat] = self.get_fprop(features, features_mask,
                                                    context_b, context_b_mask,
                                                    context_a, context_a_mask,
                                                    label,
                                                    popstats=popstats)
        self.options.use_popstats = False
        print('compile inference fns')
        inputs = inputs + sample_stat_inputs
        self.f_inference = theano.function(inputs, [cost, ce, err, y_hat],
                                           on_unused_input='warn',
                                           name='f_inference')
        print('compiling theano fns used %.2f sec' % (time.time() - t0))
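
The clip_c block above is a manual implementation of global-norm gradient clipping: when the joint L2 norm of all gradients exceeds clip_c, every gradient is rescaled by clip_c / norm, which shrinks the update without changing its direction. A self-contained NumPy sketch of the same rule (clip_by_global_norm is a hypothetical helper, not part of the snippet):

    import numpy as np

    def clip_by_global_norm(grads, clip_c):
        # Joint L2 norm across all gradient arrays.
        global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
        if global_norm > clip_c:
            # Rescale uniformly so the combined norm equals clip_c.
            grads = [g * (clip_c / global_norm) for g in grads]
        return grads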