def inference(self):
    """Performs inference on the DMN model."""
    # Set up the embedding matrix from the pre-initialised word embeddings
    # (sized by the input vocabulary).
    embeddings = tf.Variable(
        self.word_embedding.astype(np.float32), name="Embedding")

    with tf.variable_scope("input", initializer=_xavier_weight_init()):
        print('==> get input representation')
        fact_vecs = self.get_input_representation(embeddings)

    # keep track of attentions for possible strong supervision
    self.attentions = []

    # memory module
    with tf.variable_scope("memory", initializer=_xavier_weight_init(),
                           reuse=None):
        # Initial decoder state: the last fact vector and an all-zero
        # previous-output distribution.
        prev_a = tf.transpose(fact_vecs, perm=[1, 0, 2])[-1]
        prev_y = tf.zeros([self.config.batch_size, self.target_vocab_size])

        output = []
        prev_a_all = []
        prev_y_all = []

        # debug: sequence lengths
        print(self.max_input_len)
        print(self.max_t_len)

        # One decode step per output token position.
        for i in range(0, self.max_t_len):
            prev_a_all.append(prev_a)
            prev_y_all.append(prev_y)
            (prev_a, prev_y, vocab_probs, attn_iters_step,
             attn_halt_probs_step) = self.attention_decode_for_each_output_step(
                 prev_a_all, prev_y_all, fact_vecs)
            output.append(vocab_probs)
            self.attn_iters.append(attn_iters_step)
            self.attn_halt_probs.append(attn_halt_probs_step)

    return output
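# A rough usage sketch (an assumption, not part of the original file): the list
# returned by inference() holds one unnormalised distribution over the target
# vocabulary per output step, so greedy predictions could be read off with
# something like the following, where `model` is a hypothetical instance of
# this class:
#
#     logits = model.inference()   # max_t_len tensors, each of shape
#                                  # (batch_size, target_vocab_size)
#     preds = [tf.argmax(tf.nn.softmax(step), 1) for step in logits]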
def decoder_step(self, rnn_output):
    """Linear softmax answer module."""
    with tf.variable_scope("answer", reuse=True,
                           initializer=_xavier_weight_init()):
        rnn_output = tf.nn.dropout(rnn_output, self.dropout_placeholder)
        U_p = tf.get_variable("U")
        b_p = tf.get_variable("bias_p")
        vocab_probs = tf.matmul(rnn_output, U_p) + b_p
        output_probs = tf.nn.softmax(vocab_probs)
    return vocab_probs, output_probs
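# A minimal sketch (an assumption, not code from the original file) of why the
# raw vocab_probs logits are returned alongside the softmaxed output_probs:
# the logits can feed a numerically stable cross-entropy loss directly, e.g.
#
#     step_loss = tf.nn.softmax_cross_entropy_with_logits(vocab_probs,
#                                                         answer_labels)
#
# where answer_labels is a hypothetical one-hot (batch_size, target_vocab_size)
# tensor of gold answer tokens for this decode step.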
def add_decode_variables(self):
    """Adds the decoder's trainable variables (untied weights for each hop).

    Based on github.com/tensorflow/tensorflow/issues/5608#issuecomment-260549420.
    """
    # An earlier version created a separate variable scope per hop instead:
    # for i in range(self.total_input_hops):
    #     with tf.variable_scope("memory/decode/" + str(i),
    #                            initializer=_xavier_weight_init()):
    #         Wt = tf.get_variable(
    #             "W_t", (2 * self.config.hidden_size, self.config.hidden_size))
    #         bt = tf.get_variable("bias_t", (self.config.hidden_size,))

    self.total_input_hops = self.config.num_hops
    # Alternative: derive the hop count from the input length.
    # self.total_input_hops = self.max_input_len - 6  # don't need to attend to headers

    with tf.variable_scope("memory/decode", initializer=_xavier_weight_init()):
        untied_weights = tf.get_variable(
            "W_t", (self.total_input_hops, 2 * self.config.hidden_size,
                    self.config.hidden_size))
        untied_biases = tf.get_variable(
            "bias_t", (self.total_input_hops, self.config.hidden_size))

    # clear_after_read must be False, otherwise the TensorArray only lets
    # each index be read once.
    self.weight_container = tf.TensorArray(
        tf.float32, self.total_input_hops, clear_after_read=False,
        dynamic_size=None, name="w_container")
    self.bias_container = tf.TensorArray(
        tf.float32, self.total_input_hops, clear_after_read=False,
        dynamic_size=None, name="b_container")

    # Initialise the TensorArrays with the weights split up per hop. A
    # TensorArray is used (rather than a Python list) so that it can be
    # indexed with a tensor.
    self.weight_container = self.weight_container.unpack(untied_weights)
    self.bias_container = self.bias_container.unpack(untied_biases)
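# A brief usage sketch (an assumption, not code from the original file) of the
# tensor-indexing trick mentioned above: inside a tf.while_loop over hops, the
# per-hop weights can be fetched with a tensor index, which a plain Python
# list of variables would not allow.
#
#     hop = tf.constant(0)                      # e.g. a while_loop counter
#     W_hop = self.weight_container.read(hop)   # (2 * hidden_size, hidden_size)
#     b_hop = self.bias_container.read(hop)     # (hidden_size,)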
def _attention_GRU_step(self, rnn_input, h, g):
    """Attention GRU step as described in https://arxiv.org/abs/1603.01417.

    The attention gate g replaces the usual GRU update gate:
        r     = sigmoid(W_r x + U_r h + b_r)
        h_hat = tanh(W x + r * (U h) + b_h)
        h'    = g * h_hat + (1 - g) * h
    """
    with tf.variable_scope("attention_gru", reuse=True,
                           initializer=_xavier_weight_init()):
        Wr = tf.get_variable("Wr")
        Ur = tf.get_variable("Ur")
        br = tf.get_variable("bias_r")
        W = tf.get_variable("W")
        U = tf.get_variable("U")
        bh = tf.get_variable("bias_h")

        r = tf.sigmoid(tf.matmul(rnn_input, Wr) + tf.matmul(h, Ur) + br)
        h_hat = tf.tanh(tf.matmul(rnn_input, W) + r * tf.matmul(h, U) + bh)
        rnn_output = g * h_hat + (1 - g) * h
    return rnn_output
def get_attention(self, prev_memory, fact_vec):
    """Use the previous memory to compute a scalar attention score for the current fact."""
    with tf.variable_scope("attention", reuse=True,
                           initializer=_xavier_weight_init()):
        W_1 = tf.get_variable("W_1")
        b_1 = tf.get_variable("bias_1")
        W_2 = tf.get_variable("W_2")
        b_2 = tf.get_variable("bias_2")

        # Interaction features between the fact vector and the previous
        # memory; their count must match config.num_attention_features,
        # which sizes W_1 in add_reused_variables.
        features = [fact_vec * prev_memory, tf.abs(fact_vec - prev_memory)]
        feature_vec = tf.concat(1, features)

        attention = tf.matmul(
            tf.tanh(tf.matmul(feature_vec, W_1) + b_1), W_2) + b_2
    return attention
def normal_GRU_step(self, rnn_input, h):
    """Implement normal GRU"""
    with tf.variable_scope("normal_gru", reuse=True,
                           initializer=_xavier_weight_init()):
        Wu = tf.get_variable("Wu")
        Uu = tf.get_variable("Uu")
        bu = tf.get_variable("bias_u")
        Wr = tf.get_variable("Wr")
        Ur = tf.get_variable("Ur")
        br = tf.get_variable("bias_r")
        W = tf.get_variable("W")
        U = tf.get_variable("U")
        bh = tf.get_variable("bias_h")

        u = tf.sigmoid(tf.matmul(rnn_input, Wu) + tf.matmul(h, Uu) + bu)
        r = tf.sigmoid(tf.matmul(rnn_input, Wr) + tf.matmul(h, Ur) + br)
        h_hat = tf.tanh(tf.matmul(rnn_input, W) + r * tf.matmul(h, U) + bh)
        rnn_output = u * h_hat + (1 - u) * h
    return rnn_output
def add_reused_variables(self):
    """Adds trainable variables which are later reused."""
    gru_cell = tf.nn.rnn_cell.GRUCell(self.config.hidden_size)
    self.shared_gru_cell_before_dropout = SharedGRUCell(self.config.hidden_size)

    attn_length = 1  # TODO: definitely try out different attention lengths
    with tf.variable_scope('input/forward',
                           initializer=_xavier_weight_init(), reuse=True):
        self.intra_attention_GRU_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
            tf.contrib.rnn.AttentionCellWrapper(
                self.shared_gru_cell_before_dropout, attn_length,
                state_is_tuple=False),
            input_keep_prob=self.dropout_placeholder,
            output_keep_prob=self.dropout_placeholder)
    with tf.variable_scope('input/backward',
                           initializer=_xavier_weight_init(), reuse=True):
        self.intra_attention_GRU_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
            tf.contrib.rnn.AttentionCellWrapper(
                self.shared_gru_cell_before_dropout, attn_length,
                state_is_tuple=False),
            input_keep_prob=self.dropout_placeholder,
            output_keep_prob=self.dropout_placeholder)

    # apply dropout to the GRUs if the flag is set
    if self.config.drop_grus:
        self.gru_cell = tf.nn.rnn_cell.DropoutWrapper(
            gru_cell,
            input_keep_prob=self.dropout_placeholder,
            output_keep_prob=self.dropout_placeholder)
    else:
        self.gru_cell = gru_cell

    with tf.variable_scope("memory/attention",
                           initializer=_xavier_weight_init()):
        b_1 = tf.get_variable("bias_1", (self.config.embed_size,))
        W_1 = tf.get_variable(
            "W_1", (self.config.embed_size * self.config.num_attention_features,
                    self.config.embed_size))
        W_2 = tf.get_variable("W_2", (self.config.embed_size, 1))
        b_2 = tf.get_variable("bias_2", 1)

    with tf.variable_scope("memory/attention_gru",
                           initializer=_xavier_weight_init()):
        Wr = tf.get_variable("Wr", (self.config.embed_size, self.config.hidden_size))
        Ur = tf.get_variable("Ur", (self.config.hidden_size, self.config.hidden_size))
        br = tf.get_variable("bias_r", (1, self.config.hidden_size))
        W = tf.get_variable("W", (self.config.embed_size, self.config.hidden_size))
        U = tf.get_variable("U", (self.config.hidden_size, self.config.hidden_size))
        bh = tf.get_variable("bias_h", (1, self.config.hidden_size))

    with tf.variable_scope("memory/normal_gru",
                           initializer=_xavier_weight_init()):
        Wu = tf.get_variable(
            "Wu", (self.config.embed_size + self.target_vocab_size,
                   self.config.hidden_size))
        Uu = tf.get_variable("Uu", (self.config.hidden_size, self.config.hidden_size))
        bu = tf.get_variable("bias_u", (1, self.config.hidden_size))
        Wr = tf.get_variable(
            "Wr", (self.config.embed_size + self.target_vocab_size,
                   self.config.hidden_size))
        Ur = tf.get_variable("Ur", (self.config.hidden_size, self.config.hidden_size))
        br = tf.get_variable("bias_r", (1, self.config.hidden_size))
        W = tf.get_variable(
            "W", (self.config.embed_size + self.target_vocab_size,
                  self.config.hidden_size))
        U = tf.get_variable("U", (self.config.hidden_size, self.config.hidden_size))
        bh = tf.get_variable("bias_h", (1, self.config.hidden_size))

    with tf.variable_scope("memory/answer",
                           initializer=_xavier_weight_init()):
        U_p = tf.get_variable("U", (self.config.embed_size, self.target_vocab_size))
        b_p = tf.get_variable("bias_p", (self.target_vocab_size,))
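# Note on variable reuse (an inference from this file, not a statement by the
# author): add_reused_variables() creates variables such as "memory/answer/U",
# and the step functions above re-open only the inner scope with reuse=True,
# e.g.
#
#     with tf.variable_scope("answer", reuse=True):
#         U_p = tf.get_variable("U")
#
# which appears to resolve to "memory/answer/U" because those step functions
# are invoked from inside the "memory" scope opened in inference().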