Example #1
    def block(self, wembed, kernel_sz, num_filt, num_layers, reuse=False):
       
        dilation_rate = 2
        initialization = 'identity'
        nonlinearity = 'relu'

        input_tensor = wembed
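        # stack of dilated (atrous) 1-D convolutions over the width dimension;
        # layer i uses dilation rate dilation_rate**i and adds a residual connection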
        with tf.variable_scope('iterated-block', reuse=reuse):
            for i in range(0, num_layers):
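                # the last layer of the block falls back to an undilated convolution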
                if i == num_layers-1:
                    dilation_rate = 1
                filter_shape = [1, kernel_sz, num_filt, num_filt]
                w = tf_utils.initialize_weights(filter_shape, 'conv-'+ str(i) + "_w", init_type=initialization, gain=nonlinearity, divisor=self.num_classes)
                b = tf.get_variable('conv-'+ str(i) + "_b", initializer=tf.constant(0.0 if initialization == "identity" or initialization == "varscale" else 0.001, shape=[num_filt]))
                        
                conv = tf.nn.atrous_conv2d(input_tensor, 
                                            w, 
                                            rate=dilation_rate**i, 
                                            padding="SAME", 
                                            name='conv-'+ str(i))
                conv_b = tf.nn.bias_add(conv, b)
                activated = tf_utils.apply_nonlinearity(conv_b, "relu")

                # residual connection around the dilated convolution
                input_tensor = activated + input_tensor

                tf.summary.histogram('conv-'+str(i), input_tensor)
                # input_tensor = tf.nn.relu(input_tensor, name="relu-"+str(i))

            return input_tensor
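
For intuition, here is a small standalone helper, not part of the original code, that computes the receptive field implied by the dilation schedule in block above (layer i runs at rate dilation_rate**i, and the last layer is undilated):

def block_receptive_field(kernel_sz, num_layers, dilation_rate=2):
    # each layer with kernel size k and dilation d widens the receptive field by (k - 1) * d
    rf = 1
    for i in range(num_layers):
        rate = 1 if i == num_layers - 1 else dilation_rate ** i
        rf += (kernel_sz - 1) * rate
    return rf

print(block_receptive_field(3, 4))  # rates [1, 2, 4, 1] -> receptive field of 17 tokens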
Example #2
 def do_projection():
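     # note: total_output_width, hidden_outputs, h_drop and self come from the enclosing scope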
     # Project raw outputs down
     with tf.name_scope("projection"):
         projection_width = int(total_output_width/(2*len(hidden_outputs)))
         w_p = tf_utils.initialize_weights([total_output_width, projection_width], "w_p", init_type="xavier")
         b_p = tf.get_variable("b_p", initializer=tf.constant(0.01, shape=[projection_width]))
         projected = tf.nn.xw_plus_b(h_drop, w_p, b_p, name="projected")
         projected_nonlinearity = tf_utils.apply_nonlinearity(projected, self.nonlinearity)
     return projected_nonlinearity, projection_width
Example #3
    def forward(self, hidden_keep, input_keep, middle_keep, reuse=True):
        """
        used to determine the actual graph.
        returns (intermediate_probs, probs). technically probs is the last layer of 
        the intermediate probs.
        """
        block_unflat_scores = []

        with tf.variable_scope("forward", reuse=reuse):
            with tf.control_dependencies([self.we0]):
                wembed = tf.nn.embedding_lookup(self.Ww, self.x, name="embeddings")

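            # compose character embeddings into word-level features, one timestep at a time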
            with tf.control_dependencies([self.ce0]):
                xch_seq = tensorToSeq(self.xch)
                cembed_seq = []
                for i, xch_i in enumerate(xch_seq):
                    cembed_seq.append(shared_char_word(self.Wc, xch_i, self.filtsz, self.char_dsz, self.wsz, None if (i == 0 and not reuse)  else True))
                word_char = seqToTensor(cembed_seq)

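            # concatenate word and character features, add a dummy height dim for conv2d,
            # and apply dropout to the inputs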
            input_feats = tf.concat([wembed, word_char], 2)
            input_feats_expanded = tf.expand_dims(input_feats, 1)
            input_feats_expanded_drop = tf.nn.dropout(input_feats_expanded, self.input_dropout_keep_prob)

            # first projection of embeddings
            filter_shape = [1, self.kernel_size, input_feats.get_shape()[2], self.num_filt]

            w = tf_utils.initialize_weights(filter_shape, "conv_start" + "_w", init_type='xavier', gain='relu')
            b = tf.get_variable("conv_start" + "_b", initializer=tf.constant(0.01, shape=[self.num_filt]))
            conv0 = tf.nn.conv2d(input_feats_expanded_drop, w, strides=[1, 1, 1, 1], padding="SAME", name="conv_start")
            h0 = tf_utils.apply_nonlinearity(tf.nn.bias_add(conv0, b), 'relu')

            initial_inputs = [h0]
            last_dims = self.num_filt

            self.share_repeats = True
            self.projection = False

            # Stacked atrous convolutions
            last_output = tf.concat(axis=3, values=initial_inputs)

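            # the same block is applied repeatedly; after the first iteration the variable
            # scope is reused so all iterations share parameters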
            for iteration in range(self.num_iterations):
                hidden_outputs = []
                total_output_width = self.num_filt
                reuse_block = (iteration != 0)
                block_name_suff = "" if self.share_repeats else str(iteration)
                inner_last_dims = last_dims
                inner_last_output = last_output
                with tf.variable_scope("block" + block_name_suff, reuse=reuse_block):
                    block_output = self.block(inner_last_output, self.kernel_size, self.num_filt, self.num_layers, reuse=reuse_block)

                    # legacy Strubell logic: we always grab only the last layer of the block here
                    h_concat = tf.concat(axis=3, values=[block_output])
                    last_output = tf.nn.dropout(h_concat, self.middle_dropout_keep_prob)
                    last_dims = total_output_width

                    h_concat_squeeze = tf.squeeze(h_concat, [1])

                    h_concat_flat = tf.reshape(h_concat_squeeze, [-1, total_output_width])

                    # Add dropout
                    with tf.name_scope("hidden_dropout"):
                        h_drop = tf.nn.dropout(h_concat_flat, self.hidden_dropout_keep_prob)

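                    # only used when self.projection is True; assumes hidden_outputs is
                    # non-empty, otherwise the width computation below divides by zero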
                    def do_projection():
                        # Project raw outputs down
                        with tf.name_scope("projection"):
                            projection_width = int(total_output_width/(2*len(hidden_outputs)))
                            w_p = tf_utils.initialize_weights([total_output_width, projection_width], "w_p", init_type="xavier")
                            b_p = tf.get_variable("b_p", initializer=tf.constant(0.01, shape=[projection_width]))
                            projected = tf.nn.xw_plus_b(h_drop, w_p, b_p, name="projected")
                            projected_nonlinearity = tf_utils.apply_nonlinearity(projected, self.nonlinearity)
                        return projected_nonlinearity, projection_width

                    # only use projection if we wanted to, and only apply middle dropout here if projection
                    input_to_pred, proj_width = do_projection() if self.projection else (h_drop, total_output_width)
                    input_to_pred_drop = tf.nn.dropout(input_to_pred, self.middle_dropout_keep_prob) if self.projection else input_to_pred

                    # Final (unnormalized) scores and predictions
                    with tf.name_scope("output"+block_name_suff):
                        w_o = tf_utils.initialize_weights([proj_width, self.num_classes], "w_o", init_type="xavier")
                        b_o = tf.get_variable("b_o", initializer=tf.constant(0.01, shape=[self.num_classes]))
                        self.l2_loss += tf.nn.l2_loss(w_o)
                        self.l2_loss += tf.nn.l2_loss(b_o)
                        scores = tf.nn.xw_plus_b(input_to_pred_drop, w_o, b_o, name="scores")

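                        # reshape flat per-token scores back to [batch, mxlen, num_classes]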
                        unflat_scores = tf.reshape(scores, tf.stack([-1, self.mxlen, self.num_classes]))

                        block_unflat_scores.append(unflat_scores)

                        # probs = unflat_scores
                        # best = tf.argmax(self.probs, 2)
                        # intermediate_probs = tf.stack(block_unflat_scores, -1)
        return block_unflat_scores, unflat_scores
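
As a rough sketch, and not something taken from the original code, the per-iteration score tensors returned by forward could all be supervised during training; self.y is assumed here to be a hypothetical placeholder of integer tag ids with shape [batch, mxlen]:

losses = []
for unflat in block_unflat_scores:
    # per-token cross-entropy against this iteration's unnormalized scores
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=unflat)
    losses.append(tf.reduce_mean(ce))
# average the loss over all iterated blocks
loss = tf.add_n(losses) / len(block_unflat_scores)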