Exemplo n.º 1
0
def _conv_opt(inputs,
              window_size,
              out_filters,
              separable=False,
              ch_mul=1,
              is_mask=True):
    """Apply a 1-D convolution (optionally separable) in NCW layout.

    Args:
        inputs: tensor of shape [batch_size, dim, len] (channels-first).
        window_size: convolution kernel width.
        out_filters: number of output channels.
        separable: if True, use a depthwise-separable convolution.
        ch_mul: channel multiplier for the depthwise step.
        is_mask: if True, zero positions beyond each sequence's valid
            length both before and after the convolution.

    Returns:
        Tensor of shape [batch_size, out_filters, len].
    """
    inp_d = inputs.get_shape()[1].value
    inp_l = inputs.get_shape()[2].value

    if is_mask:
        # get_length expects [batch_size, len, dim]: transpose, measure the
        # valid lengths, then transpose back to channels-first.
        inputs = tf.transpose(inputs, [0, 2, 1])  # [batch_size, len, dim]
        valid_length = get_length(inputs)
        mask = tf.sequence_mask(valid_length, inp_l)
        mask = tf.expand_dims(mask, 1)
        mask = tf.tile(mask, [1, inp_d, 1])  # [batch_size, dim, len]
        inputs = tf.transpose(inputs, [0, 2, 1])  # back to [batch_size, dim, len]
        inputs = tf.reshape(inputs, [-1, inp_d, inp_l])
        # Zero padded positions so they cannot leak through the conv window.
        inputs = tf.where(mask, inputs, tf.zeros_like(inputs))

    if separable:  # fixed: avoid comparing to True with ==
        w_depth = create_weight("w_depth", [window_size, out_filters, ch_mul])
        w_point = create_weight("w_point",
                                [1, out_filters * ch_mul, out_filters])
        # NOTE(review): tf.nn.separable_conv1d does not exist in stock
        # TensorFlow (only separable_conv2d) -- confirm this branch is
        # backed by a custom op or is never taken.
        out = tf.nn.separable_conv1d(inputs,
                                     w_depth,
                                     w_point,
                                     strides=1,
                                     padding="SAME",
                                     data_format='NCW')
    else:
        w = create_weight("w", [window_size, inp_d, out_filters])
        out = tf.nn.conv1d(inputs, w, 1, "SAME", data_format='NCW')

    if is_mask:
        # Re-mask the output: SAME padding lets the conv write nonzero
        # values into padded positions.
        mask = tf.sequence_mask(valid_length, inp_l)
        mask = tf.expand_dims(mask, 1)
        mask = tf.tile(mask, [1, out_filters, 1])
        out = tf.where(mask, out, tf.zeros_like(out))
        out = tf.reshape(out, [-1, out_filters, inp_l])

    return out
Exemplo n.º 2
0
 def _linear_combine(self, final_layers):
     """Blend ``final_layers`` into one tensor via a learned softmax-weighted sum."""
     with tf.variable_scope("linear_combine"):
         # One scalar logit per candidate layer, normalized over the layer axis.
         mix_logits = create_weight("w", [len(final_layers), 1, 1, 1])
         mix_weights = tf.nn.softmax(mix_logits, axis=0)
         stacked = tf.convert_to_tensor(final_layers)
         print("final_layer_tensor: {0}".format(stacked))
         combined = tf.reduce_sum(tf.multiply(stacked, mix_weights), axis=0)
         print("final_layer_tensor: {0}".format(combined))
     return combined
Exemplo n.º 3
0
def pool_op(inputs,
            is_training,
            count,
            out_filters,
            avg_or_max,
            start_idx=None):
    """1x1 conv + BN + ReLU, then a width-3 stride-1 pooling layer (NCW).

    Args:
        inputs: [batch, channels, length] tensor.
        is_training: training-mode flag forwarded to batch norm.
        count: how many output channels to take.
        out_filters: channel count produced by the 1x1 conv.
        avg_or_max: "avg" or "max"; selects the pooling type.
        start_idx: where to start taking the output channels. If None,
            assuming fixed_arc mode.

    Raises:
        ValueError: if ``avg_or_max`` is neither "avg" nor "max".
    """
    inp_d = inputs.get_shape()[1].value

    # Project to out_filters channels before pooling.
    with tf.variable_scope("conv_1"):
        w = create_weight("w", [1, inp_d, out_filters])
        x = tf.nn.conv1d(inputs, w, 1, "SAME", data_format='NCW')
        x = batch_norm(x, is_training)
        x = tf.nn.relu(x)

    with tf.variable_scope("pool"):
        actual_data_format = "channels_first"

        # Dispatch table instead of an if/elif chain.
        pool_layers = {
            "avg": tf.layers.average_pooling1d,
            "max": tf.layers.max_pooling1d,
        }
        if avg_or_max not in pool_layers:
            raise ValueError("Unknown pool {}".format(avg_or_max))
        x = pool_layers[avg_or_max](x,
                                    3,
                                    1,
                                    "SAME",
                                    data_format=actual_data_format)

        if start_idx is not None:
            # Take a contiguous slice of channels for the sampled arc.
            x = x[:, start_idx:start_idx + count, :]

    return x
Exemplo n.º 4
0
    def _embedding(self,
                   inputs,
                   vocab_size,
                   num_units,
                   zero_pad=True,
                   scale=True,
                   scope="embedding",
                   reuse=None):
        """Look up trainable embeddings for ``inputs``.

        With ``zero_pad`` the row for id 0 is forced to zeros (padding id);
        with ``scale`` the result is multiplied by sqrt(num_units).
        """
        with tf.variable_scope(scope, reuse=reuse):
            table = create_weight(
                "lookup_table",
                shape=[vocab_size, num_units],
                trainable=True,
                initializer=tf.contrib.layers.xavier_initializer())
            if zero_pad:
                # Replace row 0 with an all-zero vector for the padding id.
                zero_row = tf.zeros(shape=[1, num_units])
                table = tf.concat((zero_row, table[1:, :]), 0)
            embedded = tf.nn.embedding_lookup(table, inputs)
            if scale:
                embedded = embedded * (num_units**0.5)

        return embedded
Exemplo n.º 5
0
    def _model(self,
               doc,
               bow_doc,
               datasets,
               is_training,
               reuse=False,
               mode="train"):
        """Build the NAS text-classification graph and return class logits.

        Args:
            doc: int tensor of token ids, presumably [batch, seq_len] --
                TODO confirm against the caller.
            bow_doc: int tensor of field ids aligned with ``doc``.
            datasets: not referenced in this body; kept for interface parity.
            is_training: Python bool; enables dropout and train-time
                bookkeeping.
            reuse: forwarded to the outer variable scope.
            mode: "train"/"valid"/other; together with ``is_training``
                selects the batch size.

        Returns:
            Logits tensor with ``self.class_num`` units per example.

        NOTE(review): the layer topology between the fixed pooling layers is
        generated by NNI from the ``@nni.mutable_layers`` docstrings below;
        names like ``final_out`` only exist after NNI's annotation codegen
        rewrites this method.
        """

        with tf.variable_scope(self.name, reuse=reuse):
            layers = []

            if is_training:
                self.valid_lengths = []
            with tf.variable_scope('embed'):
                regularizer = tf.contrib.layers.l2_regularizer(
                    scale=self.l2_reg)
                # Token-embedding table: randomly initialized, pretrained
                # (glove / word2vec), or both concatenated along the vocab axis.
                if self.embedding_model == "none":
                    embedding = create_weight(
                        "w",
                        shape=self.embedding["none"].shape,
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                elif self.embedding_model == "glove":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                elif self.embedding_model == "word2vec":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                elif self.embedding_model == "all":
                    embedding_glove = create_weight(
                        "w_glove",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                    print("embedding_glove: {0}".format(
                        embedding_glove.get_shape()))
                    embedding_word2vec = create_weight(
                        "w_word2vec",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                    print("embedding_word2vec: {0}".format(
                        embedding_word2vec.get_shape()))
                    embedding = tf.concat(
                        [embedding_glove, embedding_word2vec], axis=0)
                    print("join embedding: {0}".format(embedding.get_shape()))
                # Separate embedding table for the bag-of-words field ids.
                field_embedding = create_weight(
                    "w_field",
                    shape=self.embedding["field"].shape,
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)

                self.final_embedding = embedding
                print("embedding: {0}".format(embedding))
                print("doc: {0}".format(doc))
                print("bow_doc: {0}".format(bow_doc))

                if is_training or mode == "valid":
                    batch_size = self.batch_size
                else:
                    batch_size = self.eval_batch_size

                # Optionally split long documents into overlapping windows
                # (size 64, stride 32); merged back near the end of the graph.
                if self.sliding_window:
                    doc, sliding_windows = self._to_sliding_window(doc,
                                                                   batch_size,
                                                                   size=64,
                                                                   step=32)
                    bow_doc, _ = self._to_sliding_window(bow_doc,
                                                         batch_size,
                                                         size=64,
                                                         step=32)
                    print("doc after sliding window: {0}".format(doc))

                # Dropout on the embedding table itself (drops whole rows).
                if is_training:
                    embedding = tf.nn.dropout(embedding,
                                              keep_prob=self.embed_keep_prob)

                doc = tf.nn.embedding_lookup(embedding, doc, max_norm=None)
                field_embedding = tf.nn.embedding_lookup(field_embedding,
                                                         bow_doc,
                                                         max_norm=None)
                if self.input_field_embedding:
                    doc = tf.add_n([doc, field_embedding])
                # Switch to channels-first [batch, dim, len] for the conv ops.
                doc = tf.transpose(doc, [0, 2, 1])

                print("doc_shape", doc.shape)
                inp_c = doc.shape[1]
                inp_w = doc.shape[2]
                #doc = tf.reshape(doc, [-1, inp_c, 1, inp_w])
                doc = tf.reshape(doc, [-1, inp_c, inp_w])
                field_embedding = tf.transpose(field_embedding, [0, 2, 1])
                #field_embedding = tf.reshape(field_embedding, [-1, inp_c, 1, inp_w])
                field_embedding = tf.reshape(field_embedding,
                                             [-1, inp_c, inp_w])

                print("after: doc, field_embedding", doc.shape,
                      field_embedding.shape)

            x = doc
            pos_batch_size = 1
            # Positional encoding: sinusoidal or learned; 300 units when it is
            # added directly to the 300-d input, else self.out_filters.
            if self.input_positional_encoding:
                out_filters = 300
            else:
                out_filters = self.out_filters
            if self.is_sinusolid:
                pos_embedding = self._positional_encoding(
                    x,
                    pos_batch_size,
                    is_training,
                    num_units=out_filters,
                    zero_pad=False,
                    scale=False,
                    scope="enc_pe")
            else:
                pos_embedding = self._embedding(tf.tile(
                    tf.expand_dims(tf.range(inp_w), 0), [pos_batch_size, 1]),
                                                vocab_size=inp_w,
                                                num_units=out_filters,
                                                reuse=tf.AUTO_REUSE,
                                                zero_pad=True,
                                                scale=False,
                                                scope="enc_pe")
            print("pos embedding: {0}".format(pos_embedding))
            pos_embedding = tf.transpose(pos_embedding, [0, 2, 1])
            #pos_embedding = tf.expand_dims(pos_embedding, axis=2)
            print("pos embedding: {0}".format(pos_embedding))
            if self.input_positional_encoding:
                x += pos_embedding

            out_filters = self.out_filters
            with tf.variable_scope("init_conv"):  # adjust out_filter dimension
                #print("init_x", x.shape)
                x = _conv_opt(x, 1, self.out_filters)
                x = batch_norm(x, is_training)

                layers.append(x)

            # sveral operations for nni
            def add_fixed_pooling_layer(layer_id, layers, out_filters,
                                        is_training, pos_embedding,
                                        field_embedding):
                '''Add a fixed pooling layer every four layers'''
                # Downsample the positional and field embeddings in lockstep
                # with the feature maps so later ops see matching lengths.
                with tf.variable_scope("pos_embed_pool_{0}".format(layer_id)):
                    pos_embedding = self._factorized_reduction(
                        pos_embedding, out_filters, 2, is_training)

                with tf.variable_scope(
                        "field_embed_pool_{0}".format(layer_id)):
                    field_embedding = self._factorized_reduction(
                        field_embedding, out_filters, 2, is_training)

                #out_filters *= 2
                with tf.variable_scope("pool_at_{0}".format(layer_id)):
                    pooled_layers = []
                    for i, layer in enumerate(layers):
                        #print("pooling_layer", i, layer)
                        with tf.variable_scope("from_{0}".format(i)):
                            x = self._factorized_reduction(
                                layer, out_filters, 2, is_training)
                            #print("after x ", x)
                        pooled_layers.append(x)

                    layers = pooled_layers

                    return layers, out_filters

            def post_process_out(inputs, out):
                '''Form skip connection and perform batch norm'''
                optional_inputs = inputs[1]
                print("post_process_out::", inputs, optional_inputs)
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope("skip"):
                        #print("layers",layers)
                        inputs = layers[-1]

                        inp_d = inputs.get_shape()[1].value
                        inp_l = inputs.get_shape()[2].value

                        out.set_shape([None, out_filters, inp_l])
                        # Best-effort skip-connection add; shapes of the
                        # NNI-selected optional inputs may not match, in
                        # which case the add is skipped.
                        try:
                            out = tf.add_n(
                                [out,
                                 tf.reduce_sum(optional_inputs, axis=0)])
                        except Exception as e:
                            print(e)
                        out = batch_norm(out, is_training)
                layers.append(out)
                return out

            # Module-level counter shared by get_layer_id(); NNI may invoke
            # these builders multiple times, so it lives in the module scope.
            global layer_id
            layer_id = -1

            def get_layer_id():
                # Return a fresh, monotonically numbered scope name.
                global layer_id
                layer_id += 1
                return 'layer_' + str(layer_id)

            size = [1, 3, 5, 7]
            separables = [False, False, False, False]

            def conv(inputs, size, separable=False):
                # res_layers is pre_layers that are chosen to form skip connection
                # layers[-1] is always the latest input
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('conv_' + str(size) + (
                            '_separable' if separable else '')):
                        #print("conv_inputs::", inputs)
                        # inputs[1] holds the NNI-chosen optional inputs;
                        # sum them into a single tensor.
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = conv_op(dealed_inputs,
                                      size,
                                      is_training,
                                      out_filters,
                                      out_filters,
                                      start_idx=None,
                                      separable=separable)
                #layers.append(out)
                return out

            def pool(inputs, ptype):
                # Candidate op: average or max pooling over the summed inputs.
                assert ptype in ['avg',
                                 'max'], "pooling type must be avg or max"

                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('pooling_' + str(ptype)):
                        #print("pool_inputs::", inputs)
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = pool_op(dealed_inputs,
                                      is_training,
                                      out_filters,
                                      out_filters,
                                      ptype,
                                      start_idx=None)
                #layers.append(out)
                return out

            def rnn(inputs):
                # Candidate op: recurrent layer over the summed inputs.

                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_6'):
                        #print("rnn_inputs::", inputs)
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = recur_op(dealed_inputs,
                                       is_training,
                                       out_filters,
                                       out_filters,
                                       start_idx=0,
                                       lstm_x_keep_prob=self.lstm_x_keep_prob,
                                       lstm_h_keep_prob=self.lstm_h_keep_prob,
                                       lstm_o_keep_prob=self.lstm_o_keep_prob,
                                       var_rec=self.var_rec)
                #layers.append(out)
                return out

            def attention(inputs):
                # Candidate op: self-attention over the summed inputs.

                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('branch_7'):
                        #print("attention_inputs::", inputs)
                        dealed_inputs = tf.reduce_sum(inputs[1], axis=0)
                        #print("dealed_inputs::", dealed_inputs)
                        out = attention_op(
                            dealed_inputs,
                            pos_embedding,
                            field_embedding,
                            is_training,
                            out_filters,
                            out_filters,
                            start_idx=0,
                            positional_encoding=self.positional_encoding,
                            attention_keep_prob=self.attention_keep_prob,
                            do_field_embedding=self.field_embedding)
                #layers.append(out)
                return out

            def final_process(inputs):
                # Terminal op: average the chosen optional inputs.
                with tf.variable_scope(get_layer_id()):
                    with tf.variable_scope('final_out'):
                        print("final_inputs::", inputs)
                        dealed_inputs = tf.reduce_mean(inputs[1], axis=0)
                        print("dealed_inputs::", dealed_inputs)
                        out = dealed_inputs
                        #out = tf.reduce_mean(inputs[1], axis=0)
                        print("final_out::", inputs, out)
                layers.append(out)
                return out

            # NNI annotation: defines layers 0-3; parsed by NNI codegen,
            # must remain byte-identical.
            """@nni.mutable_layers(
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x],
          optional_input_size: 1,
          layer_output: layer_0_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_0_out_0)],
          optional_inputs: [],
          optional_input_size: 1,
          layer_output: layer_0_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out],
          optional_input_size: 1,
          layer_output: layer_1_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_1_out_0)],
          optional_inputs: [layer_0_out],
          optional_input_size: 1,
          layer_output: layer_1_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out],
          optional_input_size: 1,
          layer_output: layer_2_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_2_out_0)],
          optional_inputs: [layer_0_out, layer_1_out],
          optional_input_size: 1,
          layer_output: layer_2_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out],
          optional_input_size: 1,
          layer_output: layer_3_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_3_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out],
          optional_input_size: 1,
          layer_output: layer_3_out
      }
      )"""
            layers, out_filters = add_fixed_pooling_layer(
                3, layers, out_filters, is_training, pos_embedding,
                field_embedding)
            x, layer_0_out, layer_1_out, layer_2_out, layer_3_out = layers[-5:]
            print("layer_out", x, layer_0_out, layer_1_out, layer_2_out,
                  layer_3_out)
            # NNI annotation: defines layers 4-7; must remain byte-identical.
            """@nni.mutable_layers(
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out],
          optional_input_size: 1,
          layer_output: layer_4_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_4_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out],
          optional_input_size: 1,
          layer_output: layer_4_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
          optional_input_size: 1,
          layer_output: layer_5_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_5_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out],
          optional_input_size: 1,
          layer_output: layer_5_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
          optional_input_size: 1,
          layer_output: layer_6_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_6_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out],
          optional_input_size: 1,
          layer_output: layer_6_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
          optional_input_size: 1,
          layer_output: layer_7_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_7_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out],
          optional_input_size: 1,
          layer_output: layer_7_out
      }
      )"""
            layers, out_filters = add_fixed_pooling_layer(
                7, layers, out_filters, is_training, pos_embedding,
                field_embedding)
            x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out = layers[
                -9:]
            # NNI annotation: defines layers 8-11 and the final_process
            # layer that produces ``final_out``; must remain byte-identical.
            """@nni.mutable_layers(
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
          optional_input_size: 1,
          layer_output: layer_8_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_8_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out],
          optional_input_size: 1,
          layer_output: layer_8_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
          optional_input_size: 1,
          layer_output: layer_9_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_9_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out],
          optional_input_size: 1,
          layer_output: layer_9_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
          optional_input_size: 1,
          layer_output: layer_10_out_0
      },
      {
          layer_choice: [post_process_out(out=layer_10_out_0)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out],
          optional_input_size: 1,
          layer_output: layer_10_out
      },
      {
          layer_choice: [conv(size=1), conv(size=3), conv(size=5), conv(size=7), pool(ptype='avg'), pool(ptype='max'), rnn(), attention()],
          optional_inputs: [x, layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
          optional_input_size: 1,
          layer_output: layer_11_out_1
      },
      {
          layer_choice: [post_process_out(out=layer_11_out_1)],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out],
          optional_input_size: 1,
          layer_output: layer_11_out
      },
      {
          layer_choice: [final_process()],
          optional_inputs: [layer_0_out, layer_1_out, layer_2_out, layer_3_out, layer_4_out, layer_5_out, layer_6_out, layer_7_out, layer_8_out, layer_9_out, layer_10_out, layer_11_out],
          optional_input_size: 1,
          layer_output: final_out
      }
      )"""

            print("len_layers: ", len(layers))
            # ``final_out`` is injected by NNI's annotation codegen from the
            # docstring above; it does not exist when running this file raw.
            x = final_out

            if self.sliding_window:
                x = self._from_sliding_window(x, batch_size, sliding_windows)

            class_num = self.class_num
            with tf.variable_scope("fc"):
                if not self.is_output_attention:
                    # Global average pooling over the length axis.
                    x = tf.reduce_mean(x, 2)
                else:
                    # Attention pooling: a learned query attends over the
                    # sequence positions.
                    batch_size = x.get_shape()[0].value
                    inp_d = x.get_shape()[1].value
                    inp_l = x.get_shape()[2].value

                    final_attention_query = create_weight(
                        "query",
                        shape=[1, inp_d],
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                    if is_training or mode == "valid":
                        batch_size = self.batch_size
                    else:
                        batch_size = self.eval_batch_size
                    final_attention_query = tf.tile(final_attention_query,
                                                    [batch_size, 1])
                    print("final_attention_query: {0}".format(
                        final_attention_query))

                    #put channel to the last dim
                    x = tf.transpose(x, [0, 2, 1])
                    x = tf.reshape(x, [-1, inp_l, inp_d])
                    print("x: {0}".format(x))
                    x = multihead_attention(
                        queries=final_attention_query,
                        keys=x,
                        pos_embedding=pos_embedding,
                        field_embedding=field_embedding,
                        num_units=inp_d,
                        num_heads=8,
                        dropout_rate=0,
                        is_training=is_training,
                        causality=False,
                        positional_encoding=self.positional_encoding)
                    print("x: {0}".format(x))
                    x = tf.reshape(x, [-1, 1, inp_d])
                    x = tf.reduce_sum(x, axis=1)
                    print("x: {0}".format(x))
                if is_training:
                    x = tf.nn.dropout(x, self.keep_prob)
                # Final linear projection to class logits.
                x = tf.layers.dense(x, units=class_num)

        return x
Exemplo n.º 6
0
    def _model(self,
               doc,
               bow_doc,
               datasets,
               is_training,
               reuse=False,
               mode="train"):

        with tf.variable_scope(self.name, reuse=reuse):
            layers = []
            final_flags = []

            if is_training:
                self.valid_lengths = []
            with tf.variable_scope('embed'):
                regularizer = tf.contrib.layers.l2_regularizer(
                    scale=self.l2_reg)
                if self.embedding_model == "none":
                    embedding = create_weight(
                        "w",
                        shape=self.embedding["none"].shape,
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                elif self.embedding_model == "glove":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                elif self.embedding_model == "word2vec":
                    embedding = create_weight(
                        "w",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                elif self.embedding_model == "all":
                    embedding_glove = create_weight(
                        "w_glove",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["glove"],
                        regularizer=regularizer)
                    print("embedding_glove: {0}".format(
                        embedding_glove.get_shape()))
                    embedding_word2vec = create_weight(
                        "w_word2vec",
                        shape=None,
                        trainable=True,
                        initializer=self.embedding["word2vec"],
                        regularizer=regularizer)
                    print("embedding_word2vec: {0}".format(
                        embedding_word2vec.get_shape()))
                    embedding = tf.concat(
                        [embedding_glove, embedding_word2vec], axis=0)
                    print("join embedding: {0}".format(embedding.get_shape()))
                field_embedding = create_weight(
                    "w_field",
                    shape=self.embedding["field"].shape,
                    trainable=True,
                    initializer=tf.truncated_normal_initializer,
                    regularizer=regularizer)

                self.final_embedding = embedding
                print("embedding: {0}".format(embedding))
                print("doc: {0}".format(doc))
                print("bow_doc: {0}".format(bow_doc))

                if is_training or mode == "valid":
                    batch_size = self.batch_size
                else:
                    batch_size = self.eval_batch_size

                if self.sliding_window:
                    doc, sliding_windows = self._to_sliding_window(doc,
                                                                   batch_size,
                                                                   size=64,
                                                                   step=32)
                    bow_doc, _ = self._to_sliding_window(bow_doc,
                                                         batch_size,
                                                         size=64,
                                                         step=32)
                    print("doc after sliding window: {0}".format(doc))

                if is_training:
                    embedding = tf.nn.dropout(embedding,
                                              keep_prob=self.embed_keep_prob)

                doc = tf.nn.embedding_lookup(embedding, doc, max_norm=None)
                field_embedding = tf.nn.embedding_lookup(field_embedding,
                                                         bow_doc,
                                                         max_norm=None)
                if self.input_field_embedding:
                    doc = tf.add_n([doc, field_embedding])
                doc = tf.transpose(doc, [0, 2, 1])

                print("doc_shape", doc.shape)
                inp_c = doc.shape[1]
                inp_w = doc.shape[2]
                #doc = tf.reshape(doc, [-1, inp_c, 1, inp_w])
                doc = tf.reshape(doc, [-1, inp_c, inp_w])
                field_embedding = tf.transpose(field_embedding, [0, 2, 1])
                #field_embedding = tf.reshape(field_embedding, [-1, inp_c, 1, inp_w])
                field_embedding = tf.reshape(field_embedding,
                                             [-1, inp_c, inp_w])

                print("after: doc, field_embedding", doc.shape,
                      field_embedding.shape)

            x = doc
            pos_batch_size = 1
            # initialize pos_embedding for transformer
            if self.input_positional_encoding:
                out_filters = 300
            else:
                out_filters = self.out_filters
            if self.is_sinusolid:
                pos_embedding = self._positional_encoding(
                    x,
                    pos_batch_size,
                    is_training,
                    num_units=out_filters,
                    zero_pad=False,
                    scale=False,
                    scope="enc_pe")
            else:
                pos_embedding = self._embedding(tf.tile(
                    tf.expand_dims(tf.range(inp_w), 0), [pos_batch_size, 1]),
                                                vocab_size=inp_w,
                                                num_units=out_filters,
                                                reuse=tf.AUTO_REUSE,
                                                zero_pad=True,
                                                scale=False,
                                                scope="enc_pe")
            print("pos embedding: {0}".format(pos_embedding))
            pos_embedding = tf.transpose(pos_embedding, [0, 2, 1])
            #pos_embedding = tf.expand_dims(pos_embedding, axis=2)
            print("pos embedding: {0}".format(pos_embedding))
            if self.input_positional_encoding:
                x += pos_embedding

            out_filters = self.out_filters
            with tf.variable_scope("init_conv"):  # adjust out_filter dimension
                #print("init_x", x.shape)
                x = _conv_opt(x, 1, self.out_filters)
                x = batch_norm(x, is_training)

            def add_fixed_pooling_layer(layer_id, layers, out_filters,
                                        is_training, pos_embedding,
                                        field_embedding):
                """Insert a fixed pooling step after layer `layer_id`.

                First runs a factorized reduction (stride 2) over the
                positional and field embeddings — NOTE(review): those reduced
                tensors are rebound locally only and are never returned to the
                caller; confirm whether that is intentional — then reduces
                every previously collected layer the same way.

                Returns:
                    (reduced_layers, out_filters): the stride-2-reduced copies
                    of `layers`, and the filter count (unchanged; the doubling
                    step is deliberately commented out).
                """
                with tf.variable_scope("pos_embed_pool_{0}".format(layer_id)):
                    pos_embedding = self._factorized_reduction(
                        pos_embedding, out_filters, 2, is_training)

                with tf.variable_scope(
                        "field_embed_pool_{0}".format(layer_id)):
                    field_embedding = self._factorized_reduction(
                        field_embedding, out_filters, 2, is_training)

                #out_filters *= 2
                with tf.variable_scope("pool_at_{0}".format(layer_id)):
                    reduced_layers = []
                    for idx, prev_layer in enumerate(layers):
                        # One scope per source layer so each reduction gets
                        # its own (unshared) variables.
                        with tf.variable_scope("from_{0}".format(idx)):
                            reduced_layers.append(
                                self._factorized_reduction(
                                    prev_layer, out_filters, 2, is_training))

                    return reduced_layers, out_filters

            start_idx = 0
            print("xxxxx", x)

            for layer_id in range(self.num_layers):
                with tf.variable_scope("layer_{0}".format(layer_id)):
                    print("layers", layers)
                    print("layer_id, x", layer_id, x)

                    x = self._fixed_layer(x, pos_embedding, field_embedding,
                                          layer_id, layers, final_flags,
                                          start_idx, 0, out_filters,
                                          is_training)

                    layers.append(x)
                    if self.fixed_arc is not None:
                        final_flags.append(1)

                    print("sample_arc: {0}".format(self.sample_arc[start_idx]))
                    if layer_id in self.pool_layers:
                        layers, out_filters = add_fixed_pooling_layer(
                            layer_id, layers, out_filters, is_training,
                            pos_embedding, field_embedding)

                start_idx += 1 + layer_id
                if self.multi_path:
                    start_idx += 1
                print(layers[-1])

            print("all_layers:", layers)
            final_layers = []
            final_layers_idx = []
            for i in range(0, len(layers)):
                if self.all_layer_output:
                    if self.num_last_layer_output == 0:
                        final_layers.append(layers[i])
                        final_layers_idx.append(i)
                    elif i >= max(
                        (len(layers) - self.num_last_layer_output), 0):
                        final_layers.append(layers[i])
                        final_layers_idx.append(i)
                elif self.fixed_arc is not None and final_flags[i] == 1:
                    final_layers.append(layers[i])
                    final_layers_idx.append(i)
                elif self.fixed_arc is None:
                    final_layers.append(final_flags[i] * layers[i])

            if self.fixed_arc is not None:
                print("final_layers: {0}".format(' '.join(
                    [str(idx) for idx in final_layers_idx])))

            if self.fixed_arc is not None and self.output_linear_combine:
                x = self._linear_combine(final_layers)
            else:
                x = tf.add_n(final_layers)

            if self.sliding_window:
                x = self._from_sliding_window(x, batch_size, sliding_windows)

            class_num = self.class_num
            with tf.variable_scope("fc"):
                if not self.is_output_attention:
                    x = tf.reduce_mean(x, 2)
                else:
                    batch_size = x.get_shape()[0].value
                    inp_d = x.get_shape()[1].value
                    inp_l = x.get_shape()[2].value

                    final_attention_query = create_weight(
                        "query",
                        shape=[1, inp_d],
                        trainable=True,
                        initializer=tf.truncated_normal_initializer,
                        regularizer=regularizer)
                    if is_training or mode == "valid":
                        batch_size = self.batch_size
                    else:
                        batch_size = self.eval_batch_size
                    final_attention_query = tf.tile(final_attention_query,
                                                    [batch_size, 1])
                    print("final_attention_query: {0}".format(
                        final_attention_query))

                    # put channel to the last dim
                    x = tf.transpose(x, [0, 2, 1])
                    x = tf.reshape(x, [-1, inp_l, inp_d])
                    print("x: {0}".format(x))
                    x = multihead_attention(
                        queries=final_attention_query,
                        keys=x,
                        pos_embedding=pos_embedding,
                        field_embedding=field_embedding,
                        num_units=inp_d,
                        num_heads=8,
                        dropout_rate=0,
                        is_training=is_training,
                        causality=False,
                        positional_encoding=self.positional_encoding)
                    print("x: {0}".format(x))
                    x = tf.reshape(x, [-1, 1, inp_d])
                    x = tf.reduce_sum(x, axis=1)
                    print("x: {0}".format(x))
                if is_training:
                    x = tf.nn.dropout(x, self.keep_prob)
                x = tf.layers.dense(x, units=class_num)

        return x