Example No. 1
  def __init__(self):
    super(QCAttention, self).__init__()
    vocabulary.init()
    vocab_size = vocabulary.get_vocab_size() 

    self.embedding = wenzheng.Embedding(
        vocab_size,
        FLAGS.emb_dim,
        FLAGS.word_embedding_file,
        trainable=FLAGS.finetune_word_embedding,
        vocab2_size=FLAGS.unk_vocab_size)

    self.num_layers = FLAGS.num_layers
    self.num_units = FLAGS.rnn_hidden_size
    self.keep_prob = FLAGS.keep_prob

    self.encode = wenzheng.Encoder(FLAGS.encoder_type)

    self.att_encode = melt.layers.CudnnRnn(num_layers=1, num_units=self.num_units, keep_prob=self.keep_prob)


    self.att_dot_attention = melt.layers.DotAttention(hidden=self.num_units, keep_prob=self.keep_prob, combiner=FLAGS.att_combiner)
    self.pooling = melt.layers.MaxPooling()

    self.logits = keras.layers.Dense(NUM_CLASSES, activation=None)
    self.logits2 = keras.layers.Dense(NUM_CLASSES, activation=None)
Example No. 2
    def __init__(self):
        super(Model2, self).__init__()
        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        ## adadelta/adagrad would need to run on CPU, so just use adam.
        #with tf.device('/cpu:0'):
        self.embedding = wenzheng.Embedding(
            vocab_size,
            FLAGS.emb_dim,
            FLAGS.word_embedding_file,
            trainable=FLAGS.finetune_word_embedding,
            vocab2_size=FLAGS.unk_vocab_size)

        self.num_layers = FLAGS.num_layers
        self.num_units = FLAGS.rnn_hidden_size
        self.keep_prob = FLAGS.keep_prob

        self.encode = melt.layers.CudnnRnn2(num_layers=self.num_layers,
                                            num_units=self.num_units,
                                            keep_prob=self.keep_prob)

        self.pooling = melt.layers.MaxPooling2()
        #self.pooling = keras.layers.GlobalMaxPool1D()

        self.logits = keras.layers.Dense(NUM_CLASSES, activation=None)
        self.logits2 = keras.layers.Dense(NUM_CLASSES, activation=None)
Example No. 3
def get_embedding(name='emb', height=None, emb_dim=None, trainable=True):
    emb_dim = emb_dim or FLAGS.emb_dim
    if height is None:
        vocabulary.init()
        height = vocabulary.get_vocab_size()

    # google transformer uses the initializer below:
    #initializer=tf.random_normal_initializer(
    #            0., self.hidden_size ** -0.5)
    # squad uses np.random.normal(scale=0.01)

    if FLAGS.emb_init == 'uniform':
        init_width = 0.5 / emb_dim
        emb = melt.variable.get_weights_uniform(name, [height, emb_dim],
                                                -init_width,
                                                init_width,
                                                trainable=trainable)
        logging.info('emb random_uniform init with width', init_width)
    elif FLAGS.emb_init == 'normal' or FLAGS.emb_init == 'random':
        stddev = FLAGS.emb_stddev or emb_dim**-0.5
        logging.info('emb random_normal init with stddev', stddev)
        emb = melt.variable.get_weights_random(name, [height, emb_dim],
                                               stddev,
                                               trainable=trainable)
    else:
        raise ValueError(FLAGS.emb_init)

    # revert to the code above if this does not work better
    #emb = melt.variable.get_weights_truncated(name, [vocab_size, emb_dim], stddev=FLAGS.weight_stddev)
    return emb
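
A minimal, self-contained sketch of the two initialization schemes in get_embedding above, with NumPy standing in for melt.variable (shapes and widths mirror the code; melt and FLAGS are not needed here):

import numpy as np

def init_embedding_sketch(height, emb_dim, scheme='uniform', stddev=None):
    # Illustrative only: mirrors the 'uniform' and 'normal'/'random' branches above.
    if scheme == 'uniform':
        init_width = 0.5 / emb_dim
        return np.random.uniform(-init_width, init_width, size=(height, emb_dim))
    elif scheme in ('normal', 'random'):
        stddev = stddev or emb_dim ** -0.5
        return np.random.normal(0.0, stddev, size=(height, emb_dim))
    raise ValueError(scheme)

emb = init_embedding_sketch(height=50000, emb_dim=300, scheme='uniform')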
Example No. 4
    def __init__(self):
        super(Model, self).__init__()
        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        ## adadelta/adagrad would need to run on CPU, so just use adam.
        #with tf.device('/cpu:0'):
        self.embedding = wenzheng.Embedding(
            vocab_size,
            FLAGS.emb_dim,
            FLAGS.word_embedding_file,
            trainable=FLAGS.finetune_word_embedding,
            vocab2_size=FLAGS.unk_vocab_size)
        self.num_layers = FLAGS.num_layers
        self.num_units = FLAGS.rnn_hidden_size
        self.keep_prob = FLAGS.keep_prob

        self.encode = wenzheng.Encoder(FLAGS.encoder_type)

        self.pooling = melt.layers.Pooling(FLAGS.encoder_output_method,
                                           top_k=FLAGS.top_k,
                                           att_activation=getattr(
                                               tf.nn, FLAGS.att_activation))

        self.logits = keras.layers.Dense(NUM_CLASSES)
        self.logits2 = keras.layers.Dense(NUM_CLASSES)
Example No. 5
    def __init__(self):
        super(MwAN, self).__init__()

        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()
        embedding_size = FLAGS.emb_dim
        encoder_size = FLAGS.rnn_hidden_size
        self.dropout = nn.Dropout(p=(1 - FLAGS.keep_prob))

        self.embedding = wenzheng.pyt.get_embedding(
            vocab_size, embedding_size, FLAGS.word_embedding_file,
            FLAGS.finetune_word_embedding)

        self.q_encoder = nn.GRU(input_size=embedding_size,
                                hidden_size=encoder_size,
                                batch_first=True,
                                bidirectional=True)
        self.p_encoder = nn.GRU(input_size=embedding_size,
                                hidden_size=encoder_size,
                                batch_first=True,
                                bidirectional=True)
        self.a_encoder = nn.GRU(input_size=embedding_size,
                                hidden_size=int(embedding_size / 2),
                                batch_first=True,
                                bidirectional=True)
        self.a_attention = nn.Linear(embedding_size, 1, bias=False)
        # Concat Attention
        self.Wc1 = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.Wc2 = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.vc = nn.Linear(encoder_size, 1, bias=False)
        # Bilinear Attention
        self.Wb = nn.Linear(2 * encoder_size, 2 * encoder_size, bias=False)
        # Dot Attention :
        self.Wd = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.vd = nn.Linear(encoder_size, 1, bias=False)
        # Minus Attention :
        self.Wm = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.vm = nn.Linear(encoder_size, 1, bias=False)

        self.Ws = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.vs = nn.Linear(encoder_size, 1, bias=False)

        self.gru_agg = nn.GRU(12 * encoder_size,
                              encoder_size,
                              batch_first=True,
                              bidirectional=True)
        """
        prediction layer
        """
        self.Wq = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.vq = nn.Linear(encoder_size, 1, bias=False)
        self.Wp1 = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.Wp2 = nn.Linear(2 * encoder_size, encoder_size, bias=False)
        self.vp = nn.Linear(encoder_size, 1, bias=False)
        self.prediction = nn.Linear(2 * encoder_size,
                                    embedding_size,
                                    bias=False)
        self.logits = nn.Linear(3, 3)
        self.logits2 = nn.Linear(3, 3)
        self.initiation()
Example No. 6
 def __init__(self, embedding=None):
     super(Fastai, self).__init__(embedding)
     vocabulary.init()
     vocab_size = vocabulary.get_vocab_size()
     emb_dim = FLAGS.emb_dim
     self.num_classes = NUM_CLASSES
     self.model = lele.fastai.text.classifier(
         vocab_size,
         NUM_ATTRIBUTES * self.num_classes,
         emb_sz=emb_dim,
         nl=FLAGS.num_layers,
         embedding_weight=FLAGS.word_embedding_file)
Example No. 7
    def __init__(self):
        super(Gru, self).__init__()
        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        emb_dim = FLAGS.emb_dim

        self.embedding = wenzheng.pyt.get_embedding(
            vocab_size, emb_dim, FLAGS.word_embedding_file,
            FLAGS.finetune_word_embedding)

        self.num_layers = FLAGS.num_layers
        self.num_units = FLAGS.rnn_hidden_size
        self.dropout = nn.Dropout(p=(1 - FLAGS.keep_prob))

        #self.encode = nn.GRU(input_size=emb_dim, hidden_size=self.num_units, batch_first=True, bidirectional=True)
        self.encode = lele.layers.StackedBRNN(
            input_size=emb_dim,
            hidden_size=self.num_units,
            num_layers=self.num_layers,
            dropout_rate=1 - FLAGS.keep_prob,
            dropout_output=False,
            concat_layers=False,
            rnn_type=nn.GRU,
            padding=FLAGS.rnn_padding,
        )
        ## Support mask
        #self.pooling = lele.layers.MaxPooling()

        self.pooling = lele.layers.Pooling(FLAGS.encoder_output_method,
                                           input_size=2 * self.num_units,
                                           top_k=FLAGS.top_k,
                                           att_activation=getattr(
                                               F, FLAGS.att_activation))

        # input dim is not inferred as conveniently as in tf
        pre_logits_dim = self.pooling.output_size

        if FLAGS.use_type:
            pre_logits_dim += 1

        num_types = 2
        if FLAGS.use_type_emb:
            type_emb_dim = 10
            self.type_embedding = nn.Embedding(num_types, type_emb_dim)
            pre_logits_dim += type_emb_dim

        if FLAGS.use_type_rnn:
            self.type_embedding = nn.Embedding(num_types, emb_dim)

        self.logits = nn.Linear(pre_logits_dim, NUM_CLASSES)
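
A short sketch of how pre_logits_dim is assembled in the Gru model above; the numbers are illustrative, and the real values come from FLAGS and the Pooling layer's output_size:

# Illustrative values: a max-pooled bidirectional GRU with 200 hidden units.
num_units = 200
pooling_output_size = 2 * num_units       # bidirectional encoder output fed to pooling
pre_logits_dim = pooling_output_size      # 400

use_type, use_type_emb = True, False      # hypothetical flag settings
if use_type:
    pre_logits_dim += 1                   # a scalar type feature is appended: 401
if use_type_emb:
    pre_logits_dim += 10                  # or a learned 10-d type embedding instead

# nn.Linear(pre_logits_dim, NUM_CLASSES) then maps the pooled features to class logits.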
Example No. 8
    def __init__(self):
        super(Bow, self).__init__()
        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        emb_dim = FLAGS.emb_dim

        self.embedding = wenzheng.pyt.get_embedding(
            vocab_size, emb_dim, FLAGS.word_embedding_file,
            FLAGS.finetune_word_embedding)

        self.num_layers = FLAGS.num_layers
        self.num_units = FLAGS.rnn_hidden_size
        self.dropout = nn.Dropout(p=(1 - FLAGS.keep_prob))
        self.encode = nn.GRU(input_size=emb_dim,
                             hidden_size=self.num_units,
                             batch_first=True,
                             bidirectional=True)

        #self.logits = nn.Linear(2 * self.num_units, NUM_CLASSES)
        self.logits = nn.Linear(emb_dim, NUM_CLASSES)
Example No. 9
    def __init__(self):
        super(Model, self).__init__()
        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()
        #self.embedding = keras.layers.Embedding(vocab_size, FLAGS.emb_dim)
        #with tf.device('/cpu:0'):
        self.embedding = wenzheng.utils.Embedding(
            vocab_size,
            FLAGS.emb_dim,
            FLAGS.word_embedding_file,
            trainable=FLAGS.finetune_word_embedding)

        #self.encode = MyLayer()
        self.num_layers = 1
        self.num_units = FLAGS.rnn_hidden_size
        self.keep_prob = 0.7
        self.encode = melt.layers.CudnnRnn(num_layers=self.num_layers,
                                           num_units=self.num_units,
                                           keep_prob=self.keep_prob)

        # self.encode = keras.layers.CuDNNGRU(units=FLAGS.rnn_hidden_size,
        # #self.encode = keras.layers.CuDNNLSTM(units=FLAGS.rnn_hidden_size,
        #                                     return_sequences=True,
        #                                     return_state=False,
        #                                     recurrent_initializer='glorot_uniform')

        #self.encode = keras.layers.GRU(units=FLAGS.rnn_hidden_size,
        #                     return_sequences=True,
        #                     return_state=False,
        #                     recurrent_activation='sigmoid',
        #                     recurrent_initializer='glorot_uniform')

        #self.pooling = keras.layers.GlobalMaxPool1D()
        self.pooling = melt.layers.MaxPooling()

        self.logits = keras.layers.Dense(NUM_CLASSES, activation=None)

        self.temp = MyModel()
Example No. 10
    def __init__(self, args=None):
        super(Rnet, self).__init__()
        # Store config
        if args is None:
            args = FLAGS
        self.args = args

        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        # Word embeddings (+1 for padding)
        self.embedding = nn.Embedding(vocab_size, args.emb_dim, padding_idx=0)

        if FLAGS.word_embedding_file:
            self.embedding.weight.data.copy_(
                torch.from_numpy(np.load(FLAGS.word_embedding_file)))
            if not FLAGS.finetune_word_embedding:
                self.embedding.weight.requires_grad = False

        doc_input_size = args.emb_dim

        # Encoder
        self.encode_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.rnn_hidden_size,
            num_layers=args.num_layers,
            dropout_rate=1 - args.keep_prob,
            dropout_output=False,
            concat_layers=True,
            rnn_type=self.RNN_TYPES['gru'],
            padding=False,
        )

        # Output sizes of rnn encoder
        doc_hidden_size = 2 * args.rnn_hidden_size
        question_hidden_size = 2 * args.rnn_hidden_size

        #if args.concat_rnn_layers:
        doc_hidden_size *= args.num_layers
        question_hidden_size *= args.num_layers

        # Gated-attention-based RNN of the whole question
        self.question_attn = layers.SeqAttnMatch(question_hidden_size,
                                                 identity=False)
        self.question_attn_gate = layers.Gate(doc_hidden_size +
                                              question_hidden_size)
        self.question_attn_rnn = layers.StackedBRNN(
            input_size=doc_hidden_size + question_hidden_size,
            hidden_size=args.rnn_hidden_size,
            num_layers=1,
            dropout_rate=1 - args.keep_prob,
            dropout_output=False,
            concat_layers=False,
            rnn_type=self.RNN_TYPES['gru'],
            padding=False,
        )

        question_attn_hidden_size = 2 * args.rnn_hidden_size

        # Self-matching-attention-based RNN of the whole doc
        self.doc_self_attn = layers.SelfAttnMatch(question_attn_hidden_size,
                                                  identity=False)
        self.doc_self_attn_gate = layers.Gate(question_attn_hidden_size +
                                              question_attn_hidden_size)
        self.doc_self_attn_rnn = layers.StackedBRNN(
            input_size=question_attn_hidden_size + question_attn_hidden_size,
            hidden_size=args.rnn_hidden_size,
            num_layers=1,
            dropout_rate=1 - args.keep_prob,
            dropout_output=False,
            concat_layers=False,
            rnn_type=self.RNN_TYPES['gru'],
            padding=False,
        )

        doc_self_attn_hidden_size = 2 * args.rnn_hidden_size

        self.doc_self_attn_rnn2 = layers.StackedBRNN(
            input_size=doc_self_attn_hidden_size,
            hidden_size=args.rnn_hidden_size,
            num_layers=1,
            dropout_rate=1 - args.keep_prob,
            dropout_output=False,
            concat_layers=False,
            rnn_type=self.RNN_TYPES['gru'],
            padding=False,
        )

        self.logits = nn.Linear(2 * args.rnn_hidden_size,
                                NUM_CLASSES,
                                bias=True)
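
Since the encoder above is built with concat_layers=True, the effective hidden sizes scale with both the direction count and num_layers; a quick sketch of that bookkeeping with illustrative values:

# Illustrative values; the real ones come from args.
rnn_hidden_size, num_layers = 128, 2

doc_hidden_size = 2 * rnn_hidden_size         # bidirectional output: 256
question_hidden_size = 2 * rnn_hidden_size    # same on the question side

# concat_layers=True concatenates every layer's output, hence the multiplication
doc_hidden_size *= num_layers                 # 512
question_hidden_size *= num_layers            # 512

gate_input_size = doc_hidden_size + question_hidden_size   # width fed to layers.Gate: 1024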
Example No. 11
    def __init__(self):
        super(MnemonicReaderV1, self).__init__()
        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        self.embedding = wenzheng.Embedding(
            vocab_size,
            FLAGS.emb_dim,
            FLAGS.word_embedding_file,
            trainable=FLAGS.finetune_word_embedding,
            vocab2_size=FLAGS.unk_vocab_size,
            vocab2_trainable=FLAGS.finetune_unk_vocab)
        self.num_layers = FLAGS.num_layers
        self.num_units = FLAGS.rnn_hidden_size
        self.keep_prob = FLAGS.keep_prob

        logging.info('num_layers:', self.num_layers)
        logging.info('num_units:', self.num_units)
        logging.info('keep_prob:', self.keep_prob)

        self.encode = melt.layers.CudnnRnn(num_layers=self.num_layers,
                                           num_units=self.num_units,
                                           keep_prob=self.keep_prob)

        if FLAGS.use_qc_att or FLAGS.use_bidaf_att:
            assert not (FLAGS.use_qc_att and FLAGS.use_bidaf_att
                        ), 'use rnet or use bidaf? just choose one!'
            #Attention = melt.layers.DotAttention if FLAGS.use_qc_att else melt.layers.BiDAFAttention
            Attention = melt.layers.SeqAttnMatch if FLAGS.use_qc_att else melt.layers.BiDAFAttention
            # Sharing the att and match attention seems fine and may improve results a bit,
            # but just follow the squad setup and use different dot attentions.
            # NOTICE: for eager-mode checkpoint saving, do not write x = [None] * 3; it cannot be saved.
            self.att_dot_attentions = []
            self.att_encodes = []
            for _ in range(FLAGS.hop):
                self.att_dot_attentions.append(
                    Attention(hidden=self.num_units,
                              keep_prob=self.keep_prob,
                              combiner=FLAGS.att_combiner))
                #self.att_dot_attentions.append(Attention(keep_prob=self.keep_prob, combiner=FLAGS.att_combiner, identity=True))
                #self.att_dot_attentions.append(Attention(combiner=FLAGS.att_combiner, identity=True))
                # TODO: naming does not work like layers.Dense; in eager mode the layers are
                # named att_encode / match_encode, while in graph mode they become cudnn_rnn,
                # cudnn_rnn_1, so name=... is ignored, unlike layers.Dense.
                # TODO: mreader seems not to use att_encode, so check whether using it helps.
                self.att_encodes.append(
                    melt.layers.CudnnRnn(num_layers=1,
                                         num_units=self.num_units,
                                         keep_prob=self.keep_prob))
                #self.att_encode = melt.layers.CudnnRnn(num_layers=1, num_units=self.num_units, keep_prob=0.5)

        if FLAGS.use_label_emb or FLAGS.use_label_att:
            assert not (FLAGS.use_label_emb and FLAGS.use_label_att)
            self.label_emb_height = NUM_CLASSES if not FLAGS.label_emb_height else FLAGS.label_emb_height
            self.label_embedding = melt.layers.Embedding(
                self.label_emb_height, FLAGS.emb_dim)
            if not FLAGS.use_label_att:
                # TODO not use activation ?
                #self.label_dense = keras.layers.Dense(FLAGS.emb_dim, activation=tf.nn.relu)
                self.label_dense = keras.layers.Dense(FLAGS.emb_dim)
            else:
                self.label_att_dot_attention = melt.layers.DotAttention(
                    hidden=self.num_units,
                    keep_prob=self.keep_prob,
                    combiner=FLAGS.att_combiner)
                self.label_att_encode = melt.layers.CudnnRnn(
                    num_layers=1,
                    num_units=self.num_units,
                    keep_prob=self.keep_prob)
                #self.label_att_encode = melt.layers.CudnnRnn(num_layers=1, num_units=self.num_units, keep_prob=0.5)

        if FLAGS.use_self_match:
            self.match_dot_attentions = []
            self.match_encodes = []
            for _ in range(FLAGS.hop):
                self.match_dot_attentions.append(
                    melt.layers.DotAttention(hidden=self.num_units,
                                             keep_prob=self.keep_prob,
                                             combiner=FLAGS.att_combiner))
                #self.match_dot_attentions.append(melt.layers.SelfAttnMatch(keep_prob=self.keep_prob, combiner=FLAGS.att_combiner, identity=True, diag=False))
                #self.match_dot_attentions.append(melt.layers.SelfAttnMatch(combiner=FLAGS.att_combiner, identity=True, diag=False))
                self.match_encodes.append(
                    melt.layers.CudnnRnn(num_layers=1,
                                         num_units=self.num_units,
                                         keep_prob=self.keep_prob))
            #self.match_encode = melt.layers.CudnnRnn(num_layers=1, num_units=self.num_units, keep_prob=0.5)

        if FLAGS.use_answer_emb:
            self.context_dense = keras.layers.Dense(FLAGS.emb_dim)
            self.answer_dense = keras.layers.Dense(FLAGS.emb_dim)
            # self.context_dense = keras.layers.Dense(FLAGS.emb_dim, activation=tf.nn.relu)
            # self.answer_dense = keras.layers.Dense(FLAGS.emb_dim, activation=tf.nn.relu)

        logging.info('encoder_output_method:', FLAGS.encoder_output_method)
        logging.info('topk:', FLAGS.top_k)
        self.pooling = melt.layers.Pooling(FLAGS.encoder_output_method,
                                           top_k=FLAGS.top_k,
                                           att_activation=getattr(
                                               tf.nn, FLAGS.att_activation))

        self.logits = keras.layers.Dense(NUM_CLASSES)
        if FLAGS.split_type:
            self.logits2 = keras.layers.Dense(NUM_CLASSES)
Example No. 12
    def __init__(self, args=None):
        super(MnemonicReaderV3, self).__init__()
        if args is None:
            args = FLAGS
        # Store config
        self.args = args

        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        self.embedding = wenzheng.pyt.get_embedding(
            vocab_size, args.emb_dim, args.word_embedding_file,
            args.finetune_word_embedding)

        doc_input_size = args.emb_dim
        self.dropout_rate = 1 - args.keep_prob

        self.num_layers = 1

        # Encoder
        self.encoding_rnn = layers.CudnnRnn(
            input_size=doc_input_size,
            hidden_size=args.rnn_hidden_size,
            num_layers=1,
            dropout_rate=1 - args.keep_prob,
            dropout_output=False,
            concat_layers=False,
            rnn_type=self.RNN_TYPES['gru'],
            padding=args.rnn_padding,
        )

        doc_hidden_size = 2 * args.rnn_hidden_size

        # Interactive aligning, self aligning and aggregating
        self.interactive_aligners = nn.ModuleList()
        self.interactive_SFUs = nn.ModuleList()
        self.self_aligners = nn.ModuleList()
        self.self_SFUs = nn.ModuleList()
        self.aggregate_rnns = nn.ModuleList()

        for i in range(args.hop):
            # interactive aligner
            self.interactive_aligners.append(
                layers.SeqAttnMatch(doc_hidden_size, identity=True))
            self.interactive_SFUs.append(
                layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
            # self aligner
            self.self_aligners.append(
                layers.SelfAttnMatch(doc_hidden_size,
                                     identity=True,
                                     diag=False))
            self.self_SFUs.append(
                layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
            # aggregating
            self.aggregate_rnns.append(
                layers.StackedBRNN(
                    input_size=doc_hidden_size,
                    hidden_size=args.rnn_hidden_size,
                    num_layers=1,
                    dropout_rate=1 - args.keep_prob,
                    dropout_output=False,
                    concat_layers=False,
                    rnn_type=self.RNN_TYPES['gru'],
                    padding=False,
                ))

        self.pooling = lele.layers.Pooling(FLAGS.encoder_output_method,
                                           input_size=2 * args.rnn_hidden_size,
                                           top_k=FLAGS.top_k,
                                           att_activation=getattr(
                                               F, FLAGS.att_activation))

        pre_logits_dim = self.pooling.output_size
        if FLAGS.use_type_emb:
            num_types = 2
            type_emb_dim = 10
            self.type_embedding = nn.Embedding(num_types, type_emb_dim)
            pre_logits_dim += type_emb_dim

        self.logits = nn.Linear(pre_logits_dim, NUM_CLASSES)
        self.logits2 = nn.Linear(pre_logits_dim, NUM_CLASSES)
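
A sketch of the per-hop size bookkeeping in the aligner loop above; the values are illustrative, and the 3 * doc_hidden_size fusion width simply mirrors the SFU constructor calls in the code:

# Illustrative values; the real ones come from args.
rnn_hidden_size = 128

doc_hidden_size = 2 * rnn_hidden_size      # bidirectional encoder output: 256
sfu_fusion_size = 3 * doc_hidden_size      # second argument passed to layers.SFU: 768
aggregate_input = doc_hidden_size          # each aggregate StackedBRNN consumes 256-d vectors
aggregate_output = 2 * rnn_hidden_size     # and emits a bidirectional 256-d sequence again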
Example No. 13
    def __init__(self, args=None):
        super(MnemonicReaderV1, self).__init__()
        if args is None:
            args = FLAGS
        # Store config
        self.args = args

        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        self.embedding = wenzheng.pyt.get_embedding(
            vocab_size, args.emb_dim, args.word_embedding_file,
            args.finetune_word_embedding)

        doc_input_size = args.emb_dim

        # Encoder
        self.encoding_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.rnn_hidden_size,
            num_layers=1,
            dropout_rate=1 - args.keep_prob,
            dropout_output=False,
            concat_layers=False,
            rnn_type=self.RNN_TYPES['gru'],
            padding=False,
        )

        doc_hidden_size = 2 * args.rnn_hidden_size

        # Interactive aligning, self aligning and aggregating
        self.interactive_aligners = nn.ModuleList()
        self.interactive_SFUs = nn.ModuleList()
        self.self_aligners = nn.ModuleList()
        self.self_SFUs = nn.ModuleList()
        self.aggregate_rnns = nn.ModuleList()

        for i in range(args.hop):
            # interactive aligner
            self.interactive_aligners.append(
                layers.SeqAttnMatch(doc_hidden_size, identity=True))
            self.interactive_SFUs.append(
                layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
            # self aligner
            self.self_aligners.append(
                layers.SelfAttnMatch(doc_hidden_size,
                                     identity=True,
                                     diag=False))
            self.self_SFUs.append(
                layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
            # aggregating
            self.aggregate_rnns.append(
                layers.StackedBRNN(
                    input_size=doc_hidden_size,
                    hidden_size=args.rnn_hidden_size,
                    num_layers=1,
                    dropout_rate=1 - args.keep_prob,
                    dropout_output=False,
                    concat_layers=False,
                    rnn_type=self.RNN_TYPES['gru'],
                    padding=False,
                ))

        self.logits = nn.Linear(2 * args.rnn_hidden_size, NUM_CLASSES)
        self.logits2 = nn.Linear(2 * args.rnn_hidden_size, NUM_CLASSES)
Example No. 14
    def __init__(self):
        super(RNet, self).__init__()
        vocabulary.init()
        vocab_size = vocabulary.get_vocab_size()

        self.embedding = wenzheng.Embedding(
            vocab_size,
            FLAGS.emb_dim,
            FLAGS.word_embedding_file,
            trainable=FLAGS.finetune_word_embedding,
            vocab2_size=FLAGS.unk_vocab_size,
            vocab2_trainable=FLAGS.finetune_unk_vocab)
        self.num_layers = FLAGS.num_layers
        self.num_units = FLAGS.rnn_hidden_size
        self.keep_prob = FLAGS.keep_prob

        logging.info('num_layers:', self.num_layers)
        logging.info('num_units:', self.num_units)
        logging.info('keep_prob:', self.keep_prob)

        self.encode = melt.layers.CudnnRnn(num_layers=self.num_layers,
                                           num_units=self.num_units,
                                           keep_prob=self.keep_prob)

        if FLAGS.use_qc_att or FLAGS.use_bidaf_att:
            assert not (FLAGS.use_qc_att and FLAGS.use_bidaf_att
                        ), 'use rnet or use bidaf? just choose one!'
            Attention = melt.layers.DotAttention if FLAGS.use_qc_att else melt.layers.BiDAFAttention
            # Sharing the att and match attention seems fine and may improve results a bit,
            # but just follow the squad setup and use different dot attentions.
            self.att_dot_attention = Attention(hidden=self.num_units,
                                               keep_prob=self.keep_prob,
                                               combiner=FLAGS.att_combiner)
            # TODO: naming does not work like layers.Dense; in eager mode the layers are
            # named att_encode / match_encode, while in graph mode they become cudnn_rnn,
            # cudnn_rnn_1, so name=... is ignored, unlike layers.Dense.
            self.att_encode = melt.layers.CudnnRnn(num_layers=1,
                                                   num_units=self.num_units,
                                                   keep_prob=self.keep_prob)
            #self.att_encode = melt.layers.CudnnRnn(num_layers=1, num_units=self.num_units, keep_prob=0.5)

        if FLAGS.use_label_emb or FLAGS.use_label_att:
            assert not (FLAGS.use_label_emb and FLAGS.use_label_att)
            self.label_emb_height = NUM_CLASSES if not FLAGS.label_emb_height else FLAGS.label_emb_height
            self.label_embedding = melt.layers.Embedding(
                self.label_emb_height, FLAGS.emb_dim)
            if not FLAGS.use_label_att:
                # TODO not use activation ?
                #self.label_dense = keras.layers.Dense(FLAGS.emb_dim, activation=tf.nn.relu)
                self.label_dense = keras.layers.Dense(FLAGS.emb_dim,
                                                      use_bias=False)
            else:
                self.label_att_dot_attention = melt.layers.DotAttention(
                    hidden=self.num_units,
                    keep_prob=self.keep_prob,
                    combiner=FLAGS.att_combiner)
                self.label_att_encode = melt.layers.CudnnRnn(
                    num_layers=1,
                    num_units=self.num_units,
                    keep_prob=self.keep_prob)
                #self.label_att_encode = melt.layers.CudnnRnn(num_layers=1, num_units=self.num_units, keep_prob=0.5)

        if FLAGS.use_self_match:
            self.match_dot_attention = melt.layers.DotAttention(
                hidden=self.num_units,
                keep_prob=self.keep_prob,
                combiner=FLAGS.att_combiner)
            self.match_encode = melt.layers.CudnnRnn(num_layers=1,
                                                     num_units=self.num_units,
                                                     keep_prob=self.keep_prob)
            #self.match_encode = melt.layers.CudnnRnn(num_layers=1, num_units=self.num_units, keep_prob=0.5)

        # TODO: might try setting use_bias=True everywhere
        if FLAGS.use_answer_emb:
            self.context_dense = keras.layers.Dense(FLAGS.emb_dim,
                                                    use_bias=False)
            self.answer_dense = keras.layers.Dense(FLAGS.emb_dim,
                                                   use_bias=False)
            # self.context_dense = keras.layers.Dense(FLAGS.emb_dim, activation=tf.nn.relu)
            # self.answer_dense = keras.layers.Dense(FLAGS.emb_dim, activation=tf.nn.relu)

        logging.info('encoder_output_method:', FLAGS.encoder_output_method)
        logging.info('topk:', FLAGS.top_k)
        self.pooling = melt.layers.Pooling(FLAGS.encoder_output_method,
                                           top_k=FLAGS.top_k,
                                           att_activation=getattr(
                                               tf.nn, FLAGS.att_activation))

        self.logits = keras.layers.Dense(NUM_CLASSES, activation=None)
        if FLAGS.split_type:
            self.logits2 = keras.layers.Dense(NUM_CLASSES, activation=None)