Example #1
    def __init__(self, name='ra', nimg=2048, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nh, name=self.name+'@attention')

        # inputs
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in the test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(np.zeros((1, npatch, na)).astype(theano.config.floatX))
Example #2
 def __init__(self, embedding_matrix, opt):
     super(IAN, self).__init__()
     self.opt = opt
     self.embed = nn.Embedding.from_pretrained(
         torch.tensor(embedding_matrix, dtype=torch.float))
     self.lstm_context = DynamicLSTM(opt.embed_dim,
                                     opt.hidden_dim,
                                     num_layers=1,
                                     batch_first=True)
     self.lstm_aspect = DynamicLSTM(opt.embed_dim,
                                    opt.hidden_dim,
                                    num_layers=1,
                                    batch_first=True)
     self.attention_aspect = Attention(opt.hidden_dim,
                                       score_function='bi_linear')
     self.attention_context = Attention(opt.hidden_dim,
                                        score_function='bi_linear')
     self.dense = nn.Linear(opt.hidden_dim * 2, opt.polarities_dim)
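A minimal instantiation sketch for the IAN constructor above, assuming opt is a plain namespace carrying the three hyperparameters the code reads (embed_dim, hidden_dim, polarities_dim); the dimensions and the zero embedding matrix are placeholders, not values from the original project.

import numpy as np
from types import SimpleNamespace

# placeholder hyperparameters and embedding matrix (illustrative values only)
opt = SimpleNamespace(embed_dim=300, hidden_dim=300, polarities_dim=3)
embedding_matrix = np.zeros((5000, opt.embed_dim), dtype=np.float32)
model = IAN(embedding_matrix, opt)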
Example #3
 def __init__(self, params):
     super(Seq2Seq, self).__init__()
     self.params = params
     self.embedding_matrix = load_embedding_matrix()
     self.encoder = Encoder(params["vocab_size"], params["vector_dim"],
                            params["encoder_units"], self.embedding_matrix)
     self.attention = Attention(params["attn_units"])
     self.decoder = Decoder(params["vocab_size"], params["vector_dim"],
                            params["decoder_units"], self.embedding_matrix)
    def __init__(self, num_layers, num_heads, embed_dim, ff_dim, dropout=0.):
        super(Decoder, self).__init__()
        self.self_atts = nn.ModuleList([])
        self.enc_dec_atts = nn.ModuleList([])
        self.pos_ffs = nn.ModuleList([])
        self.lnorms = nn.ModuleList([])
        for i in range(num_layers):
            self.self_atts.append(
                Attention(embed_dim, num_heads, dropout=dropout))
            self.enc_dec_atts.append(
                Attention(embed_dim, num_heads, dropout=dropout))
            self.pos_ffs.append(
                PositionWiseFeedForward(embed_dim, ff_dim, dropout=dropout))
            self.lnorms.append(
                nn.ModuleList(
                    [nn.LayerNorm(embed_dim, eps=1e-6) for i in range(3)]))

        self.last_lnorm = nn.LayerNorm(embed_dim, eps=1e-6)
        self.dropout = dropout
        self.num_layers = num_layers
Example #5
def create_Attention_layer(layer_info):
    if logging_enabled:
        print("- Entered layers_factory::create_Attention_layer Global Method")

    if len(layer_info) != 5:
        raise RuntimeError('Attention layer must have 5 specs')

    return Attention(input_dim=int(layer_info['input_dim']),
                     att_times=int(layer_info['att_times']),
                     att_num=int(layer_info['att_num']),
                     att_style=layer_info['att_style'],
                     att_weight=parse_as_bool(layer_info['att_weight']))
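A hedged usage sketch for create_Attention_layer: the dict below only mirrors the five keys parsed above; the concrete values (and whether 'dot' is a valid att_style) are assumptions, not taken from the original project.

# hypothetical 5-spec dict; values are placeholders
layer_info = {
    'input_dim': '64',
    'att_times': '1',
    'att_num': '1',
    'att_style': 'dot',
    'att_weight': 'True',
}
attention_layer = create_Attention_layer(layer_info)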
Example #6
    def __init__(self, n_feat, n_message_passing, n_hid, n_penultimate,
                 n_class, dropout, embeddings, use_master_node,
                 graph_of_sentences):
        super(MPAD, self).__init__()
        self.graph_of_sentences = graph_of_sentences
        self.n_message_passing = n_message_passing
        self.embedding = nn.Embedding(embeddings.shape[0], embeddings.shape[1])
        self.embedding.weight.data.copy_(torch.from_numpy(embeddings))
        self.embedding.weight.requires_grad = False

        self.mps1 = torch.nn.ModuleList()
        self.atts1 = torch.nn.ModuleList()
        for i in range(n_message_passing):
            if i == 0:
                self.mps1.append(MessagePassing(n_feat, n_hid))
            else:
                self.mps1.append(MessagePassing(n_hid, n_hid))
            self.atts1.append(Attention(n_hid, n_hid, use_master_node))

        if use_master_node:
            self.bn = nn.BatchNorm1d(2 * n_message_passing * n_hid, n_hid)
            self.fc1 = nn.Linear(2 * n_message_passing * n_hid, n_hid)
        else:
            self.bn = nn.BatchNorm1d(n_message_passing * n_hid, n_hid)
            self.fc1 = nn.Linear(n_message_passing * n_hid, n_hid)

        if graph_of_sentences == 'sentence_att':
            self.att = Attention(n_hid, n_hid, False)
            self.fc2 = nn.Linear(n_hid, n_penultimate)
        else:
            self.fc2 = nn.Linear(n_message_passing * n_hid, n_penultimate)
            self.mps2 = torch.nn.ModuleList()
            self.atts2 = torch.nn.ModuleList()
            for i in range(n_message_passing):
                self.mps2.append(MessagePassing(n_hid, n_hid))
                self.atts2.append(Attention(n_hid, n_hid, False))

        self.fc3 = nn.Linear(n_penultimate, n_class)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
    def atae_lstm(self):
        input_text = Input(shape=(self.max_len, ))
        input_aspect = Input(shape=(1, ), )

        if self.config.word_embed_type != 'random':
            word_embedding = Embedding(
                input_dim=self.text_embeddings.shape[0],
                output_dim=self.config.word_embed_dim,
                weights=[self.text_embeddings],
                trainable=self.config.word_embed_trainable,
                mask_zero=True)
        else:
            word_embedding = Embedding(
                input_dim=self.config.text_random_input_dim,
                output_dim=self.config.word_embed_dim,
                mask_zero=True)
        # dropout rate 0.2
        text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))

        if self.config.aspect_embed_type == 'random':
            asp_embedding = Embedding(
                input_dim=self.config.aspect_random_input_dim,
                output_dim=self.config.aspect_embed_dim)
        else:
            asp_embedding = Embedding(
                input_dim=self.config.aspect_random_input_dim,  # actually always 20
                output_dim=self.config.aspect_embed_dim,
                trainable=self.config.aspect_embed_trainable)
        aspect_embed = asp_embedding(input_aspect)
        aspect_embed = Flatten()(aspect_embed)  # reshape to 2d
        repeat_aspect = RepeatVector(self.max_len)(
            aspect_embed)  # repeat aspect for every word in sequence

        input_concat = concatenate([text_embed, repeat_aspect], axis=-1)
        hidden_vecs, state_h, _ = LSTM(self.config.lstm_units,
                                       return_sequences=True,
                                       return_state=True)(input_concat)
        concat = concatenate([hidden_vecs, repeat_aspect], axis=-1)

        # apply attention mechanism
        attend_weight = Attention()(concat)
        attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(
            attend_weight)
        attend_hidden = multiply([hidden_vecs, attend_weight_expand])
        attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)

        attend_hidden_dense = Dense(self.config.lstm_units)(attend_hidden)
        last_hidden_dense = Dense(self.config.lstm_units)(state_h)
        final_output = Activation('tanh')(add(
            [attend_hidden_dense, last_hidden_dense]))

        return Model([input_text, input_aspect], final_output)
Example #8
File: DMGI.py  Project: zhanghegui/DMGI
    def __init__(self, args):
        super(modeler, self).__init__()
        self.args = args
        self.gcn = nn.ModuleList([GCN(args.ft_size, args.hid_units, args.activation, args.drop_prob, args.isBias) for _ in range(args.nb_graphs)])

        self.disc = Discriminator(args.hid_units)
        self.H = nn.Parameter(torch.FloatTensor(1, args.nb_nodes, args.hid_units))
        self.readout_func = self.args.readout_func
        if args.isAttn:
            self.attn = nn.ModuleList([Attention(args) for _ in range(args.nheads)])

        if args.isSemi:
            self.logistic = LogReg(args.hid_units, args.nb_classes).to(args.device)

        self.init_weight()
 def __init__(self, config, embeddings=None):
     # define model inputs
     sent_inputs = Input(shape=(config.max_words, ), dtype='float64')
     doc_inputs = Input(shape=(config.max_sents, config.max_words),
                        dtype='float64')
     # embedding layer
     embed = embedding_layers(config, embeddings)(sent_inputs)
     # sentence encoding
     sent_enc = Bidirectional(
         GRU(config.rnn_units[0],
             dropout=config.drop_rate[0],
             recurrent_dropout=config.re_drop[0],
             return_sequences=True))(embed)
     sent_att = Attention(config.att_size[0], name='AttLayer_1')(sent_enc)
     self.sent_model = Model(sent_inputs, sent_att)
     # document encoding
     doc_emb = TimeDistributed(self.sent_model)(doc_inputs)
     doc_enc = Bidirectional(
         GRU(config.rnn_units[1],
             dropout=config.drop_rate[1],
             recurrent_dropout=config.re_drop[1],
             return_sequences=True))(doc_emb)
     doc_att = Attention(config.att_size[1], name='AttLayer_2')(doc_enc)
     # FC
     fc1_drop = Dropout(config.drop_rate[1])(doc_att)
     fc1_bn = BatchNormalization()(fc1_drop)
     fc1 = Dense(config.fc_units[0],
                 activation=config.activation_func,
                 kernel_initializer='he_normal',
                 kernel_regularizer=regularizers.l2(0.01))(fc1_bn)
     fc2_drop = Dropout(config.drop_rate[1])(fc1)
     # output
     doc_pred = Dense(config.ntags, activation=config.classifier)(fc2_drop)
     # final model
     self.model = Model(inputs=doc_inputs, outputs=doc_pred)
     self.config = config
Example #10
 def _setup_layers(self):
     """
     Creating layers of model.
     1. GCN layers.
     2. Primary capsules.
     3. Attention
     4. Graph capsules.
     5. Class capsules.
     """
     self.base_layers = [GCNConv(self.number_of_features, self.args.gcn_filters)]
     for layer in range(self.args.gcn_layers - 1):
         self.base_layers.append(GCNConv(self.args.gcn_filters, self.args.gcn_filters))
     self.base_layers = ListModule(*self.base_layers)
     self.first_capsule = PrimaryCapsuleLayer(self.args.gcn_filters, self.args.gcn_layers,
                                              self.args.gcn_layers, self.args.capsule_dimensions)
     self.attention = Attention(self.args.gcn_layers * self.args.gcn_filters * self.args.capsule_dimensions,
                                self.args.inner_attention_dimension)
     self.graph_capsule = SecondaryCapsuleLayer(self.args.gcn_layers * self.args.gcn_filters, self.args.capsule_dimensions,
                                                self.args.number_of_capsules, self.args.capsule_dimensions)
     self.class_capsule = SecondaryCapsuleLayer(self.args.capsule_dimensions, self.args.number_of_capsules,
                                                self.number_of_targets, self.args.capsule_dimensions)
    def __init__(self, n_feat,
                 max_resolution,
                 n_classes=0,
                 use_dropout=None,
                 use_attention=False,
                 arch=None,
                 return_features=False):
        super().__init__()
        self.max_resolution = max_resolution
        self.use_dropout = use_dropout
        self.return_features = return_features
        self.res1 = ResBlock_D(3, n_feat, downsample=True)
        self.use_attention = use_attention
        if use_attention:
            self.attn = Attention(n_feat)

        self.residual_blocks = nn.ModuleList([])
        n_layers = int(np.log2(self.max_resolution)) - 2
        last_block_factor = 0

        for i in range(n_layers):
            is_last = (i == n_layers - 1)
            if arch is None:
                prev_factor = 2 ** (i)
                curr_factor = 2 ** (i + 1)
            else:
                prev_factor = arch[i]
                curr_factor = arch[i + 1]
            # print(f"block ({i}): {prev_factor}, {curr_factor}")
            block = ResBlock_D(prev_factor * n_feat, curr_factor * n_feat,
                               downsample=not is_last)
            self.residual_blocks.add_module(f"res_block_{i}", block)
            if is_last:
                last_block_factor = curr_factor

        if self.use_dropout is not None:
            self.dropout = nn.Dropout(self.use_dropout)

        self.fc = nn.utils.spectral_norm(
            nn.Linear(last_block_factor * n_feat, 1)).apply(
            init_weight)
        self.embedding = nn.Embedding(num_embeddings=n_classes,
                                      embedding_dim=last_block_factor * n_feat).apply(
            init_weight)
Example #12
def build_attention():
    """
    Build the model architecture for attention output
    """
    inputs = Input(shape=(MAX_LEN, 20), name='Input')
    masking = Masking(mask_value=0.0,
                      input_shape=(MAX_LEN, 20),
                      name='Masking')(inputs)
    hidden = Bidirectional(LSTM(512,
                                use_bias=True,
                                dropout=0.5,
                                return_sequences=True),
                           name='Bidirectional-LSTM')(masking)
    hidden = MultiHeadAttention(head_num=32,
                                activation='relu',
                                use_bias=True,
                                return_multi_attention=False,
                                name='Multi-Head-Attention')(hidden)
    hidden = Dropout(0.2, name='Dropout_1')(hidden)
    hidden = Attention(return_attention=True, name='Attention')(hidden)
    model = Model(inputs=inputs, outputs=hidden)
    return model
Example #13
    def build(self):

        if self.opt.match_type == 'pointwise':
            reps = [
                self.representation_model.get_representation(doc)
                for doc in [self.question, self.answer]
            ]
            if self.opt.onehot:
                output = self.dense_last(Attention()(reps))
            else:
                output = self.distance(reps)
            model = Model([self.question, self.answer], output)

        elif self.opt.match_type == 'pairwise':

            q_rep = self.representation_model.get_representation(self.question)

            score1 = self.distance([
                q_rep,
                self.representation_model.get_representation(self.answer)
            ])
            score2 = self.distance([
                q_rep,
                self.representation_model.get_representation(self.neg_answer)
            ])
            basic_loss = MarginLoss(self.opt.margin)([score1, score2])

            output = [score1, basic_loss, basic_loss]
            model = Model([self.question, self.answer, self.neg_answer],
                          output)
        else:
            raise ValueError(
                'wrong input of matching type. Please input pairwise or pointwise.'
            )
        return model
Example #14
encoder = [
    Embedding(e_vocab_size, EMB_DIM),
    GRU(EMB_DIM, HID_DIM, m),
    GRU(EMB_DIM, HID_DIM, m[:, ::-1])
]

x_emb = f_props(encoder[:1], x)
h_ef = f_props(encoder[1:2], x_emb)
h_eb = f_props(encoder[2:], x_emb[:, ::-1])[:, ::-1, :]
h_e = tf.concat([h_ef, h_eb], axis=2)
h_d1_0 = tf.reduce_mean(h_e, axis=1)
h_d2_0 = tf.reduce_mean(h_e, axis=1)

decoder = [
    Embedding(d_vocab_size, EMB_DIM),
    GRU(EMB_DIM, 2 * HID_DIM, tf.ones_like(t_in, dtype='float32'), h_0=h_d1_0),
    Attention(2 * HID_DIM, 2 * HID_DIM, h_e, ma),
    GRU(EMB_DIM + 2 * HID_DIM,
        2 * HID_DIM,
        tf.ones_like(t_in, dtype='float32'),
        h_0=h_d2_0),
    RVAE(EMB_DIM, 2 * HID_DIM, LAT_DIM),
    Dense3d(LAT_DIM + 2 * HID_DIM, HID_DIM, tf.nn.tanh),
    Dense3d(HID_DIM, d_vocab_size, tf.nn.softmax)
]

t_in_emb = f_props(decoder[:1], t_in)
h_d1 = f_props(decoder[1:2], t_in_emb)
h_d1__ = tf.concat([h_d1_0[:, None, :], h_d1], axis=1)[:, :-1, :]
c = f_props(decoder[2:3], h_d1)
h_d2 = f_props(decoder[3:4], tf.concat([t_in_emb, c], axis=2))
z, KL = f_props(decoder[4:5], [h_d1__, t_in_emb])
Example #15
    def __init__(self, blocks_args, global_args):
        super().__init__()
        assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'block args must be greater than 0'
        self._global_args = global_args

        out_channels = 3  # rgb

        self._input_expand = nn.Linear(self._global_args.input_size,
                                       self._global_args.seq_size)

        def get_block(constructor, use, *args):
            if use:
                return constructor(*args)
            else:
                return None

        # linear block
        self._base_transformer = get_block(
            Transformer, self._global_args.use_base_transformer,
            self._global_args.base_transformer_args())
        self._seq_to_image_start = SeqToImageStart(
            self._global_args.seq_to_image_start_args())

        self._image_blocks = nn.ModuleList([])
        self._image_to_seq_blocks = nn.ModuleList([])
        self._seq_blocks = nn.ModuleList([])

        last_ch = blocks_args[-1].output_ch

        for i, block_args in enumerate(blocks_args):
            input_ch = block_args.input_ch
            output_ch = block_args.output_ch

            for repeat_num in range(block_args.num_repeat):
                block_args.next_block()

                # TODO: consider not constructing blocks which aren't used...
                self._image_blocks.append(
                    MBConvGBlock(block_args.mbconv_args()))
                self._image_to_seq_blocks.append(
                    get_block(ImageToSeq,
                              block_args.use_image_to_seq_this_block,
                              block_args.image_to_seq_args()))
                self._seq_blocks.append(
                    get_block(Transformer, block_args.use_seq_this_block,
                              block_args.transformer_args()))

            if (self._global_args.use_nonlocal
                    and i == self._global_args.nonlocal_index - 1):
                self._attention_index = len(self._image_blocks) - 1
                self._attention = Attention(block_args.output_ch)

        self._swish = MemoryEfficientSwish()

        self.output_bn = ConfigurableNorm(
            last_ch,
            input_gain_bias=False,
            norm_style=self._global_args.norm_style)
        self.output_conv = nn.Conv2d(last_ch,
                                     out_channels,
                                     kernel_size=3,
                                     padding=1)

        negative_allowance = 0.05

        # CELU might be a good choice...
        self._output_activation = nn.CELU(alpha=negative_allowance)
Example #16
class Model(object):
    """
    Region Attention model
    """
    def __init__(self, name='ra', nimg=2048, nnh=512, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nnh = f.attrs['nnh']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'nnh': nnh, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nnh, name=self.name+'@attention')

        # inputs
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in the test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(np.zeros((1, npatch, nimg)).astype(theano.config.floatX))

    def compute(self, state, w_idx, feat):
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        e_tm1, c_tm1, h_tm1 = split_state(state, scheme=[(1, self.config['na']), (2, self.config['nh'])])
        # attention
        e_t, alpha = self.attention.compute(feat, T.concatenate([e_tm1, h_tm1, word_vec], axis=1))
        # lstm step
        e_w = T.concatenate([e_t, word_vec], axis=-1)
        c_t, h_t = self.lstm.compute(e_w, c_tm1, h_tm1)  # (mb,nh)
        # merge state
        new_state = T.concatenate([e_t, c_t, h_t], axis=-1)
        # predict word probability
        p = self.pred_mlp.compute(T.concatenate([e_t, h_t, word_vec], axis=-1))
        return new_state, p, alpha

    def scan_func(self, w_tm1, w_t, state, feat):
        # update state
        new_state, p, alpha = self.compute(state, w_tm1, feat)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss, alpha

    def init_func(self, img_value):
        if self._proj_func is None:
            img = T.tensor3()
            self._proj_func = theano.function([img], self.proj_mlp.compute(img))
        if self._init_func is None:
            init_e = self._feat_shared.mean(axis=1)
            init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
            self._init_func = theano.function([], init_state)
        self._feat_shared.set_value(self._proj_func(img_value))
        return self._init_func()

    def step_func(self, state_value, w_value):
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p, _ = self.compute(state, w, self._feat_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for l in self.layers:
            l.save_weights(save_file)
        with h5py.File(save_file) as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        for l in self.layers:
            l.load_weights(model_file)
Example #17
    def __init__(self, config, mode):
        super(Model, self).__init__()
        self.logger = ut.get_logger(config['log_file'])

        ENC_SCOPE = 'encoder'
        DEC_SCOPE = 'decoder'
        ATT_SCOPE = 'attention'
        OUT_SCOPE = 'outputer'
        SFM_SCOPE = 'softmax'

        batch_size = config['batch_size']
        feed_input = config['feed_input']
        grad_clip = config['grad_clip']
        beam_size = config['beam_size']
        beam_alpha = config['beam_alpha']
        num_layers = config['num_layers']
        rnn_type = config['rnn_type']
        score_func_type = config['score_func_type']

        src_vocab_size = config['src_vocab_size']
        trg_vocab_size = config['trg_vocab_size']

        src_embed_size = config['src_embed_size']
        trg_embed_size = config['trg_embed_size']

        enc_rnn_size = config['enc_rnn_size']
        dec_rnn_size = config['dec_rnn_size']

        input_keep_prob = config['input_keep_prob']
        output_keep_prob = config['output_keep_prob']

        attention_maps = {
            ac.SCORE_FUNC_DOT: Attention.DOT,
            ac.SCORE_FUNC_GEN: Attention.GEN,
            ac.SCORE_FUNC_BAH: Attention.BAH
        }
        score_func_type = attention_maps[score_func_type]

        if mode != ac.TRAINING:
            batch_size = 1
            input_keep_prob = 1.0
            output_keep_prob = 1.0

        # Placeholder
        self.src_inputs = tf.placeholder(tf.int32, [batch_size, None])
        self.src_seq_lengths = tf.placeholder(tf.int32, [batch_size])
        self.trg_inputs = tf.placeholder(tf.int32, [batch_size, None])
        self.trg_targets = tf.placeholder(tf.int32, [batch_size, None])
        self.target_weights = tf.placeholder(tf.float32, [batch_size, None])

        # First, define the src/trg embeddings
        with tf.variable_scope(ENC_SCOPE):
            self.src_embedding = tf.get_variable(
                'embedding',
                shape=[src_vocab_size, src_embed_size],
                dtype=tf.float32)
        with tf.variable_scope(DEC_SCOPE):
            self.trg_embedding = tf.get_variable(
                'embedding',
                shape=[trg_vocab_size, trg_embed_size],
                dtype=tf.float32)

        # Then select the RNN cell, reuse if not in TRAINING mode
        if rnn_type != ac.LSTM:
            raise NotImplementedError

        reuse = mode != ac.TRAINING  # if dev/test, reuse cell
        encoder_cell = ut.get_lstm_cell(ENC_SCOPE,
                                        num_layers,
                                        enc_rnn_size,
                                        output_keep_prob=output_keep_prob,
                                        seed=ac.SEED,
                                        reuse=reuse)

        att_state_size = dec_rnn_size
        decoder_cell = ut.get_lstm_cell(DEC_SCOPE,
                                        num_layers,
                                        dec_rnn_size,
                                        output_keep_prob=output_keep_prob,
                                        seed=ac.SEED,
                                        reuse=reuse)

        # The model
        encoder = Encoder(encoder_cell, ENC_SCOPE)
        decoder = Encoder(decoder_cell, DEC_SCOPE)
        outputer = FeedForward(enc_rnn_size + dec_rnn_size,
                               att_state_size,
                               OUT_SCOPE,
                               activate_func=tf.tanh)
        self.softmax = softmax = Softmax(att_state_size, trg_vocab_size,
                                         SFM_SCOPE)

        # Encode source sentence
        encoder_inputs = tf.nn.embedding_lookup(self.src_embedding,
                                                self.src_inputs)
        encoder_inputs = tf.nn.dropout(encoder_inputs,
                                       input_keep_prob,
                                       seed=ac.SEED)
        encoder_outputs, last_state = encoder.encode(
            encoder_inputs,
            sequence_length=self.src_seq_lengths,
            initial_state=None)
        # Define an attention layer over encoder outputs
        attention = Attention(ATT_SCOPE,
                              score_func_type,
                              encoder_outputs,
                              enc_rnn_size,
                              dec_rnn_size,
                              common_dim=enc_rnn_size
                              if score_func_type == Attention.BAH else None)

        # This function takes a decoder output, makes it attend to the encoder outputs, and
        # returns the attentional state used for predicting the next target word
        def decoder_output_func(h_t):
            alignments, c_t = attention.calc_context(self.src_seq_lengths, h_t)
            c_t_h_t = tf.concat([c_t, h_t], 1)
            output = outputer.transform(c_t_h_t)
            return output, alignments

        # Fit everything in the decoder & start decoding
        decoder_inputs = tf.nn.embedding_lookup(self.trg_embedding,
                                                self.trg_inputs)
        decoder_inputs = tf.nn.dropout(decoder_inputs,
                                       input_keep_prob,
                                       seed=ac.SEED)
        attentional_outputs = decoder.decode(decoder_inputs,
                                             decoder_output_func,
                                             att_state_size,
                                             feed_input=feed_input,
                                             initial_state=last_state,
                                             reuse=False)
        attentional_outputs = tf.reshape(attentional_outputs,
                                         [-1, att_state_size])

        # Loss
        logits = softmax.calc_logits(attentional_outputs)
        logits = tf.reshape(logits, [batch_size, -1, trg_vocab_size])
        loss = sequence_loss(logits,
                             self.trg_targets,
                             self.target_weights,
                             average_across_timesteps=False,
                             average_across_batch=False)

        if mode != ac.TRAINING:
            self.loss = tf.stop_gradient(tf.reduce_sum(loss))

            max_output_length = 3 * self.src_seq_lengths[0]
            tensor_to_state = partial(ut.tensor_to_lstm_state,
                                      num_layers=config['num_layers'])
            beam_outputs = decoder.beam_decode(self.trg_embedding,
                                               ac.BOS_ID,
                                               ac.EOS_ID,
                                               decoder_output_func,
                                               att_state_size,
                                               softmax.calc_logprobs,
                                               trg_vocab_size,
                                               max_output_length,
                                               tensor_to_state,
                                               alpha=beam_alpha,
                                               beam_size=beam_size,
                                               feed_input=feed_input,
                                               initial_state=last_state,
                                               reuse=True)
            self.probs, self.scores, self.symbols, self.parents, self.alignments = beam_outputs

        # If in training, do the grad backpropagate
        if mode == ac.TRAINING:
            self.loss = tf.reduce_sum(loss)

            # Option to fix some variables
            fixed_vars = config['fixed_var_list'] if config[
                'fixed_var_list'] else []

            if fixed_vars:
                fixed_vars = operator.attrgetter(*fixed_vars)(self)
                if isinstance(fixed_vars, list):
                    fixed_var_names = [
                        _fixed_var.name for _fixed_var in fixed_vars
                    ]
                else:
                    fixed_var_names = [fixed_vars.name]
            else:
                fixed_var_names = []

            tvars = tf.trainable_variables()
            tvars = [
                _var for _var in tvars if _var.name not in fixed_var_names
            ]

            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              grad_clip)
            self.lr = tf.Variable(1.0, trainable=False, name='lr')
            if config['optimizer'] == ac.ADADELTA:
                optimizer = tf.train.AdadeltaOptimizer(learning_rate=self.lr,
                                                       rho=0.95,
                                                       epsilon=1e-6)
            else:
                optimizer = tf.train.GradientDescentOptimizer(self.lr)

            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # Finally, log some model stats
        if mode == ac.TRAINING:

            def num_params(var):
                shape = var.get_shape().as_list()
                var_count = 1
                for dim in shape:
                    var_count = var_count * dim

                return var_count

            self.logger.info('{} model:'.format('train' if mode ==
                                                ac.TRAINING else 'dev/test'))
            self.logger.info('Num trainable variables {}'.format(len(tvars)))
            self.logger.info('Num params: {:,}'.format(
                sum([num_params(v) for v in tvars])))
            self.logger.info('List of all trainable parameters:')
            for v in tvars:
                self.logger.info('   {}'.format(v.name))
            self.logger.info('List of all fixed parameters')
            for v in fixed_var_names:
                self.logger.info('   {}'.format(v))
Example #18
def cnn_lstm_f1():
    with open('vocab.data', 'rb') as fin:
        vocab = pickle.load(fin)

    question1 = Input(shape=(20, ))
    question2 = Input(shape=(20, ))

    q1 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=20,
                   trainable=False)(question1)

    q2 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=20,
                   trainable=False)(question2)

    f_rnn = LSTM(30, return_sequences=True, implementation=1)
    b_rnn = LSTM(30,
                 return_sequences=True,
                 implementation=1,
                 go_backwards=True)

    pos = Position_Embedding(mode='concat')
    att = Attention(20)

    q1 = BatchNormalization()(q1)
    qf_rnn = f_rnn(q1)
    qb_rnn = b_rnn(q1)

    q1_rnn = concatenate([qf_rnn, qb_rnn], axis=-1)
    q1_rnn = pos(q1_rnn)
    q1_rnn = concatenate([q1_rnn, att(q1_rnn)])

    q2 = BatchNormalization()(q2)
    af_rnn = f_rnn(q2)
    ab_rnn = b_rnn(q2)
    q2_rnn = concatenate([af_rnn, ab_rnn], axis=-1)
    q2_rnn = pos(q2_rnn)
    q2_rnn = concatenate([q2_rnn, att(q2_rnn)])

    # cnn
    cnns = [
        Conv1D(kernel_size=kernel_size,
               filters=100,
               activation='tanh',
               padding='same') for kernel_size in [1, 2, 3, 5]
    ]
    # qq_cnn = merge([cnn(question_pool) for cnn in cnns], mode='concat')
    q1_cnn = concatenate([cnn(q1_rnn) for cnn in cnns], axis=-1)
    # q2_cnn = merge([cnn(answer_pool) for cnn in cnns], mode='concat')
    q2_cnn = concatenate([cnn(q2_rnn) for cnn in cnns], axis=-1)

    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    q1_pool = Dropout(0.05)(maxpool(q1_cnn))
    q2_pool = Dropout(0.05)(maxpool(q2_cnn))
    merged1 = Dense(100, activation='relu')(q1_pool)
    merged2 = Dense(100, activation='relu')(q2_pool)
    merged = concatenate([merged1, merged2])

    is_duplicate = Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=[question1, question2], outputs=is_duplicate)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
Example #19
embedded = Embedding(input_dim=vocabulary_size, output_dim=embedding_dims)(X)

# Recurrent Layers
if config != 0:
    encoder_output, hidden_state, cell_state = CuDNNLSTM(
        units=128, return_sequences=True, return_state=True)(embedded)
    attention_input = [encoder_output, hidden_state]
else:
    encoder_output = CuDNNLSTM(units=128)(embedded)

# Optional Attention Mechanisms
if config == 1:
    encoder_output, attention_weights = SelfAttention(
        size=128, num_hops=10, use_penalization=False)(encoder_output)
elif config == 2:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='global')(attention_input)
    encoder_output = Flatten()(encoder_output)
elif config == 3:
    encoder_output, attention_weights = Attention(
        context='many-to-one',
        alignment_type='local-p*',
        window_width=100,
        score_function='scaled_dot')(attention_input)
    encoder_output = Flatten()(encoder_output)

# Prediction Layer
Y = Dense(units=num_categories, activation='softmax')(encoder_output)

# Compile model
model = Model(inputs=X, outputs=Y)
model.compile(loss='sparse_categorical_crossentropy')
print('Embedding matrix completed.')

# -------------- DNN goes after here ---------------------
cinput = Input(shape=(context_maxlen,), dtype='int32')
cembed = embedding_layer(cinput)
clstm1 = Bidirectional(LSTM(100, return_sequences=True))(cembed)

qinput = Input(shape=(question_maxlen,), dtype='int32')
qembed = embedding_layer(qinput)
qlstm1 = Bidirectional(LSTM(100, return_sequences=True))(qembed)

cdecoder = RecurrentContainer(decode=True, output_length=context_maxlen, input_length=context_maxlen)
cdecoder.add(AttentionDecoderCell(output_dim=100, hidden_dim=100))
clstm2 = cdecoder(clstm1)
ch1 = Attention(qlstm1)(clstm1)
clstm2 = Bidirectional(LSTM(100, return_sequences=True))(ch1)

qh1 = Attention(clstm2)(qlstm1)
qlstm2 = Bidirectional(LSTM(100, return_sequences=True))(qh1)

ch2 = Attention(qlstm2)(clstm2)
qh2 = Attention(ch2)(qlstm2)

h = Merge([ch2, qh2], mode='concat')
hlstm = Bidirectional(LSTM(100))(h)
output1 = Dense(context_maxlen, activation='softmax')(hlstm)
hmerge = Merge([hlstm, output1], mode='concat')
output2 = Dense(context_maxlen, activation='softmax')(hmerge)

qnamodel = Model(input=[cinput, qinput], output=[output1, output2])
Example #21
def attention_lstm():
    with open('vocab.data', 'rb') as fin:
        vocab = pickle.load(fin)

    question1 = Input(shape=(15, ))
    question2 = Input(shape=(15, ))

    q1 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=15,
                   trainable=False)(question1)

    q2 = Embedding(vocab.nb_words + 1,
                   300,
                   weights=[vocab.embedding],
                   input_length=15,
                   trainable=False)(question2)

    pos = Position_Embedding()
    f_rnn = LSTM(256, return_sequences=True, consume_less='mem')
    b_rnn = LSTM(256,
                 return_sequences=True,
                 consume_less='mem',
                 go_backwards=True)

    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    q1 = pos(q1)
    q2 = pos(q2)
    qf_rnn = f_rnn(q1)
    qb_rnn = b_rnn(q1)
    # q1_rnn = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1)
    q1_rnn = concatenate([qf_rnn, qb_rnn], axis=-1)

    af_rnn = f_rnn(q2)
    ab_rnn = b_rnn(q2)
    # q2_rnn = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1)
    q2_rnn = concatenate([af_rnn, ab_rnn], axis=-1)

    att = Attention(20)

    q1_att = maxpool(att([q1_rnn, q1_rnn, q1_rnn]))
    q1 = Dense(200, activation='relu')(q1_att)

    q2_att = maxpool(att([q2_rnn, q2_rnn, q2_rnn]))
    q2 = Dense(200, activation='relu')(q2_att)

    merged = concatenate([q1, q2])
    merged = Dense(200, activation='relu')(merged)
    merged = Dropout(0)(merged)
    merged = BatchNormalization()(merged)

    is_duplicate = Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=[question1, question2], outputs=is_duplicate)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
    def __init__(self,
                 inp,
                 oup,
                 expand_ratio,
                 kernel_size,
                 stride,
                 se_reduction,
                 drop_connect_ratio=0.2):
        """Basic building block - Inverted Residual Convolution from MobileNet V2
        architecture.

        Arguments:
            expand_ratio (int): ratio by which the convolution is widened internally.
                It is not the same as width_mult in MobileNet, which scales the persistent
                input and output channel counts of a layer rather than the projection of
                channels inside the conv.
        """
        super().__init__()

        hidden_dim = int(inp * expand_ratio)
        self.use_res_connect = stride == 1 and inp == oup

        if self.use_res_connect:
            self.dropconnect = DropConnect(drop_connect_ratio)

        if expand_ratio == 1:
            self.conv = nn.Sequential(
                # depth-wise
                SamePadConv2d(inp=hidden_dim,
                              oup=hidden_dim,
                              kernel_size=kernel_size,
                              stride=stride,
                              groups=hidden_dim,
                              bias=False),
                nn.BatchNorm2d(hidden_dim,
                               eps=batch_norm_epsilon,
                               momentum=batch_norm_momentum),
                Swish(),
                Attention(
                    channels=hidden_dim,
                    reduction=4),  # for some reason, reduction should always be 4 here

                # point-wise-linear
                SamePadConv2d(inp=hidden_dim,
                              oup=oup,
                              kernel_size=1,
                              stride=1,
                              bias=False),
                nn.BatchNorm2d(oup,
                               eps=batch_norm_epsilon,
                               momentum=batch_norm_momentum),
            )
        else:
            self.conv = nn.Sequential(
                # point-wise
                SamePadConv2d(inp,
                              hidden_dim,
                              kernel_size=1,
                              stride=1,
                              bias=False),
                nn.BatchNorm2d(hidden_dim,
                               eps=batch_norm_epsilon,
                               momentum=batch_norm_momentum),
                Swish(),

                # depth-wise
                SamePadConv2d(hidden_dim,
                              hidden_dim,
                              kernel_size,
                              stride,
                              groups=hidden_dim,
                              bias=False),
                nn.BatchNorm2d(hidden_dim,
                               eps=batch_norm_epsilon,
                               momentum=batch_norm_momentum),
                Swish(),
                Attention(channels=hidden_dim, reduction=se_reduction),

                # point-wise-linear
                SamePadConv2d(hidden_dim,
                              oup,
                              kernel_size=1,
                              stride=1,
                              bias=False),
                nn.BatchNorm2d(oup,
                               eps=batch_norm_epsilon,
                               momentum=batch_norm_momentum),
            )
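A short instantiation sketch for the inverted-residual block above; the class name MBConvBlock and the argument values are assumptions for illustration (the snippet does not show the class name), and batch_norm_epsilon/batch_norm_momentum are assumed to exist at module level.

# hypothetical class name and values; stride == 1 and inp == oup enables the residual path
block = MBConvBlock(inp=32, oup=32, expand_ratio=6, kernel_size=3,
                    stride=1, se_reduction=4, drop_connect_ratio=0.2)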
Example #23
class Model(object):
    """
    region attention + scene-specific contexts
    """
    def __init__(self, name='rass', nimg=2048, nh=512, nw=512, na=512, nout=8843, ns=80, npatch=30, model_file=None):
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                na = f.attrs['na']
                ns = f.attrs['ns']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'na': na, 'nout': nout, 'ns': ns, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nh, name=self.name+'@attention')

        # lstm
        self.lstm = BasicLSTM(dim_x=na+nw+ns, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw+ns, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # inputs
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        scene = T.matrix('scene')
        self.inputs = [cap, img, scene]

        # go through sequence
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat, scene])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # initialization for test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(np.zeros((1, npatch, na)).astype(theano.config.floatX))
        self._scene_shared = theano.shared(np.zeros((1, ns)).astype(theano.config.floatX))

    def compute(self, state, w_idx, feat, scene):
        # word embedding
        word_vec = self.embedding.compute(w_idx)
        # split states
        e_tm1, c_tm1, h_tm1 = split_state(state, scheme=[(1, self.config['na']), (2, self.config['nh'])])
        # attention
        e_t, alpha = self.attention.compute(feat, T.concatenate([e_tm1, h_tm1, word_vec], axis=1))
        # lstm step
        e_w_s = T.concatenate([e_t, word_vec, scene], axis=-1)
        c_t, h_t = self.lstm.compute(e_w_s, c_tm1, h_tm1)
        # merge state
        new_state = T.concatenate([e_t, c_t, h_t], axis=-1)
        # add w_{t-1} as feature
        e_h_w_s = T.concatenate([e_t, h_t, word_vec, scene], axis=-1)
        # predict probability
        p = self.pred_mlp.compute(e_h_w_s)
        return new_state, p, alpha

    def scan_func(self, w_tm1, w_t, state, feat, scene):
        # update state
        new_state, p, alpha = self.compute(state, w_tm1, feat, scene)
        # cross-entropy loss
        loss = T.nnet.categorical_crossentropy(p, w_t)
        return new_state, p, loss, alpha

    def init_func(self, img_value, scene_value):
        if self._proj_func is None:
            img = T.tensor3()
            self._proj_func = theano.function([img], self.proj_mlp.compute(img))
        if self._init_func is None:
            init_e = self._feat_shared.mean(axis=1)
            init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
            self._init_func = theano.function([], init_state)
        self._feat_shared.set_value(self._proj_func(img_value))
        self._scene_shared.set_value(scene_value)
        return self._init_func()

    def step_func(self, state_value, w_value):
        if self._step_func is None:
            w = T.ivector()
            state = T.matrix()
            new_state, p, _ = self.compute(state, w, self._feat_shared, self._scene_shared)
            self._step_func = theano.function([state, w], [new_state, T.log(p)])
        return self._step_func(state_value, w_value)

    def save_to_dir(self, save_dir, idx):
        save_file = osp.join(save_dir, self.name+'.h5.'+str(idx))
        for l in self.layers:
            l.save_weights(save_file)
        with h5py.File(save_file) as f:
            for k, v in self.config.items():
                f.attrs[k] = v

    def load_weights(self, model_file):
        for l in self.layers:
            l.load_weights(model_file)
Example #24
 def _setup_attention(self):
     """
     Creating attention layer.
     """
     self.attention = Attention(self.args.gcn_layers* self.args.capsule_dimensions, self.args.inner_attention_dimension)
Example #25
 def build_sentiment_classifier(self, x):
     x = Attention(384)(x)
     x = Dropout(0.2)(x)
     return Dense(NUM_SENTIMENTS, activation='softmax',
                  name='sen_output')(x)
Example #26
    def __init__(self,
                 name='ra',
                 nimg=2048,
                 na=512,
                 nh=512,
                 nw=512,
                 nout=8843,
                 npatch=30,
                 model_file=None):
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {
            'nimg': nimg,
            'na': na,
            'nh': nh,
            'nw': nw,
            'nout': nout,
            'npatch': npatch
        }

        # word embedding layer
        self.embedding = Embedding(n_emb=nout,
                                   dim_emb=nw,
                                   name=self.name + '@embedding')

        # initialization mlp layer
        self.init_mlp = MLP(layer_sizes=[na, 2 * nh],
                            output_type='tanh',
                            name=self.name + '@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na],
                            output_type='tanh',
                            name=self.name + '@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=na + nw,
                              dim_h=nh,
                              name=self.name + '@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[na + nh + nw, nout],
                            output_type='softmax',
                            name=self.name + '@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na,
                                   dim_context=na + nw + nh,
                                   hsize=nh,
                                   name=self.name + '@attention')

        # inputs
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate(
            [init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(
            fn=self.scan_func,
            sequences=[cap[0:-1, :], cap[1:, :]],
            outputs_info=[init_state, None, None, None],
            non_sequences=[feat])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [
            self.embedding, self.init_mlp, self.proj_mlp, self.attention,
            self.lstm, self.pred_mlp
        ]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in the test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(
            np.zeros((1, npatch, nimg)).astype(theano.config.floatX))
def char_word_HAN(max_words, max_sents, embed_size, vocab_cnt, gru_units,
                  drop_rate, att_size, re_drop, num_labels, fc_units,
                  classifier, loss_function, activation_func, pre_trained,
                  embedding_matrix):
    word_sent_inputs = Input(shape=(max_words[0], ), dtype='float64')
    word_embed = embedding_layers(vocab_cnt[0], embed_size, max_words[0],
                                  embedding_matrix[0],
                                  pre_trained)(word_sent_inputs)
    word_sent_enc = Bidirectional(
        GRU(gru_units[0],
            dropout=drop_rate[0],
            recurrent_dropout=re_drop[0],
            return_sequences=True))(word_embed)
    word_sent_att = Attention(att_size[0], name='AttLayer')(word_sent_enc)
    word_sent_model = Model(word_sent_inputs, word_sent_att)

    word_doc_inputs = Input(shape=(max_sents[0], max_words[0]),
                            dtype='float64',
                            name='word_inputs')
    word_doc_emb = TimeDistributed(word_sent_model)(word_doc_inputs)
    word_doc_enc = Bidirectional(
        GRU(gru_units[1],
            dropout=drop_rate[1],
            recurrent_dropout=re_drop[1],
            return_sequences=True))(word_doc_emb)
    word_doc_att = Attention(att_size[1], name='AttLayer_word')(word_doc_enc)

    word_fc1_drop = Dropout(drop_rate[1])(word_doc_att)
    word_fc1 = Dense(fc_units,
                     activation=activation_func,
                     kernel_initializer='he_normal')(word_fc1_drop)
    word_fc2_drop = Dropout(drop_rate[2])(word_fc1)

    char_sent_inputs = Input(shape=(max_words[1], ), dtype='float64')
    char_embed = embedding_layers(vocab_cnt[1], embed_size, max_words[1],
                                  embedding_matrix[1],
                                  pre_trained)(char_sent_inputs)
    char_sent_enc = Bidirectional(
        GRU(gru_units[0],
            dropout=drop_rate[0],
            recurrent_dropout=re_drop[0],
            return_sequences=True))(char_embed)
    char_sent_att = Attention(att_size[2], name='AttLayer')(char_sent_enc)
    char_sent_model = Model(char_sent_inputs, char_sent_att)

    char_doc_inputs = Input(shape=(max_sents[1], max_words[1]),
                            dtype='float64',
                            name='char_inputs')
    char_doc_emb = TimeDistributed(char_sent_model)(char_doc_inputs)
    char_doc_enc = Bidirectional(
        GRU(gru_units[1],
            dropout=drop_rate[1],
            recurrent_dropout=re_drop[1],
            return_sequences=True))(char_doc_emb)
    char_doc_att = Attention(att_size[3], name='AttLayer_char')(char_doc_enc)

    char_fc1_drop = Dropout(drop_rate[1])(char_doc_att)
    char_fc1 = Dense(fc_units,
                     activation=activation_func,
                     kernel_initializer='he_normal')(char_fc1_drop)
    char_fc2_drop = Dropout(drop_rate[2])(char_fc1)

    merge_info = concatenate([word_fc2_drop, char_fc2_drop], axis=1)
    output = Dense(num_labels, activation=classifier, name='out')(merge_info)

    model = Model(inputs=[word_doc_inputs, char_doc_inputs], outputs=output)

    nadam = optimizers.Nadam(clipnorm=1.)
    model.compile(loss=loss_function, optimizer=nadam, metrics=['accuracy'])
    return model
embedded = Embedding(input_dim=vocabulary_size, output_dim=embedding_dims)(X)

# Recurrent Layer
if config != 0:
    encoder_output, hidden_state, cell_state = CuDNNLSTM(
        units=512, return_sequences=True, return_state=True)(embedded)
    attention_input = [encoder_output, hidden_state]
else:
    encoder_output = CuDNNLSTM(units=512)(embedded)

# Optional Attention Mechanisms
if config == 1:
    encoder_output, attention_weights = SelfAttention(
        size=50, num_hops=16, use_penalization=False)(encoder_output)
elif config == 2:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='global')(attention_input)
    encoder_output = Flatten()(encoder_output)
elif config == 3:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='local-p*',
        window_width=25)(attention_input)
    encoder_output = Flatten()(encoder_output)

# Prediction Layer
Y = Dense(units=vocabulary_size, activation='softmax')(encoder_output)

# Compile model
model = Model(inputs=X, outputs=Y)
model.compile(loss=loss,
              optimizer='adam',
              metrics=[perplexity, categorical_accuracy])
Example #29
 def build_category_classifier(self, x):
     x = Attention(384)(x)
     x = Dropout(0.2)(x)
     return Dense(NUM_CATEGORIES, activation='softmax',
                  name='cat_output')(x)
Example #30
 def _setup_attention(self):
     self.attention = Attention(
         self.args.gcn_layers * self.args.capsule_dimensions,
         self.args.inner_attention_dimension)
embedded_target = Embedding(input_dim=target_vocabulary_size, output_dim=embedding_dim)(X_target)
# NOTE: The embedded target sequences (deriving from X_target) allow us to enforce Teacher Forcing:
# using the actual output (correct translation) from the training dataset at the current time step
# as input in the next time step, rather than the output generated by the network.

# Recurrent Layers
# i)  Encoder
encoder_output = CuDNNLSTM(units=128, return_sequences=True)(embedded_input)
# ii) Decoder
decoder_recurrent_layer = CuDNNLSTM(units=128, return_state=True)
# NOTE: The encoder is always fully vectorized and returns the hidden representations of the whole
# sequence at once, whereas the decoder does this step by step.

# Optional Attention Mechanism
if config == 1:
    attention_layer = Attention(context='many-to-many', alignment_type='global')
elif config == 2:
    attention_layer = Attention(context='many-to-many', alignment_type='local-m')
elif config == 3:
    attention_layer = Attention(context='many-to-many', alignment_type='local-p')

# Prediction Layer
decoder_dense_layer = Dense(units=target_vocabulary_size, activation='softmax')

# Training Loop
outputs = []
for timestep in range(target_sequence_length):
    # Get current input from embedded target sequences
    current_word = Lambda(lambda x: x[:, timestep: timestep+1, :])(embedded_target)
    # Apply optional attention mechanism
    if config != 0:
Example #32
 def __init__(self, hidden_dim, output_dim):
     super().__init__()
     self.embedding = Embedding(output_dim, hidden_dim, mask_zero=True)
     self.lstm = LSTM(hidden_dim, return_state=True, return_sequences=True)
     self.attn = Attention(hidden_dim, hidden_dim)
     self.out = Dense(output_dim, activation='softmax')