Example #1
    def __init__(self, n_labels, settings, unfactorized, lonely_only=True):
        super().__init__()
        self.unfactorized = unfactorized

        if lonely_only:
            fnn_input = settings.hidden_lstm * 2
        else:
            fnn_input = settings.hidden_lstm * 2 * 2

        self.label_head_fnn = nn.Linear(fnn_input, settings.dim_mlp)
        self.label_dep_fnn = nn.Linear(fnn_input, settings.dim_mlp)

        self.label_attention = Attention.label_factory(settings.dim_mlp,
                                                       n_labels,
                                                       settings.attention)

        self.dim_lstm = settings.hidden_lstm

        self.dropout_label = settings.dropout_label
        self.dropout_main_ff = settings.dropout_main_ff
        self.locked_dropout = LockedDropout()

        if not self.unfactorized:
            self.edge_head_fnn = nn.Linear(fnn_input, settings.dim_mlp)
            self.edge_dep_fnn = nn.Linear(fnn_input, settings.dim_mlp)

            self.edge_attention = Attention.edge_factory(
                settings.dim_mlp, settings.attention)

            self.dropout_edge = settings.dropout_edge
Example #2
 def __init__(self,
              maxlen_sentence,
              maxlen_word,
              max_features,
              embedding_dims,
              class_num=1,
              last_activation='sigmoid'):
     super(HAN, self).__init__()
     self.maxlen_sentence = maxlen_sentence
     self.maxlen_word = maxlen_word
     self.max_features = max_features
     self.embedding_dims = embedding_dims
     self.class_num = class_num
     self.last_activation = last_activation
     # Word part
     input_word = Input(shape=(self.maxlen_word, ))
     x_word = Embedding(self.max_features,
                        self.embedding_dims,
                        input_length=self.maxlen_word)(input_word)
     x_word = Bidirectional(CuDNNLSTM(128, return_sequences=True))(
         x_word)  # LSTM or GRU
     x_word = Attention(self.maxlen_word)(x_word)
     model_word = Model(input_word, x_word)
     # Sentence part
     self.word_encoder_att = TimeDistributed(model_word)
     self.sentence_encoder = Bidirectional(
         CuDNNLSTM(128, return_sequences=True))  # LSTM or GRU
     self.sentence_att = Attention(self.maxlen_sentence)
     # Output part
     self.classifier = Dense(self.class_num,
                             activation=self.last_activation)
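Example #2 above shows only HAN's __init__. The sketch below is a minimal, assumed call() method composing those layers (output sizes follow the 128-unit BiLSTMs defined above); it is illustrative rather than the source implementation.
 def call(self, inputs):
     # inputs: (batch, maxlen_sentence, maxlen_word) integer word indices
     x_sentence = self.word_encoder_att(inputs)      # word encoder + attention per sentence -> (batch, maxlen_sentence, 256)
     x_sentence = self.sentence_encoder(x_sentence)  # BiLSTM over sentence vectors -> (batch, maxlen_sentence, 256)
     x_sentence = self.sentence_att(x_sentence)      # sentence-level attention -> (batch, 256)
     return self.classifier(x_sentence)              # (batch, class_num)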
Example #3
    def __init__(self, encoder,
                 embed_hidden=300,
                 mlp_hidden=512,
                 time_steps=3):
        super(TopDownWithContext, self).__init__()

        self.encoder = encoder
        self.n_verbs = self.encoder.get_num_verbs()
        self.vocab_size = self.encoder.get_num_labels()
        self.max_role_count = self.encoder.get_max_role_count()
        self.n_role_q_vocab = len(self.encoder.question_words)

        self.w_emb = nn.Embedding(self.n_role_q_vocab + 1, embed_hidden, padding_idx=self.n_role_q_vocab)
        self.q_emb = nn.LSTM(embed_hidden, mlp_hidden,
                             batch_first=True, bidirectional=True)
        self.lstm_proj = nn.Linear(mlp_hidden * 2, mlp_hidden)
        self.v_att = Attention(mlp_hidden, mlp_hidden, mlp_hidden)
        self.ctx_att = Attention(mlp_hidden, mlp_hidden, mlp_hidden)
        self.context = FCNet([mlp_hidden*2, mlp_hidden])
        #self.role_weight = RoleWeightAttention(mlp_hidden, mlp_hidden, mlp_hidden)
        self.detailedq = FCNet([mlp_hidden*2, mlp_hidden])
        self.concat = FCNet([mlp_hidden*2, mlp_hidden])
        self.q_net = FCNet([mlp_hidden, mlp_hidden])
        self.v_net = FCNet([mlp_hidden, mlp_hidden])
        self.classifier = SimpleClassifier(
            mlp_hidden, 2 * mlp_hidden, self.vocab_size, 0.5)
        self.time_steps = time_steps
Example #4
    def __init__(self, bert, opt):
        super(Net, self).__init__()
        self.opt = opt
        self.bert = bert
        self.squeeze_embedding = SqueezeEmbedding()
        self.dropout = nn.Dropout(opt.dropout)

        self.attn_k = Attention(opt.bert_dim,
                                out_dim=opt.hidden_dim,
                                n_head=8,
                                score_function='mlp',
                                dropout=opt.dropout)
        self.attn_q = Attention(opt.bert_dim,
                                out_dim=opt.hidden_dim,
                                n_head=8,
                                score_function='mlp',
                                dropout=opt.dropout)
        self.ffn_c = PositionwiseFeedForward(opt.hidden_dim,
                                             dropout=opt.dropout)
        self.ffn_t = PositionwiseFeedForward(opt.hidden_dim,
                                             dropout=opt.dropout)

        self.attn_s1 = Attention(opt.hidden_dim,
                                 n_head=8,
                                 score_function='mlp',
                                 dropout=opt.dropout)

        self.dense = nn.Linear(opt.hidden_dim * 3, opt.polarities_dim)
Example #5
class GeneratorN(nn.Module):
    def __init__(self, use_self_attention=False):
        super().__init__()
        self.residuals = nn.Sequential(
            *[Residual(D_GF * 2) for _ in range(RESIDUALS)])
        self.attn = Attention(D_GF, D_HIDDEN)
        self.upsample = upsample_block(D_GF * 2, D_GF)
        self.use_self_attention = use_self_attention

        if self.use_self_attention:
            self.self_attn = self_attn_block()

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'GeneratorN params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )

    def forward(self, h_code, c_code, word_embs, mask):
        """
            h_code1(query), output of previous generator:  batch x D_GF x ih x iw (queryL=ihxiw)
            word_embs(context): batch x D_COND x seq_len
            c_code1: batch x D_GF x ih x iw
            att1: batch x sourceL x ih x iw
        """
        self.attn.applyMask(mask)
        c_code, att = self.attn(h_code, word_embs)
        # Image-text attention first, image-image attention second
        if self.use_self_attention:
            c_code = self.self_attn(c_code)

        out_code = torch.cat((h_code, c_code), 1)
        out_code = self.residuals(out_code)
        out_code = self.upsample(out_code)  # D_GF/2 x 2ih x 2iw

        return out_code, att
Example #6
    def get_model(self):
        # Word part
        sentence_input = Input(shape=(self.max_sen_len, ), dtype='int32')
        if not self.embedding:
            emb_layer = Embedding(self.input_dim,
                                  self.embedding_dims,
                                  input_length=self.max_sen_len,
                                  trainable=True,
                                  mask_zero=True)
        else:
            emb_layer = Embedding(self.embedding.shape[0],
                                  self.embedding.shape[1],
                                  input_length=self.max_sen_len,
                                  weights=[self.embedding],
                                  trainable=True,
                                  mask_zero=True)

        embedded_sequences = emb_layer(sentence_input)
        gru_word = Bidirectional(CuDNNGRU(
            128, return_sequences=True))(embedded_sequences)
        word_atten = Attention(128)(gru_word)
        model_word = Model(sentence_input, word_atten)

        # Sentence part
        input = Input(shape=(self.max_sents, self.max_sen_len))
        x_sentences = TimeDistributed(model_word)(input)
        gru_sent = Bidirectional(CuDNNGRU(128,
                                          return_sequences=True))(x_sentences)
        sent_atten = Attention(128)(gru_sent)
        output = Dense(self.class_num,
                       activation=self.last_activation)(sent_atten)
        model = Model(inputs=input, outputs=output)
        return model
Example #7
    def __init__(self, num_layers, hidden_size, output_size, device):
        super(SpellNet, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.embedded = nn.Embedding(self.output_size, self.hidden_size)
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size, self.num_layers, batch_first=True)

        self.attention_video_rnn = AttentionRNN.AttnDecoderRNN(256, 40)
        self.attention_audio_rnn = AttentionRNN.AttnDecoderRNN(256, 40)

        self.attentionVideo = Attention.AttentionNet(hidden_size, hidden_size)
        self.attentionAudio = Attention.AttentionNet(hidden_size, hidden_size)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 256),
            nn.ReLU(),
            nn.Linear(256, output_size)
        )

        self.inp_mlp = nn.Sequential(
            nn.Linear(100, 40),
            nn.ReLU()
        )

        ## Move to Device
        self.embedded = self.embedded.to(device)
        self.lstm = self.lstm.to(device)
        self.attentionVideo = self.attentionVideo.to(device)
        self.attentionAudio = self.attentionAudio.to(device)
        self.mlp = self.mlp.to(device)
Example #8
    def model(self):
        config = model_parameters.model_config()
        inputs = Input(shape=(config.num_of_fields * config.time_steps,))
        embeddings = Embedding(config.vocab_size, config.token_embedding_dim,
                               input_length=(config.num_of_fields * config.time_steps))(inputs)
        reshape = Reshape((config.time_steps, (config.token_embedding_dim * config.num_of_fields)),
                          input_shape=((config.num_of_fields * config.time_steps), config.token_embedding_dim))(embeddings)
        dropout_embeddings = Dropout(config.dropout_rate)(reshape)
        past = Lambda(lambda x : x[:,:26,:])(dropout_embeddings)
        future = Lambda(lambda x : x[:,25:,:])(dropout_embeddings)
        past_LSTM = LSTM(config.token_embedding_dim * config.num_of_fields,  go_backwards=True, return_sequences=True)(past)
        future_LSTM = LSTM(config.token_embedding_dim * config.num_of_fields,  go_backwards=False, return_sequences=True)(future)
        #merged = Concatenate(axis=1)([past_LSTM, future_LSTM])
        attention_1 =  Attention()(past_LSTM)
        attention_2 = Attention()(future_LSTM)
        merged = Concatenate(axis=1)([attention_1, attention_2])
        dense1 = Dense(2000, activation='relu')(merged)
        dense2 = Dense(1000, activation='relu')(dense1)
        outputs = Dense(2, activation='softmax')(dense2)
        model = Model(inputs=inputs, outputs=outputs)

        model.compile(optimizer=Adam(lr=config.lr, clipvalue=5.0),
                      loss="hinge",
                      metrics=['binary_accuracy'])
        print(model.summary())

        return model
Example #9
    def __init__(self,
                 num_classes: int = 2,
                 bidirectional: bool = False,
                 rnn_layers: int = 1,
                 hidden_size: int = 256,
                 rnn_type: str = 'GRU'):

        super(ATAE_LSTM, self).__init__()

        self.stackedembeddings: StackedEmbeddings = StackedEmbeddings([
            FlairEmbeddings('news-forward'),
            FlairEmbeddings('news-backward')
        ])
        self.wordembeddings: StackedEmbeddings = StackedEmbeddings(
            [WordEmbeddings('glove')])
        self.embedding_dimension: int = self.stackedembeddings.embedding_length + self.wordembeddings.embedding_length
        self.bidirectional: bool = bidirectional
        self.rnn_layers: int = rnn_layers
        self.rnn_type: str = rnn_type
        self.num_classes: int = num_classes
        self.hidden_size: int = hidden_size

        if self.rnn_type == 'GRU':
            self.rnn = torch.nn.GRU(self.embedding_dimension,
                                    self.hidden_size,
                                    bidirectional=self.bidirectional,
                                    num_layers=self.rnn_layers)
        else:
            self.rnn = torch.nn.LSTM(self.embedding_dimension,
                                     self.hidden_size,
                                     bidirectional=self.bidirectional,
                                     num_layers=self.rnn_layers)

        self.attention = Attention()
Example #10
def visualization(nums=5):
    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data(
        r'D:\pyproject\data\cifar-10-batches-py')
    x_test = x_test / 255.0
    n_classes = 10

    _inputs = Input(shape=(32, 32, 3), name='input')
    _layer = ConvBlock(64, 3)(_inputs)
    _layer = ConvBlock(128, 3)(_layer)
    c1 = ConvBlock(256, 3, pooling=True)(_layer)
    c2 = ConvBlock(512, 3, pooling=True)(c1)
    c3 = ConvBlock(512, 3, pooling=True)(c2)
    _layer = ConvBlock(512, 3, pooling=True)(c3)
    _layer = ConvBlock(512, 3, pooling=True)(_layer)
    _layer = Flatten()(_layer)
    _g = Dense(512, activation='relu')(_layer)
    att1_f, att1_map = Attention((16, 16, 256), 512, method='pc')([c1, _g])
    att2_f, att2_map = Attention((8, 8, 512), 512, method='pc')([c2, _g])
    att3_f, att3_map = Attention((4, 4, 512), 512, method='pc')([c3, _g])
    f_concat = tf.concat([att1_f, att2_f, att3_f], axis=1)
    _out = Dense(n_classes, 'softmax')(f_concat)
    model = Model(_inputs, [_out, att1_map, att2_map, att3_map])
    model.load_weights(r'D:\pyproject\data\CIFAR10-tensorflow\vgg-att.h5')
    # model.compile()
    index_lst = np.arange(len(x_test))
    np.random.shuffle(index_lst)
    x_test = x_test[index_lst]
    x_test = x_test[0:nums]
    y_test = y_test[index_lst]
    y_test = y_test[0:nums]
    pred, att1_map, att2_map, att3_map, = model.predict(x_test)
    pred = tf.argmax(pred, axis=1)
    att1_map = tf.squeeze(
        tf.image.resize(tf.expand_dims(att1_map, -1), (32, 32)), -1)
    att2_map = tf.squeeze(
        tf.image.resize(tf.expand_dims(att2_map, -1), (32, 32)), -1)
    att3_map = tf.squeeze(
        tf.image.resize(tf.expand_dims(att3_map, -1), (32, 32)), -1)
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')
    for i in range(nums):
        img = x_test[i]
        plt.subplot(141)
        plt.imshow(img)
        plt.title('Img')
        plt.subplot(142)
        plt.imshow(img)
        plt.imshow(att1_map[i], alpha=0.4, cmap='rainbow')
        plt.title('att1_map')
        plt.subplot(143)
        plt.imshow(img)
        plt.imshow(att2_map[i], alpha=0.4, cmap='rainbow')
        plt.title('att2_map')
        plt.subplot(144)
        plt.imshow(img)
        plt.imshow(att3_map[i], alpha=0.4, cmap='rainbow')
        plt.title('att3_map')
        plt.suptitle(
            f'Prediction={classes[pred[i]]} True={classes[y_test[i][0]]}')
        plt.show()
Example #11
    def __init__(self, global_model, aggregation="normal_atten"):
        # FLServer(GlobalModel_MNIST_CNN, "127.0.0.1", 5000, gpu)

        # os.environ['CUDA_VISIBLE_DEVICES'] = '%d'%gpu
        self.global_model = global_model()
        self.ready_client_sids = set()

        # self.host = host
        # self.port = port
        self.client_resource = {}

        self.wait_time = 0

        self.model_id = str(uuid.uuid4())

        self.aggregation = aggregation
        self.attention_mechanism = Attention()
        #####
        # training states
        self.current_round = -1  # -1 for not yet started
        self.current_round_client_updates = []
        self.eval_client_updates = []
        #####

        self.invalid_tolerate = 0
Example #12
 def _self_attention(self, q, k, v, seq_len):
     with tf.variable_scope("self-attention"):
         attention = Attention(num_heads=self.num_heads,
                               mode="encoder",
                               linear_key_dim=self.linear_key_dim,
                               linear_value_dim=self.linear_value_dim,
                               model_dim=self.model_dim,
                               dropout=self.dropout)
         return attention.multi_head(q, k, v, seq_len)
Example #13
 def forward(self, hs_enc, hs_dec):
     N, T, H = hs_dec.shape
     out = np.empty_like(hs_dec)
     # Reset per call so repeated forward passes do not accumulate stale layers
     self.layers = []
     self.attention_weights = []
     for t in range(T):
         layer = Attention()
         out[:, t, :] = layer.forward(hs_enc, hs_dec[:, t, :])
         self.layers.append(layer)
         self.attention_weights.append(layer.attention_weight)
     return out
Example #14
 def _encoder_decoder_attention(self, q, k, v, bias):
     with tf.variable_scope("encoder-decoder-attention"):
         attention = Attention(num_heads=self.num_heads,
                               mode="encoder-decoder-attention",
                               linear_key_dim=self.linear_key_dim,
                               linear_value_dim=self.linear_value_dim,
                               model_dim=self.model_dim,
                               dropout=self.dropout)
         return attention.multi_head(q, k, v, bias)
Example #15
 def _masked_self_attention(self, q, k, v, bias):
     with tf.variable_scope("masked-self-attention"):
         attention = Attention(num_heads=self.num_heads,
                               mode="masked-self-attention",
                               linear_key_dim=self.linear_key_dim,
                               linear_value_dim=self.linear_value_dim,
                               model_dim=self.model_dim,
                               dropout=self.dropout)
         return attention.multi_head(q, k, v, bias)
Example #16
 def _self_attention(self, q, k, v, future, sos, seq_len):
     with tf.variable_scope("self-attention"):
         attention = Attention(num_heads=self.num_heads,
                               masked=True,
                               linear_key_dim=self.linear_key_dim,
                               linear_value_dim=self.linear_value_dim,
                               model_dim=self.model_dim,
                               dropout=self.dropout,
                               batch_size=self.batch_size)
         return attention.multi_head(q, k, v, future, sos, seq_len)
Example #17
 def _pooling_layer(self, q, k, v, seq_len):
     with tf.variable_scope("self-attention"):
         attention = Attention(num_heads=self.num_heads,
                               masked=True,
                               linear_key_dim=self.linear_key_dim,
                               linear_value_dim=self.linear_value_dim,
                               model_dim=self.model_dim,
                               dropout=self.dropout,
                               batch_size=self.batch_size)
         return attention.classifier_head(q, k, v, seq_len)
Example #18
    def __init__(self, options):
        super(LSTM_Att, self).__init__()

        self.ques_emb = QuestionEmbedding(options)
        self.image_mlp_act = nn.Linear(options['n_image_feat'],
                                       options['n_dim'])
        self.att1 = Attention(options)
        self.combined_mlp_drop_0 = nn.Dropout(p=options['drop_ratio'])
        self.combined_mlp_0 = nn.Linear(options['n_dim'], options['n_output'])
Example #19
def seq2seq_model(x_train_1, x_train_2):
    #encoder
    S_inputs = Input(shape=(x_train_1.shape[1], x_train_1.shape[2]))
    # embeddings = Embedding(max_features, 128)(S_inputs)
    # embeddings = Position_Embedding()(S_inputs)  # 增加Position_Embedding能轻微提高准确率
    encoded = Attention(32, 32)([S_inputs, S_inputs, S_inputs])
    # O_seq=Attention(16, 16)([O_seq, O_seq, O_seq])
    # O_seq = GlobalAveragePooling1D()(O_seq)
    # O_seq = Dropout(dropout)(O_seq)
    # outputs = Dense(3, activation='softmax')(O_seq)

    #decoder
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  # x_train_2.shape[1]
        unroll=False,
        stateful=False)
    decoder.add(
        Dropout(dropout,
                batch_input_shape=(None, x_train_1.shape[1], hidden_dim)))
    if depth[1] == 1:
        decoder.add(
            AttentionDecoderCell(output_dim=x_train_2.shape[2],
                                 hidden_dim=hidden_dim))
    else:
        decoder.add(
            AttentionDecoderCell(output_dim=x_train_2.shape[2],
                                 hidden_dim=hidden_dim))
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim, hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=x_train_2.shape[2],
                            hidden_dim=hidden_dim))

    #regression model
    x = Attention(8, 16)([encoded, encoded, encoded])
    x = GlobalAveragePooling1D()(x)
    x = Dropout(dropout)(x)
    regr_outputs = Dense(3, activation='softmax')(x)

    decoded = decoder(encoded)
    decoded = Reshape((x_train_2.shape[2], ))(decoded)
    model = Model(inputs=S_inputs, outputs=[decoded, regr_outputs])
    print(model.summary())

    # try using different optimizers and different optimizer configs
    model.compile(loss=['mse', 'categorical_crossentropy'],
                  loss_weights=[1, 10],
                  optimizer='adam',
                  metrics=['categorical_accuracy'])

    return model
Example #20
def build_baseline0(dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_att1 = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    v_att2 = Attention(dataset.v_dim, q_emb.num_hid + dataset.v_dim, num_hid)

    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    classifier = SimpleClassifier(num_hid, 2 * num_hid,
                                  dataset.num_ans_candidates, 0.5)
    return SANModel1(w_emb, q_emb, v_att1, v_att2, q_net, v_net, classifier)
Example #21
def build_baseline(dataset, opt):
    opt = config.parse_opt()
    w_emb = WordEmbedding(dataset.dictionary.ntokens(), 300, opt.EMB_DROPOUT)
    q_emb = QuestionEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    v_emb = VideoEmbedding(opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    v_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    r_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    v_fc = Videofc(opt.GLIMPSE, opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN, opt.FC_DROPOUT)
    a_emb = AnswerEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    rela_emb = Rela_Module(opt.NUM_HIDDEN * 3, opt.NUM_HIDDEN, opt.NUM_HIDDEN)
    classifier = SimpleClassifier(opt.NUM_HIDDEN, opt.MID_DIM, dataset.num_ans, opt.FC_DROPOUT)
    return BaseModel(w_emb, q_emb, v_emb, a_emb, v_att, v_fc, rela_emb, r_att, classifier, opt)
Example #22
 def __init__(self, pretrained_vgg: VGG, method):
     super(VGG_Att, self).__init__()
     self.__vgg = pretrained_vgg
     self.__att1 = Attention(local_shape=(256, 16, 16), global_shape=512, method=method)
     self.__att2 = Attention(local_shape=(512, 8, 8), global_shape=512, method=method)
     self.__att3 = Attention(local_shape=(512, 4, 4), global_shape=512, method=method)
     self.__classifier = nn.Sequential(
         nn.Linear(256+512+512, 10),
         nn.LogSoftmax(1))
     self.__unsample_1 = nn.UpsamplingBilinear2d(scale_factor=2)
     self.__unsample_2 = nn.UpsamplingBilinear2d(scale_factor=4)
     self.__unsample_3 = nn.UpsamplingBilinear2d(scale_factor=8)
Example #23
def build_news_encoder(word_index, category_map, subcategory_map):
    embedding_matrix = get_embedding_matrix(word_index)
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=True)
    news_input = Input((MAX_TITLE_LENGTH+MAX_ABSTRACT_LENGTH+2, ), dtype='int32')

    # title
    title_input = Lambda(lambda x: x[:, : MAX_TITLE_LENGTH])(news_input)
    title_embedded_sequences = embedding_layer(title_input)
    title_embedded_sequences = Dropout(0.2)(title_embedded_sequences)
    title_cnn = Conv1D(400, 3, padding='same', activation='relu', strides=1)(title_embedded_sequences)
    title_cnn = Dropout(0.2)(title_cnn)
    title_attention = Attention(200)(title_cnn)
    title_attention = Reshape((1, 400))(title_attention)

    # abstract
    abstract_input = Lambda(lambda x: x[:, MAX_TITLE_LENGTH : MAX_ABSTRACT_LENGTH + MAX_TITLE_LENGTH])(news_input)
    abstract_embedded_sequences = embedding_layer(abstract_input)
    abstract_embedded_sequences = Dropout(0.2)(abstract_embedded_sequences)
    abstract_cnn = Conv1D(400, 3, padding='same', activation='relu', strides=1)(abstract_embedded_sequences)
    abstract_cnn = Dropout(0.2)(abstract_cnn)
    abstract_attention = Attention(200)(abstract_cnn)
    abstract_attention = Reshape((1, 400))(abstract_attention)

    # category
    category_input = Lambda(lambda x: x[:, MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH : MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH + 1])(news_input)
    category_embedding_layer = Embedding(len(category_map) + 1,
                                        C_EMBEDDING_DIM,
                                        trainable=True)
    category_embedded = category_embedding_layer(category_input)
    category_dense = Dense(400, activation='relu')(category_embedded)
    category_dense = Reshape((1, 400))(category_dense)

    # subcategory
    subcategory_input = Lambda(lambda x: x[:, MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH + 1 : ])(news_input)
    subcategory_embedding_layer = Embedding(len(subcategory_map) + 1,
                                        C_EMBEDDING_DIM,
                                        trainable=True)
    subcategory_embedded = subcategory_embedding_layer(subcategory_input)
    subcategory_dense = Dense(400, activation='relu')(subcategory_embedded)
    subcategory_dense = Reshape((1, 400))(subcategory_dense)

    # concatenate
    news_r = Concatenate(axis=-2)([title_attention, abstract_attention, category_dense, subcategory_dense])
    news_r = Attention(200)(news_r)

    news_encoder = Model(news_input, news_r, name='news_encoder')
    # from tensorflow.keras.utils import plot_model
    # plot_model(news_encoder, to_file='news_encoder.png', show_shapes=True)
    return news_encoder
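A hedged usage sketch, not taken from the source: the returned news_encoder is typically shared over a user's clicked-news history with TimeDistributed, mirroring the additive Attention pattern used above. MAX_CLICK is an assumed constant; the other names come from the example.
def build_user_encoder(word_index, category_map, subcategory_map):
    # MAX_CLICK (assumed): number of clicked news items kept per user
    news_encoder = build_news_encoder(word_index, category_map, subcategory_map)
    clicked_input = Input((MAX_CLICK, MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH + 2), dtype='int32')
    clicked_r = TimeDistributed(news_encoder)(clicked_input)  # (batch, MAX_CLICK, 400)
    user_r = Attention(200)(clicked_r)                        # (batch, 400)
    return Model(clicked_input, user_r, name='user_encoder')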
Example #24
 def __init__(self, embedding_matrix):
     super(IAN, self).__init__()
     self.embed = nn.Embedding.from_pretrained(
         torch.tensor(embedding_matrix, dtype=torch.float))
     self.lstm = nn.LSTM(embed_dim,
                         hidden_dim,
                         lstm_layers,
                         batch_first=True)
     self.attention_aspect = Attention(hidden_dim,
                                       score_function='bi_linear')
     self.attention_context = Attention(hidden_dim,
                                        score_function='bi_linear')
     self.dense = nn.Linear(hidden_dim * 2, polarities_dim)
Example #25
 def __init__(self, v_dim, a_dim, l_dim, hidden_size, w_emb, l_emb):
     """Encode language prior with different modality features"""
     super(BaselineEncoder, self).__init__()
     self.hidden_size = hidden_size
     self.w_emb = w_emb  # WordEmbedding
     self.l_emb = l_emb  # SequenceEmbedding
     self.v_att = Attention(
         v_dim, l_dim,
         hidden_size)  # Attention(vis_hid_dim, dialoge_dim, hidden_size)
     self.a_att = Attention(
         a_dim, l_dim,
         hidden_size)  # Attention(aud_hid_dim, dialoge_dim, hidden_size)
     self.c2d_v = FC([v_dim, hidden_size])  # On paper
     self.c2d_a = FC([a_dim, hidden_size])
Example #26
    def __init__(self, use_self_attention=False):
        super().__init__()
        self.residuals = nn.Sequential(
            *[Residual(D_GF * 2) for _ in range(RESIDUALS)])
        self.attn = Attention(D_GF, D_HIDDEN)
        self.upsample = upsample_block(D_GF * 2, D_GF)
        self.use_self_attention = use_self_attention

        if self.use_self_attention:
            self.self_attn = self_attn_block()

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'GeneratorN params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )
Example #27
class TestAttention(unittest.TestCase):
    def setUp(self):
        self.attention = Attention()
        self.hs = np.random.randn(10, 5, 4)
        self.h = np.random.randn(10, 4)

    def test_forward(self):
        out = self.attention.forward(self.hs, self.h)
        self.assertEqual((10, 4), out.shape)

    def test_backward(self):
        dout = self.attention.forward(self.hs, self.h)
        dhs, dh = self.attention.backward(dout)
        self.assertEqual((10, 5, 4), dhs.shape)
        self.assertEqual((10, 4), dh.shape)
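For context, a minimal NumPy sketch of the dot-product Attention layer these tests exercise; it is an assumed implementation written to match the shapes above, not the code under test.
import numpy as np

class Attention:
    def __init__(self):
        self.cache = None
        self.attention_weight = None

    def forward(self, hs, h):
        # hs: (N, T, H) encoder states, h: (N, H) decoder state
        N, T, H = hs.shape
        t = hs * h.reshape(N, 1, H)                # broadcast decoder state over time
        s = t.sum(axis=2)                          # (N, T) raw scores
        s -= s.max(axis=1, keepdims=True)          # numerically stable softmax
        a = np.exp(s) / np.exp(s).sum(axis=1, keepdims=True)
        c = (hs * a.reshape(N, T, 1)).sum(axis=1)  # (N, H) context vector
        self.cache = (hs, h, a)
        self.attention_weight = a
        return c

    def backward(self, dc):
        hs, h, a = self.cache
        N, T, H = hs.shape
        da = (dc.reshape(N, 1, H) * hs).sum(axis=2)           # grad of attention weights, (N, T)
        dhs = dc.reshape(N, 1, H) * a.reshape(N, T, 1)        # weighted-sum branch
        ds = a * (da - (a * da).sum(axis=1, keepdims=True))   # softmax backward
        dhs += ds.reshape(N, T, 1) * h.reshape(N, 1, H)       # score branch
        dh = (ds.reshape(N, T, 1) * hs).sum(axis=1)           # (N, H), same shape as h
        return dhs, dh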
Example #28
def build_baseline(dataset, opt):
    w_emb = WordEmbedding(dataset.dictionary.ntokens(), 300, opt.EMB_DROPOUT)
    q_emb = QuestionEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    v_emb = VideoEmbedding(opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    v_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    r_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    v_fc = Videofc(opt.GLIMPSE, opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN, opt.FC_DROPOUT)
    a_emb = AnswerEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    rela_emb = Rela_Module(opt.NUM_HIDDEN * 3, opt.NUM_HIDDEN, opt.NUM_HIDDEN)
    classifier = SimpleClassifier(opt.NUM_HIDDEN * 2, opt.MID_DIM, 1, opt.FC_DROPOUT)
    ques_att = Q_Att(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    # vlinear = FCNet([opt.NUM_HIDDEN, opt.MID_DIM, opt.NUM_HIDDEN])
    # rlinear = FCNet([opt.NUM_HIDDEN, opt.MID_DIM, opt.NUM_HIDDEN])

    return BaseModel(w_emb, q_emb, v_emb, a_emb, v_att, v_fc, rela_emb, r_att, classifier, ques_att, opt)
Example #29
def main(output_path=r'D:\pyproject\data\CIFAR10-tensorflow'):
    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data(
        r'D:\pyproject\data\cifar-10-batches-py')
    x_train = x_train / 255.0
    x_test = x_test / 255.0
    y_train = utils.to_categorical(y_train, num_classes=10)
    y_test = utils.to_categorical(y_test, num_classes=10)
    epochs = 30
    batch_size = 128
    n_classes = 10
    model_path_loss = output_path + r"\vgg-att.h5"
    save_model_loss = ModelCheckpoint(model_path_loss,
                                      monitor='val_loss',
                                      save_best_only=True,
                                      verbose=2)

    _inputs = Input(shape=(32, 32, 3), name='input')
    _layer = ConvBlock(64, 3)(_inputs)
    _layer = ConvBlock(128, 3)(_layer)
    c1 = ConvBlock(256, 3, pooling=True)(_layer)
    c2 = ConvBlock(512, 3, pooling=True)(c1)
    c3 = ConvBlock(512, 3, pooling=True)(c2)
    _layer = ConvBlock(512, 3, pooling=True)(c3)
    _layer = ConvBlock(512, 3, pooling=True)(_layer)
    _layer = Flatten()(_layer)
    _g = Dense(512, activation='relu')(_layer)
    _outputs = Dense(n_classes, activation='softmax')(_g)
    vgg = Model(_inputs, _outputs)
    vgg.load_weights(output_path + r"\vgg.h5")

    att1_f, att1_map = Attention((16, 16, 256), 512, method='pc')([c1, _g])
    att2_f, att2_map = Attention((8, 8, 512), 512, method='pc')([c2, _g])
    att3_f, att3_map = Attention((4, 4, 512), 512, method='pc')([c3, _g])
    f_concat = tf.concat([att1_f, att2_f, att3_f], axis=1)
    _out = Dense(n_classes, 'softmax')(f_concat)
    model = Model(_inputs, _out)
    opt = optimizers.SGD(learning_rate=0.01, momentum=0.9)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()
    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              validation_data=(x_test, y_test),
              epochs=epochs,
              verbose=1,
              callbacks=[save_model_loss])
Example #30
 def __init__(self, embedding_matrix):
     super(MemNet, self).__init__()
     self.embed = nn.Embedding.from_pretrained(
         torch.tensor(embedding_matrix, dtype=torch.float))
     self.attention = Attention(embed_dim, score_function='mlp')
     self.x_linear = nn.Linear(embed_dim, embed_dim)
     self.dense = nn.Linear(embed_dim, polarities_dim)
Example #31
class Constructor:

    def __init__(self, datafile, n_objects, n_types, max_attention_depth, max_attention_objects,
                 computer_depth, n_functions, test_fraction=0, data_fraction=1, stochastic=False, batch_size=0,
                 save_file="best_construct.pkl", sep_features_targets=False):
        """
        :param datafile: A list containing input/target pairs, e.g. [[input, target], ...]
        :param data_fraction: Fraction of the dataset to use.
        :param test_fraction: The fraction of the dataset to reserve for testing.
        :param n_objects: The number of objects which Structure reduces the data to.
        :param n_types: The number of types SemanticalMapper maps the objects to. This is the number of separate types
                        as defined by their behavior computed by TypeComputer.
        :param max_attention_depth: The depth of reduction in the Structure that Attention will look for type pairs in.
        :param max_attention_objects: The number of type pairs in each layer of Structure that Attention will look for.
        :param computer_depth: The number of times types will be recursively passed through the functions of
                               TypeComputer.
        :param n_functions: The size of the function set of TypeComputer.
        """

        self.n_objects = n_objects
        self.n_types = n_types
        self.max_attention_depth = max_attention_depth
        self.max_attention_objects = max_attention_objects
        self.computer_depth = computer_depth
        self.n_functions = n_functions
        self.stochastic = stochastic
        self.batch_size = batch_size
        self.save_file = save_file

        self.best_fit = 0.0

        df = open(datafile, "rb")
        self.data = cPickle.load(df)
        df.close()

        features = [str(self.data[i][2:]) for i in range(len(self.data))]
        targets = [str(self.data[i][1]) for i in range(len(self.data))]

        self.data = zip(features, targets)

        if sep_features_targets:
            self.data = zip(self.data[0], self.data[1])

        if data_fraction < 1:
            self.data = [pair for pair in self.data if r.random() < data_fraction]

        print self.data[0][0]
        print self.data[0][1]

        train_len = int(round(len(self.data)*(1 - test_fraction)))
        self.train_data = self.data[:train_len]
        self.test_data = self.data[train_len:]

        print len(self.train_data)
        print len(self.test_data)

        inputs = []
        outputs = []
        input_symbols = []

        for pair in self.data:
            inputs.append(pair[0])
            outputs.append(pair[1])

        for x in inputs:
            for y in x:
                input_symbols.append(y)

        # Initialize layers
        self.input_mapper = SemanticalMapper(first_layer=True, inputs=input_symbols)
        self.structure = Structure(n_objects)
        self.semantical_mapper = SemanticalMapper(n_objects, n_types)
        self.attention = Attention(max_attention_depth, max_attention_objects, n_types)
        self.type_computer = TypeComputer(max_attention_depth*max_attention_objects*2, num_functions=self.n_functions,
                                          depth=computer_depth, n_types=self.n_types)
        self.output_mapper = OutputMapper(n_types, outputs)

    def compute(self, data):
        mapped = self.input_mapper.compute(data)
        structure = self.structure.make(mapped)
        for i in range(len(structure)):
            structure[i] = self.semantical_mapper.compute(structure[i])
        filtered = self.attention.filter(structure)
        outputs = self.type_computer.compute(filtered)
        outputs = self.output_mapper.compute(outputs)

        if len(outputs) != 0:
            output = int(outputs[-1])
        else:
            output = 1

        return output

    def set(self, savefile):
        cfile = open(savefile, "rb")
        chromosome = cPickle.load(cfile)

        index = 0
        set_input_mapper = chromosome[0:self.input_mapper.n_symbols]
        index += self.input_mapper.n_symbols
        set_structure_rmap = chromosome[index:index+len(self.structure.r_map)]
        index += len(self.structure.r_map)
        set_semantical_mapper = chromosome[index:index+len(self.semantical_mapper.map)]
        index += len(self.semantical_mapper.map)
        set_attention = chromosome[index:index+self.max_attention_depth*self.max_attention_objects*2]
        index += self.max_attention_depth*self.max_attention_objects*2
        set_computer = chromosome[index:index+len(self.type_computer.path)*len(self.type_computer.path[0])]
        index += len(self.type_computer.path)*len(self.type_computer.path[0])
        set_output_mapper = chromosome[index:]

        self.input_mapper.set(set_input_mapper)
        self.structure.set(set_structure_rmap)
        self.semantical_mapper.set(set_semantical_mapper)
        self.attention.set(set_attention)
        self.type_computer.set(set_computer)
        self.output_mapper.set(set_output_mapper)

    def eval_func(self, chromosome, report_test=True):

        error = 0.0
        error_local = 0.0
        test_error = 0.0
        test_error_local = 0.0

        index = 0
        set_input_mapper = chromosome[0:self.input_mapper.n_symbols]
        index += self.input_mapper.n_symbols
        set_structure_rmap = chromosome[index:index+len(self.structure.r_map)]
        index += len(self.structure.r_map)
        set_semantical_mapper = chromosome[index:index+len(self.semantical_mapper.map)]
        index += len(self.semantical_mapper.map)
        set_attention = chromosome[index:index+self.max_attention_depth*self.max_attention_objects*2]
        index += self.max_attention_depth*self.max_attention_objects*2
        set_computer = chromosome[index:index+len(self.type_computer.path)*len(self.type_computer.path[0])]
        index += len(self.type_computer.path)*len(self.type_computer.path[0])
        set_output_mapper = chromosome[index:]

        self.input_mapper.set(set_input_mapper)
        self.structure.set(set_structure_rmap)
        self.semantical_mapper.set(set_semantical_mapper)
        self.attention.set(set_attention)
        self.type_computer.set(set_computer)
        self.output_mapper.set(set_output_mapper)

        if self.stochastic:
            indexes = [r.randint(0, len(self.train_data) - 1) for x in range(self.batch_size)]
            train_data = [self.train_data[x] for x in indexes]
            indexes = [r.randint(0, len(self.test_data) - 1) for x in range(self.batch_size/2)]
            test_data = [self.test_data[x] for x in indexes]
        else:
            train_data = self.train_data
            test_data = self.test_data

        # print "=>Evaluating training data..."
        for pair in train_data:
            inp = pair[0]
            
            # For normal sequence:
            # target = [self.output_dict[x] for x in pair[1]]
            
            # For classification:
            target = pair[1]

            mapped = self.input_mapper.compute(inp)
            structure = self.structure.make(mapped)
            for i in range(len(structure)):
                structure[i] = self.semantical_mapper.compute(structure[i])
            filtered = self.attention.filter(structure)
            outputs = self.type_computer.compute(filtered)
            outputs = self.output_mapper.compute(outputs)

            # For normal sequence:
            # if len(outputs) >= len(target):
            #         for i in range(len(outputs)):
            #             if i < len(target):
            #                 if outputs[i] != target[i]:
            #                     error_local += 1
            #             else:
            #                 error_local += 1
            # else:
            #     for i in range(len(target)):
            #         if i < len(outputs):
            #             if outputs[i] != target[i]:
            #                 error_local += 1
            #         else:
            #             error_local += 1

            # error += error_local

            # For classification:
            if len(outputs) != 0:
                output = int(outputs[-1])
            else:
                output = 0

            if output != int(target):
                error += 1

        # print "Train acc: " + str((len(train_data) - error)/len(train_data)) + " Train error: " + str(error)

        # print "=>Evaluating testing data..."
        if report_test:
            for pair in test_data:
                inp = pair[0]
                
                # For normal sequence:
                # target = [self.output_dict[x] for x in pair[1]]
                
                # For classification:
                target = pair[1]

                mapped = self.input_mapper.compute(inp)
                structure = self.structure.make(mapped)
                for i in range(len(structure)):
                    structure[i] = self.semantical_mapper.compute(structure[i])
                filtered = self.attention.filter(structure)
                outputs = self.type_computer.compute(filtered)
                outputs = self.output_mapper.compute(outputs)

                # For normal sequence:
                # if len(outputs) >= len(target):
                #     for i in range(len(outputs)):
                #         if i < len(target):
                #             if outputs[i] != target[i]:
                #                 test_error_local += 1
                #         else:
                #             test_error_local += 1
                # else:
                #     for i in range(len(target)):
                #         if i < len(outputs):
                #             if outputs[i] != target[i]:
                #                 test_error_local += 1
                #         else:
                #             test_error_local += 1

                # test_error += test_error_local/len(target)

                # For classification:
                if len(outputs) != 0:
                    output = int(outputs[-1])
                else:
                    output = 0

                print "Output: " + str(output)
                print "Target: " + str(target)

                if output != int(target):
                    test_error += 1

            if (len(test_data) - test_error)/len(test_data) > self.best_fit:
                outfile = open(self.save_file, "wb")
                cPickle.dump(list(chromosome), outfile)
                outfile.close()

            print "Test error acc.: " + str((len(test_data) - test_error)/len(test_data)) \
                  # + " Num error: " + str(test_error)

        return (len(train_data) - error)/len(train_data)

    def evolve(self, n_generations):

        print "Initializing evolution..."

        # Genome instance
        setOfAlleles = GAllele.GAlleles()

        # Alleles for input_mapper
        for i in xrange(self.input_mapper.n_symbols):
            a = GAllele.GAlleleRange(0, self.input_mapper.n_symbols)
            setOfAlleles.add(a)

        # Alleles for structure
        for i in xrange(len(self.structure.r_map)):
            a = GAllele.GAlleleRange(0, self.n_objects)
            setOfAlleles.add(a)

        # Alleles for semantical_mapper
        for i in xrange(len(self.semantical_mapper.map)):
            a = GAllele.GAlleleRange(0, self.n_types)
            setOfAlleles.add(a)

        # Alleles for attention
        for i in xrange(self.max_attention_depth*self.max_attention_objects*2):
            a = GAllele.GAlleleRange(0, self.n_types)
            setOfAlleles.add(a)

        # Alleles for computer
        for i in xrange(len(self.type_computer.path)*len(self.type_computer.path[0])):
            a = GAllele.GAlleleRange(0, self.n_functions-1)
            setOfAlleles.add(a)

        # Alleles for output_mapper
        for i in xrange(self.n_types + 1):
            a = GAllele.GAlleleRange(0, self.output_mapper.n_symbols)
            setOfAlleles.add(a)

        genome = G1DList.G1DList(len(setOfAlleles))
        genome.setParams(allele=setOfAlleles)

        # The evaluator function (objective function)
        genome.evaluator.set(self.eval_func)
        genome.mutator.set(Mutators.G1DListMutatorAllele)
        genome.initializator.set(Initializators.G1DListInitializatorAllele)

        # Genetic Algorithm Instance
        ga = GSimpleGA.GSimpleGA(genome)
        ga.minimax = Consts.minimaxType["maximize"]
        ga.selector.set(Selectors.GRankSelector)
        ga.setGenerations(n_generations)

        print "Evolving..."

        # Do the evolution, with stats dump
        # frequency of 1 generations
        ga.evolve(freq_stats=1)

        print ga.bestIndividual()
Example #32
    def __init__(self, datafile, n_objects, n_types, max_attention_depth, max_attention_objects,
                 computer_depth, n_functions, test_fraction=0, data_fraction=1, stochastic=False, batch_size=0,
                 save_file="best_construct.pkl", sep_features_targets=False):
        """
        :param datafile: A list containing input/target pairs, e.g. [[input, target], ...]
        :param data_fraction: Fraction of the dataset to use.
        :param test_fraction: The fraction of the dataset to reserve for testing.
        :param n_objects: The number of objects which Structure reduces the data to.
        :param n_types: The number of types SemanticalMapper maps the objects to. This is the number of separate types
                        as defined by their behavior computed by TypeComputer.
        :param max_attention_depth: The depth of reduction in the Structure that Attention will look for type pairs in.
        :param max_attention_objects: The number of type pairs in each layer of Structure that Attention will look for.
        :param computer_depth: The number of times types will be recursively passed through the functions of
                               TypeComputer.
        :param n_functions: The size of the function set of TypeComputer.
        """

        self.n_objects = n_objects
        self.n_types = n_types
        self.max_attention_depth = max_attention_depth
        self.max_attention_objects = max_attention_objects
        self.computer_depth = computer_depth
        self.n_functions = n_functions
        self.stochastic = stochastic
        self.batch_size = batch_size
        self.save_file = save_file

        self.best_fit = 0.0

        df = open(datafile, "rb")
        self.data = cPickle.load(df)
        df.close()

        features = [str(self.data[i][2:]) for i in range(len(self.data))]
        targets = [str(self.data[i][1]) for i in range(len(self.data))]

        self.data = zip(features, targets)

        if sep_features_targets:
            self.data = zip(self.data[0], self.data[1])

        if data_fraction < 1:
            self.data = [pair for pair in self.data if r.random() < data_fraction]

        print self.data[0][0]
        print self.data[0][1]

        train_len = int(round(len(self.data)*(1 - test_fraction)))
        self.train_data = self.data[:train_len]
        self.test_data = self.data[train_len:]

        print len(self.train_data)
        print len(self.test_data)

        inputs = []
        outputs = []
        input_symbols = []

        for pair in self.data:
            inputs.append(pair[0])
            outputs.append(pair[1])

        for x in inputs:
            for y in x:
                input_symbols.append(y)

        # Initialize layers
        self.input_mapper = SemanticalMapper(first_layer=True, inputs=input_symbols)
        self.structure = Structure(n_objects)
        self.semantical_mapper = SemanticalMapper(n_objects, n_types)
        self.attention = Attention(max_attention_depth, max_attention_objects, n_types)
        self.type_computer = TypeComputer(max_attention_depth*max_attention_objects*2, num_functions=self.n_functions,
                                          depth=computer_depth, n_types=self.n_types)
        self.output_mapper = OutputMapper(n_types, outputs)