def __init__(self, n_labels, settings, unfactorized, lonely_only=True):
    """Create the label-scoring layers and, if factorized, the edge layers.

    Args:
        n_labels: Number of labels passed to ``Attention.label_factory``.
        settings: Configuration object. This constructor reads
            ``hidden_lstm``, ``dim_mlp``, ``attention``, ``dropout_label``,
            ``dropout_main_ff`` and (factorized mode only) ``dropout_edge``.
        unfactorized: When true, no separate edge layers are created.
        lonely_only: Chooses the feed-forward input width:
            ``2 * hidden_lstm`` when true, ``4 * hidden_lstm`` otherwise.
    """
    super().__init__()
    self.unfactorized = unfactorized

    # Plain (non-module) configuration values.
    self.dim_lstm = settings.hidden_lstm
    self.dropout_label = settings.dropout_label
    self.dropout_main_ff = settings.dropout_main_ff

    mlp_dim = settings.dim_mlp
    width_factor = 2 if lonely_only else 2 * 2
    fnn_input = settings.hidden_lstm * width_factor

    # Label scorer: head/dependent projections followed by label attention.
    self.label_head_fnn = nn.Linear(fnn_input, mlp_dim)
    self.label_dep_fnn = nn.Linear(fnn_input, mlp_dim)
    self.label_attention = Attention.label_factory(
        mlp_dim, n_labels, settings.attention)
    self.locked_dropout = LockedDropout()

    if self.unfactorized:
        return

    # Edge scorer, only present in the factorized configuration.
    self.edge_head_fnn = nn.Linear(fnn_input, mlp_dim)
    self.edge_dep_fnn = nn.Linear(fnn_input, mlp_dim)
    self.edge_attention = Attention.edge_factory(mlp_dim, settings.attention)
    self.dropout_edge = settings.dropout_edge
def __init__(self, maxlen_sentence, maxlen_word, max_features,
             embedding_dims, class_num=1, last_activation='sigmoid'):
    """Set up the word-level encoder and the sentence-level layers.

    Args:
        maxlen_sentence: Size passed to the sentence-level ``Attention``.
        maxlen_word: Length of the word-level input and size passed to the
            word-level ``Attention``.
        max_features: Vocabulary size given to the ``Embedding`` layer.
        embedding_dims: Output dimension of the ``Embedding`` layer.
        class_num: Number of units of the final ``Dense`` layer.
        last_activation: Activation of the final ``Dense`` layer.
    """
    super(HAN, self).__init__()
    self.maxlen_sentence = maxlen_sentence
    self.maxlen_word = maxlen_word
    self.max_features = max_features
    self.embedding_dims = embedding_dims
    self.class_num = class_num
    self.last_activation = last_activation

    # Word part: embedding -> bidirectional LSTM -> attention, wrapped as a
    # standalone model so it can be applied to every sentence.
    word_input = Input(shape=(self.maxlen_word, ))
    word_embedding = Embedding(self.max_features,
                               self.embedding_dims,
                               input_length=self.maxlen_word)
    word_features = word_embedding(word_input)
    word_rnn = Bidirectional(CuDNNLSTM(128, return_sequences=True))  # LSTM or GRU
    word_features = word_rnn(word_features)
    word_features = Attention(self.maxlen_word)(word_features)
    word_model = Model(word_input, word_features)

    # Sentence part
    self.word_encoder_att = TimeDistributed(word_model)
    self.sentence_encoder = Bidirectional(
        CuDNNLSTM(128, return_sequences=True))  # LSTM or GRU
    self.sentence_att = Attention(self.maxlen_sentence)

    # Output part
    self.classifier = Dense(self.class_num, activation=self.last_activation)
def __init__(self, encoder, embed_hidden=300, mlp_hidden=512, time_steps=3):
    """Build the question encoder, attention modules and classifier.

    Args:
        encoder: Object providing ``get_num_verbs()``, ``get_num_labels()``,
            ``get_max_role_count()`` and a ``question_words`` collection.
        embed_hidden: Word-embedding dimension.
        mlp_hidden: Hidden size shared by the LSTM, attention and FC layers.
        time_steps: Stored on the instance as ``self.time_steps``.
    """
    super(TopDownWithContext, self).__init__()

    self.encoder = encoder
    self.n_verbs = self.encoder.get_num_verbs()
    self.vocab_size = self.encoder.get_num_labels()
    self.max_role_count = self.encoder.get_max_role_count()
    self.n_role_q_vocab = len(self.encoder.question_words)

    hidden = mlp_hidden
    doubled = mlp_hidden * 2

    # One extra embedding row is reserved as the padding index.
    self.w_emb = nn.Embedding(self.n_role_q_vocab + 1,
                              embed_hidden,
                              padding_idx=self.n_role_q_vocab)
    self.q_emb = nn.LSTM(embed_hidden, hidden,
                         batch_first=True, bidirectional=True)
    # The bidirectional LSTM emits 2 * hidden features; project back down.
    self.lstm_proj = nn.Linear(doubled, hidden)

    self.v_att = Attention(hidden, hidden, hidden)
    self.ctx_att = Attention(hidden, hidden, hidden)
    self.context = FCNet([doubled, hidden])
    # self.role_weight = RoleWeightAttention(mlp_hidden, mlp_hidden, mlp_hidden)
    self.detailedq = FCNet([doubled, hidden])
    self.concat = FCNet([doubled, hidden])
    self.q_net = FCNet([hidden, hidden])
    self.v_net = FCNet([hidden, hidden])
    self.classifier = SimpleClassifier(hidden, 2 * hidden,
                                       self.vocab_size, 0.5)

    self.time_steps = time_steps
def __init__(self, bert, opt):
    """Assemble the BERT-based attention network.

    Args:
        bert: Encoder stored as ``self.bert``.
        opt: Options object; reads ``dropout``, ``bert_dim``, ``hidden_dim``
            and ``polarities_dim``.
    """
    super(Net, self).__init__()
    self.opt = opt
    self.bert = bert
    self.squeeze_embedding = SqueezeEmbedding()
    self.dropout = nn.Dropout(opt.dropout)

    hidden_dim = opt.hidden_dim
    # Settings shared by all three attention modules.
    attn_common = dict(n_head=8, score_function='mlp', dropout=opt.dropout)

    self.attn_k = Attention(opt.bert_dim, out_dim=hidden_dim, **attn_common)
    self.attn_q = Attention(opt.bert_dim, out_dim=hidden_dim, **attn_common)
    self.ffn_c = PositionwiseFeedForward(hidden_dim, dropout=opt.dropout)
    self.ffn_t = PositionwiseFeedForward(hidden_dim, dropout=opt.dropout)
    self.attn_s1 = Attention(hidden_dim, **attn_common)

    # The classifier input is three hidden_dim-sized vectors side by side.
    self.dense = nn.Linear(hidden_dim * 3, opt.polarities_dim)
class GeneratorN(nn.Module):
    """Generator stage: word attention, residual blocks, then upsampling."""

    def __init__(self, use_self_attention=False):
        """Create the sub-modules and print the parameter counts.

        Args:
            use_self_attention: When true, an extra ``self_attn_block`` is
                created and applied to the attended context in ``forward``.
        """
        super().__init__()
        residual_blocks = [Residual(D_GF * 2) for _ in range(RESIDUALS)]
        self.residuals = nn.Sequential(*residual_blocks)
        self.attn = Attention(D_GF, D_HIDDEN)
        self.upsample = upsample_block(D_GF * 2, D_GF)

        self.use_self_attention = use_self_attention
        if self.use_self_attention:
            self.self_attn = self_attn_block()

        p_trainable, p_non_trainable = count_params(self)
        print(
            f'GeneratorN params: trainable {p_trainable} - non_trainable {p_non_trainable}'
        )

    def forward(self, h_code, c_code, word_embs, mask):
        """Attend over the word embeddings and upsample the joint features.

        Shape notes carried over from the original author:
            h_code1(query), output of previous generator:
                batch x D_GF x ih x iw (queryL=ihxiw)
            word_embs(context): batch x D_COND x seq_len
            c_code1: batch x D_GF x ih x iw
            att1: batch x sourceL x ih x iw

        The incoming ``c_code`` argument is not read; it is overwritten by
        the attention output.

        Returns:
            Tuple ``(out_code, att)``.
        """
        self.attn.applyMask(mask)
        c_code, att = self.attn(h_code, word_embs)

        # Image-text attention first, image-image attention second
        if self.use_self_attention:
            c_code = self.self_attn(c_code)

        joint = torch.cat((h_code, c_code), 1)
        refined = self.residuals(joint)
        out_code = self.upsample(refined)  # D_GF/2 x 2ih x 2iw
        return out_code, att
def get_model(self):
    """Build and return the two-level (word / sentence) attention model.

    The word-level sub-model embeds one sentence, runs a bidirectional GRU
    and pools with ``Attention``. It is then applied to every sentence via
    ``TimeDistributed`` and the sentence sequence is encoded the same way.

    Reads ``self.max_sen_len``, ``self.max_sents``, ``self.input_dim``,
    ``self.embedding_dims``, ``self.embedding``, ``self.class_num`` and
    ``self.last_activation``.

    Returns:
        An uncompiled Keras ``Model``.
    """
    # Word part
    sentence_input = Input(shape=(self.max_sen_len, ), dtype='int32')

    pretrained = self.embedding
    # A multi-element NumPy matrix has no unambiguous truth value, so
    # ``not pretrained`` would raise ValueError; compare against None instead.
    if pretrained is None:
        emb_layer = Embedding(self.input_dim,
                              self.embedding_dims,
                              input_length=self.max_sen_len,
                              trainable=True,
                              mask_zero=True)
    else:
        emb_layer = Embedding(pretrained.shape[0],
                              pretrained.shape[1],
                              input_length=self.max_sen_len,
                              weights=[pretrained],
                              trainable=True,
                              mask_zero=True)

    embedded_sequences = emb_layer(sentence_input)
    gru_word = Bidirectional(CuDNNGRU(
        128, return_sequences=True))(embedded_sequences)
    word_atten = Attention(128)(gru_word)
    model_word = Model(sentence_input, word_atten)

    # Sentence part (local renamed so the builtin ``input`` is not shadowed)
    document_input = Input(shape=(self.max_sents, self.max_sen_len))
    x_sentences = TimeDistributed(model_word)(document_input)
    gru_sent = Bidirectional(CuDNNGRU(128,
                                      return_sequences=True))(x_sentences)
    sent_atten = Attention(128)(gru_sent)

    output = Dense(self.class_num,
                   activation=self.last_activation)(sent_atten)
    return Model(inputs=document_input, outputs=output)
def __init__(self, num_layers, hidden_size, output_size, device):
    """Create the decoder sub-modules and place them on ``device``.

    Args:
        num_layers: Number of stacked LSTM layers.
        hidden_size: Embedding / LSTM / attention hidden size.
        output_size: Vocabulary size (embedding rows and final logits).
        device: Target device for every sub-module of this network.
    """
    super(SpellNet, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.num_layers = num_layers

    self.embedded = nn.Embedding(self.output_size, self.hidden_size)
    self.lstm = nn.LSTM(self.hidden_size, self.hidden_size,
                        self.num_layers, batch_first=True)
    self.attention_video_rnn = AttentionRNN.AttnDecoderRNN(256, 40)
    self.attention_audio_rnn = AttentionRNN.AttnDecoderRNN(256, 40)
    self.attentionVideo = Attention.AttentionNet(hidden_size, hidden_size)
    self.attentionAudio = Attention.AttentionNet(hidden_size, hidden_size)
    self.mlp = nn.Sequential(
        nn.Linear(hidden_size * 2, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 256),
        nn.ReLU(),
        nn.Linear(256, output_size),
    )
    self.inp_mlp = nn.Sequential(
        nn.Linear(100, 40),
        nn.ReLU(),
    )

    # Move to device. Previously only embedded / lstm / attentionVideo /
    # attentionAudio / mlp were moved, which left attention_video_rnn,
    # attention_audio_rnn and inp_mlp on the default device. Moving the
    # whole module covers every registered sub-module.
    self.to(device)
def model(self):
    """Build, compile and return the past/future attention classifier.

    The flat token input is embedded and reshaped to one row per time step.
    It is split into a "past" slice (steps ``[:26]``, read backwards) and a
    "future" slice (steps ``[25:]``). Each slice runs through its own LSTM
    and ``Attention`` layer, and the two results are concatenated and
    classified by a dense stack with a 2-way softmax.

    Returns:
        The compiled Keras ``Model``.
    """
    config = model_parameters.model_config()
    flat_len = config.num_of_fields * config.time_steps
    step_dim = config.token_embedding_dim * config.num_of_fields

    inputs = Input(shape=(flat_len,))
    embeddings = Embedding(config.vocab_size,
                           config.token_embedding_dim,
                           input_length=flat_len)(inputs)
    # Group the per-field embeddings of one time step into a single vector.
    reshape = Reshape((config.time_steps, step_dim),
                      input_shape=(flat_len, config.token_embedding_dim))(embeddings)
    dropout_embeddings = Dropout(config.dropout_rate)(reshape)

    # The two windows overlap at step 25.
    past = Lambda(lambda x: x[:, :26, :])(dropout_embeddings)
    future = Lambda(lambda x: x[:, 25:, :])(dropout_embeddings)

    past_LSTM = LSTM(step_dim, go_backwards=True,
                     return_sequences=True)(past)
    future_LSTM = LSTM(step_dim, go_backwards=False,
                       return_sequences=True)(future)

    attention_1 = Attention()(past_LSTM)
    attention_2 = Attention()(future_LSTM)
    merged = Concatenate(axis=1)([attention_1, attention_2])

    dense1 = Dense(2000, activation='relu')(merged)
    dense2 = Dense(1000, activation='relu')(dense1)
    outputs = Dense(2, activation='softmax')(dense2)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(lr=config.lr, clipvalue=5.0),
                  loss="hinge",
                  metrics=['binary_accuracy'])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    return model
def __init__(self,
             num_classes: int = 2,
             bidirectional: bool = False,
             rnn_layers: int = 1,
             hidden_size: int = 256,
             rnn_type: str = 'GRU'):
    """Load the embeddings and build the recurrent encoder and attention.

    Args:
        num_classes: Stored as ``self.num_classes``.
        bidirectional: Whether the RNN is bidirectional.
        rnn_layers: Number of stacked RNN layers.
        hidden_size: RNN hidden size.
        rnn_type: ``'GRU'`` selects ``torch.nn.GRU``; any other value
            selects ``torch.nn.LSTM``.
    """
    super(ATAE_LSTM, self).__init__()

    flair_stack = [FlairEmbeddings('news-forward'),
                   FlairEmbeddings('news-backward')]
    self.stackedembeddings: StackedEmbeddings = StackedEmbeddings(flair_stack)
    self.wordembeddings: StackedEmbeddings = StackedEmbeddings(
        [WordEmbeddings('glove')])
    # The RNN input is the concatenation of both embedding stacks.
    self.embedding_dimension: int = (
        self.stackedembeddings.embedding_length
        + self.wordembeddings.embedding_length)

    self.bidirectional: bool = bidirectional
    self.rnn_layers: int = rnn_layers
    self.rnn_type: str = rnn_type
    self.num_classes: int = num_classes
    self.hidden_size: int = hidden_size

    rnn_cls = torch.nn.GRU if self.rnn_type == 'GRU' else torch.nn.LSTM
    self.rnn = rnn_cls(self.embedding_dimension,
                       self.hidden_size,
                       bidirectional=self.bidirectional,
                       num_layers=self.rnn_layers)
    self.attention = Attention()
def visualization(nums=5):
    """Plot attention maps of the attention VGG for random test images.

    Rebuilds the network, loads its weights from a fixed path, predicts on
    ``nums`` randomly chosen CIFAR-10 test images and shows, per image, the
    input next to the three attention maps resized to 32x32.

    Args:
        nums: Number of test images to visualise.
    """
    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data(
        r'D:\pyproject\data\cifar-10-batches-py')
    x_test = x_test / 255.0
    n_classes = 10

    # Backbone; c1..c3 are the intermediate feature maps attention looks at.
    _inputs = Input(shape=(32, 32, 3), name='input')
    _layer = ConvBlock(64, 3)(_inputs)
    _layer = ConvBlock(128, 3)(_layer)
    c1 = ConvBlock(256, 3, pooling=True)(_layer)
    c2 = ConvBlock(512, 3, pooling=True)(c1)
    c3 = ConvBlock(512, 3, pooling=True)(c2)
    _layer = ConvBlock(512, 3, pooling=True)(c3)
    _layer = ConvBlock(512, 3, pooling=True)(_layer)
    _layer = Flatten()(_layer)
    _g = Dense(512, activation='relu')(_layer)

    att1_f, att1_map = Attention((16, 16, 256), 512, method='pc')([c1, _g])
    att2_f, att2_map = Attention((8, 8, 512), 512, method='pc')([c2, _g])
    att3_f, att3_map = Attention((4, 4, 512), 512, method='pc')([c3, _g])
    f_concat = tf.concat([att1_f, att2_f, att3_f], axis=1)
    _out = Dense(n_classes, 'softmax')(f_concat)

    model = Model(_inputs, [_out, att1_map, att2_map, att3_map])
    model.load_weights(r'D:\pyproject\data\CIFAR10-tensorflow\vgg-att.h5')
    # model.compile()

    # Pick `nums` random samples, keeping images and labels aligned.
    index_lst = np.arange(len(x_test))
    np.random.shuffle(index_lst)
    x_test = x_test[index_lst][0:nums]
    y_test = y_test[index_lst][0:nums]

    pred, att1_map, att2_map, att3_map, = model.predict(x_test)
    pred = tf.argmax(pred, axis=1)

    def _to_image_size(att_map):
        # Add a channel axis for tf.image.resize, then drop it again.
        return tf.squeeze(
            tf.image.resize(tf.expand_dims(att_map, -1), (32, 32)), -1)

    att1_map = _to_image_size(att1_map)
    att2_map = _to_image_size(att2_map)
    att3_map = _to_image_size(att3_map)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')
    overlays = (('att1_map', att1_map),
                ('att2_map', att2_map),
                ('att3_map', att3_map))

    for i in range(nums):
        img = x_test[i]
        plt.subplot(141)
        plt.imshow(img)
        plt.title('Img')
        # Panels 142..144: the image with one attention map drawn on top.
        for panel, (title, maps) in enumerate(overlays, start=142):
            plt.subplot(panel)
            plt.imshow(img)
            plt.imshow(maps[i], alpha=0.4, cmap='rainbow')
            plt.title(title)
        plt.suptitle(
            f'Prediction={classes[pred[i]]}  True={classes[y_test[i][0]]}')
        plt.show()
def __init__(self, global_model, aggregation="normal_atten"):
    """Initialise the server state.

    Args:
        global_model: Zero-argument callable; its return value becomes
            ``self.global_model``.
        aggregation: Name of the aggregation strategy, stored as-is.
    """
    # FLServer(GlobalModel_MNIST_CNN, "127.0.0.1", 5000, gpu)
    # os.environ['CUDA_VISIBLE_DEVICES'] = '%d'%gpu
    self.global_model = global_model()
    self.model_id = str(uuid.uuid4())
    self.aggregation = aggregation
    self.attention_mechanism = Attention()

    # Client bookkeeping.
    # self.host = host
    # self.port = port
    self.ready_client_sids = set()
    self.client_resource = {}
    self.wait_time = 0

    #####
    # training states
    self.current_round = -1  # -1 for not yet started
    self.current_round_client_updates = []
    self.eval_client_updates = []
    #####

    self.invalid_tolerate = 0
def _self_attention(self, q, k, v, seq_len):
    """Run multi-head attention in ``"encoder"`` mode.

    The ``Attention`` object is configured from this instance's
    hyper-parameters and invoked inside the ``"self-attention"`` variable
    scope.

    Returns:
        Whatever ``Attention.multi_head(q, k, v, seq_len)`` returns.
    """
    attention_config = dict(num_heads=self.num_heads,
                            mode="encoder",
                            linear_key_dim=self.linear_key_dim,
                            linear_value_dim=self.linear_value_dim,
                            model_dim=self.model_dim,
                            dropout=self.dropout)
    with tf.variable_scope("self-attention"):
        return Attention(**attention_config).multi_head(q, k, v, seq_len)
def forward(self, hs_enc, hs_dec):
    """Apply a fresh ``Attention`` layer to every decoder time step.

    Args:
        hs_enc: Encoder states, passed unchanged to each layer.
        hs_dec: 3-D array of decoder states; axis 1 is iterated over.

    Returns:
        Array shaped like ``hs_dec`` holding the attention output per step.

    NOTE(review): ``self.layers`` and ``self.attention_weights`` are only
    appended to here, so they grow across calls unless they are reset
    elsewhere -- confirm.
    """
    _, n_steps, _ = hs_dec.shape  # unpacking also enforces a 3-D input
    out = np.empty_like(hs_dec)

    for step in range(n_steps):
        step_layer = Attention()
        out[:, step, :] = step_layer.forward(hs_enc, hs_dec[:, step, :])
        # Keep the layer and its weights for later use.
        self.layers.append(step_layer)
        self.attention_weights.append(step_layer.attention_weight)

    return out
def _encoder_decoder_attention(self, q, k, v, bias):
    """Run multi-head attention in ``"encoder-decoder-attention"`` mode.

    Returns:
        Whatever ``Attention.multi_head(q, k, v, bias)`` returns, computed
        inside the ``"encoder-decoder-attention"`` variable scope.
    """
    attention_config = dict(num_heads=self.num_heads,
                            mode="encoder-decoder-attention",
                            linear_key_dim=self.linear_key_dim,
                            linear_value_dim=self.linear_value_dim,
                            model_dim=self.model_dim,
                            dropout=self.dropout)
    with tf.variable_scope("encoder-decoder-attention"):
        return Attention(**attention_config).multi_head(q, k, v, bias)
def _masked_self_attention(self, q, k, v, bias):
    """Run multi-head attention in ``"masked-self-attention"`` mode.

    Returns:
        Whatever ``Attention.multi_head(q, k, v, bias)`` returns, computed
        inside the ``"masked-self-attention"`` variable scope.
    """
    attention_config = dict(num_heads=self.num_heads,
                            mode="masked-self-attention",
                            linear_key_dim=self.linear_key_dim,
                            linear_value_dim=self.linear_value_dim,
                            model_dim=self.model_dim,
                            dropout=self.dropout)
    with tf.variable_scope("masked-self-attention"):
        return Attention(**attention_config).multi_head(q, k, v, bias)
def _self_attention(self, q, k, v, future, sos, seq_len):
    """Run masked multi-head self-attention.

    Returns:
        Whatever ``Attention.multi_head(q, k, v, future, sos, seq_len)``
        returns, computed inside the ``"self-attention"`` variable scope.
    """
    attention_config = dict(num_heads=self.num_heads,
                            masked=True,
                            linear_key_dim=self.linear_key_dim,
                            linear_value_dim=self.linear_value_dim,
                            model_dim=self.model_dim,
                            dropout=self.dropout,
                            batch_size=self.batch_size)
    with tf.variable_scope("self-attention"):
        layer = Attention(**attention_config)
        return layer.multi_head(q, k, v, future, sos, seq_len)
def _pooling_layer(self, q, k, v, seq_len):
    """Pool the sequence through the attention classifier head.

    Uses the ``"self-attention"`` variable scope, the same scope name as
    the self-attention layer.

    Returns:
        Whatever ``Attention.classifier_head(q, k, v, seq_len)`` returns.
    """
    attention_config = dict(num_heads=self.num_heads,
                            masked=True,
                            linear_key_dim=self.linear_key_dim,
                            linear_value_dim=self.linear_value_dim,
                            model_dim=self.model_dim,
                            dropout=self.dropout,
                            batch_size=self.batch_size)
    with tf.variable_scope("self-attention"):
        layer = Attention(**attention_config)
        return layer.classifier_head(q, k, v, seq_len)
def __init__(self, options):
    """Create the question encoder, image projection, attention and head.

    Args:
        options: Mapping; reads ``'n_image_feat'``, ``'n_dim'``,
            ``'drop_ratio'`` and ``'n_output'`` and is also forwarded to
            ``QuestionEmbedding`` and ``Attention``.
    """
    super(LSTM_Att, self).__init__()
    n_dim = options['n_dim']

    self.ques_emb = QuestionEmbedding(options)
    self.image_mlp_act = nn.Linear(options['n_image_feat'], n_dim)

    # NOTE(review): ``att1`` is assigned twice, so the first Attention is
    # built and immediately discarded. This looks like a copy-paste slip
    # (possibly meant to be ``att2``); kept as-is because the right fix
    # depends on what ``forward`` uses -- confirm.
    self.att1 = Attention(options)
    self.att1 = Attention(options)

    self.combined_mlp_drop_0 = nn.Dropout(p=options['drop_ratio'])
    self.combined_mlp_0 = nn.Linear(n_dim, options['n_output'])
def seq2seq_model(x_train_1, x_train_2):
    """Build and compile the attention encoder with two output heads.

    The encoder is a self-attention layer over the raw input sequence. Its
    output feeds (a) a recurrent attention decoder producing one step with
    ``x_train_2.shape[2]`` features and (b) a pooled 3-way softmax head.

    Relies on the module-level ``dropout``, ``hidden_dim`` and ``depth``.

    Args:
        x_train_1: Encoder training array; only ``shape[1:]`` is used.
        x_train_2: Decoder target array; only ``shape[2]`` is used.

    Returns:
        The compiled Keras ``Model`` with outputs ``[decoded, regr_outputs]``.
    """
    output_dim = x_train_2.shape[2]

    # encoder
    S_inputs = Input(shape=(x_train_1.shape[1], x_train_1.shape[2]))
    # embeddings = Embedding(max_features, 128)(S_inputs)
    # embeddings = Position_Embedding()(S_inputs)
    # (adding Position_Embedding slightly improves accuracy)
    encoded = Attention(32, 32)([S_inputs, S_inputs, S_inputs])

    # decoder
    decoder = RecurrentSequential(
        decode=True,
        output_length=1,  # x_train_2.shape[1]
        unroll=False,
        stateful=False)
    decoder.add(
        Dropout(dropout,
                batch_input_shape=(None, x_train_1.shape[1], hidden_dim)))
    # The attention cell is always the first decoder cell; deeper decoders
    # stack LSTM cells on top and finish with one emitting output_dim.
    decoder.add(
        AttentionDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))
    if depth[1] != 1:
        for _ in range(depth[1] - 2):
            decoder.add(Dropout(dropout))
            decoder.add(
                LSTMDecoderCell(output_dim=hidden_dim,
                                hidden_dim=hidden_dim))
        decoder.add(Dropout(dropout))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim))

    # regression model
    x = Attention(8, 16)([encoded, encoded, encoded])
    x = GlobalAveragePooling1D()(x)
    x = Dropout(dropout)(x)
    regr_outputs = Dense(3, activation='softmax')(x)

    decoded = decoder(encoded)
    decoded = Reshape((output_dim, ))(decoded)

    model = Model(inputs=S_inputs, outputs=[decoded, regr_outputs])
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()

    # try using different optimizers and different optimizer configs
    model.compile(loss=['mse', 'categorical_crossentropy'],
                  loss_weights=[1, 10],
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    return model
def build_baseline0(dataset, num_hid):
    """Assemble a ``SANModel1`` with two visual attention modules.

    Args:
        dataset: Provides ``dictionary.ntoken``, ``v_dim`` and
            ``num_ans_candidates``.
        num_hid: Hidden size used throughout the model.

    Returns:
        The constructed ``SANModel1``.
    """
    v_dim = dataset.v_dim

    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    q_dim = q_emb.num_hid

    v_att1 = Attention(v_dim, q_dim, num_hid)
    # The second attention receives a query that is v_dim wider.
    v_att2 = Attention(v_dim, q_dim + v_dim, num_hid)

    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([v_dim, num_hid])
    classifier = SimpleClassifier(num_hid, 2 * num_hid,
                                  dataset.num_ans_candidates, 0.5)

    return SANModel1(w_emb, q_emb, v_att1, v_att2, q_net, v_net, classifier)
def build_baseline(dataset, opt):
    """Assemble the ``BaseModel`` from its embedding/attention components.

    Args:
        dataset: Provides ``dictionary.ntokens()`` and ``num_ans``.
        opt: Options object (``EMB_DROPOUT``, ``NUM_HIDDEN``, ``NUM_LAYER``,
            ``BIDIRECT``, ``L_RNN_DROPOUT``, ``C3D_SIZE``, ``RES_SIZE``,
            ``MID_DIM``, ``FC_DROPOUT``, ``GLIMPSE``). When ``None``, the
            options are obtained from ``config.parse_opt()``.

    Returns:
        The constructed ``BaseModel``.
    """
    # The caller's ``opt`` used to be overwritten unconditionally, so the
    # argument was silently ignored; only fall back when none is given.
    if opt is None:
        opt = config.parse_opt()

    rnn_args = (opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    video_dim = opt.C3D_SIZE + opt.RES_SIZE

    w_emb = WordEmbedding(dataset.dictionary.ntokens(), 300, opt.EMB_DROPOUT)
    q_emb = QuestionEmbedding(300, *rnn_args)
    v_emb = VideoEmbedding(video_dim, *rnn_args)
    v_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    r_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    v_fc = Videofc(opt.GLIMPSE, video_dim, opt.NUM_HIDDEN, opt.FC_DROPOUT)
    a_emb = AnswerEmbedding(300, *rnn_args)
    rela_emb = Rela_Module(opt.NUM_HIDDEN * 3, opt.NUM_HIDDEN, opt.NUM_HIDDEN)
    classifier = SimpleClassifier(opt.NUM_HIDDEN, opt.MID_DIM,
                                  dataset.num_ans, opt.FC_DROPOUT)

    return BaseModel(w_emb, q_emb, v_emb, a_emb, v_att, v_fc, rela_emb,
                     r_att, classifier, opt)
def __init__(self, pretrained_vgg: VGG, method):
    """Wrap a VGG with three attention modules and a 10-way classifier.

    Args:
        pretrained_vgg: Backbone network, stored privately.
        method: Forwarded to every ``Attention`` as ``method``.
    """
    super(VGG_Att, self).__init__()
    self.__vgg = pretrained_vgg

    # All three modules share the same global descriptor size and method.
    shared = dict(global_shape=512, method=method)
    self.__att1 = Attention(local_shape=(256, 16, 16), **shared)
    self.__att2 = Attention(local_shape=(512, 8, 8), **shared)
    self.__att3 = Attention(local_shape=(512, 4, 4), **shared)

    # 256 + 512 + 512 matches the channel counts of the three local shapes.
    self.__classifier = nn.Sequential(
        nn.Linear(256 + 512 + 512, 10),
        nn.LogSoftmax(1))

    self.__unsample_1 = nn.UpsamplingBilinear2d(scale_factor=2)
    self.__unsample_2 = nn.UpsamplingBilinear2d(scale_factor=4)
    self.__unsample_3 = nn.UpsamplingBilinear2d(scale_factor=8)
def build_news_encoder(word_index, category_map, subcategory_map):
    """Build the news encoder model.

    The integer input is laid out as
    ``[title | abstract | category | subcategory]``. Title and abstract go
    through a shared word embedding, a CNN and attention; category and
    subcategory go through their own embedding and a dense layer. The four
    400-d views are stacked and pooled by a final attention layer.

    Args:
        word_index: Vocabulary mapping; sizes the word embedding.
        category_map: Sizes the category embedding.
        subcategory_map: Sizes the subcategory embedding.

    Returns:
        Keras ``Model`` named ``'news_encoder'``.
    """
    text_len = MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH

    embedding_matrix = get_embedding_matrix(word_index)
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=True)

    news_input = Input((text_len + 2, ), dtype='int32')

    # title
    title_input = Lambda(lambda x: x[:, : MAX_TITLE_LENGTH])(news_input)
    title_vec = embedding_layer(title_input)
    title_vec = Dropout(0.2)(title_vec)
    title_vec = Conv1D(400, 3, padding='same', activation='relu',
                       strides=1)(title_vec)
    title_vec = Dropout(0.2)(title_vec)
    title_vec = Attention(200)(title_vec)
    title_attention = Reshape((1, 400))(title_vec)

    # abstract
    abstract_input = Lambda(
        lambda x: x[:, MAX_TITLE_LENGTH : MAX_ABSTRACT_LENGTH + MAX_TITLE_LENGTH]
    )(news_input)
    abstract_vec = embedding_layer(abstract_input)
    abstract_vec = Dropout(0.2)(abstract_vec)
    abstract_vec = Conv1D(400, 3, padding='same', activation='relu',
                          strides=1)(abstract_vec)
    abstract_vec = Dropout(0.2)(abstract_vec)
    abstract_vec = Attention(200)(abstract_vec)
    abstract_attention = Reshape((1, 400))(abstract_vec)

    # category
    category_input = Lambda(
        lambda x: x[:, MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH : MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH + 1]
    )(news_input)
    category_embedding_layer = Embedding(len(category_map) + 1,
                                         C_EMBEDDING_DIM,
                                         trainable=True)
    category_vec = category_embedding_layer(category_input)
    category_vec = Dense(400, activation='relu')(category_vec)
    category_dense = Reshape((1, 400))(category_vec)

    # subcategory
    subcategory_input = Lambda(
        lambda x: x[:, MAX_TITLE_LENGTH + MAX_ABSTRACT_LENGTH + 1 : ]
    )(news_input)
    subcategory_embedding_layer = Embedding(len(subcategory_map) + 1,
                                            C_EMBEDDING_DIM,
                                            trainable=True)
    subcategory_vec = subcategory_embedding_layer(subcategory_input)
    subcategory_vec = Dense(400, activation='relu')(subcategory_vec)
    subcategory_dense = Reshape((1, 400))(subcategory_vec)

    # concatenate the four views along the sequence axis and pool them
    views = [title_attention, abstract_attention,
             category_dense, subcategory_dense]
    news_r = Concatenate(axis=-2)(views)
    news_r = Attention(200)(news_r)

    # from tensorflow.keras.utils import plot_model
    # plot_model(news_encoder, to_file='news_encoder.png', show_shapes=True)
    return Model(news_input, news_r, name='news_encoder')
def __init__(self, embedding_matrix):
    """Build the embedding, LSTM, two bilinear attentions and classifier.

    Relies on the module-level ``embed_dim``, ``hidden_dim``,
    ``lstm_layers`` and ``polarities_dim``.

    Args:
        embedding_matrix: Pre-trained embedding weights; converted to a
            float tensor and loaded with ``nn.Embedding.from_pretrained``.
    """
    super(IAN, self).__init__()

    pretrained = torch.tensor(embedding_matrix, dtype=torch.float)
    self.embed = nn.Embedding.from_pretrained(pretrained)
    self.lstm = nn.LSTM(embed_dim, hidden_dim, lstm_layers,
                        batch_first=True)

    self.attention_aspect = Attention(hidden_dim,
                                      score_function='bi_linear')
    self.attention_context = Attention(hidden_dim,
                                       score_function='bi_linear')

    # The classifier input is two hidden_dim-sized vectors side by side.
    self.dense = nn.Linear(hidden_dim * 2, polarities_dim)
def __init__(self, v_dim, a_dim, l_dim, hidden_size, w_emb, l_emb):
    """Encode language prior with different modality features.

    Args:
        v_dim: Visual feature size.
        a_dim: Audio feature size.
        l_dim: Language (dialogue) feature size.
        hidden_size: Hidden size of the attention and FC layers.
        w_emb: Word embedding module (WordEmbedding).
        l_emb: Sequence embedding module (SequenceEmbedding).
    """
    super(BaselineEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.w_emb = w_emb  # WordEmbedding
    self.l_emb = l_emb  # SequenceEmbedding

    # Attention(vis_hid_dim, dialoge_dim, hidden_size)
    self.v_att = Attention(v_dim, l_dim, hidden_size)
    # Attention(aud_hid_dim, dialoge_dim, hidden_size)
    self.a_att = Attention(a_dim, l_dim, hidden_size)

    # On paper
    self.c2d_v = FC([v_dim, hidden_size])
    self.c2d_a = FC([a_dim, hidden_size])
def __init__(self, use_self_attention=False):
    """Create the generator sub-modules and print the parameter counts.

    Args:
        use_self_attention: When true, an additional ``self_attn_block`` is
            created as ``self.self_attn``.
    """
    super().__init__()

    residual_blocks = [Residual(D_GF * 2) for _ in range(RESIDUALS)]
    self.residuals = nn.Sequential(*residual_blocks)
    self.attn = Attention(D_GF, D_HIDDEN)
    self.upsample = upsample_block(D_GF * 2, D_GF)

    self.use_self_attention = use_self_attention
    if self.use_self_attention:
        self.self_attn = self_attn_block()

    p_trainable, p_non_trainable = count_params(self)
    print(
        f'GeneratorN params: trainable {p_trainable} - non_trainable {p_non_trainable}'
    )
class TestAttemtion(unittest.TestCase):
    """Shape checks for ``Attention.forward`` and ``Attention.backward``."""

    def setUp(self):
        """Create a fresh layer plus random inputs ``hs`` and ``h``."""
        hs_shape = (10, 5, 4)
        h_shape = (10, 4)
        self.attention = Attention()
        self.hs = np.random.randn(*hs_shape)
        self.h = np.random.randn(*h_shape)

    def test_forward(self):
        """forward(hs, h) yields an array of shape (10, 4)."""
        result = self.attention.forward(self.hs, self.h)
        self.assertEqual((10, 4), result.shape)

    def test_backward(self):
        """backward returns two gradients with the asserted shapes."""
        # The forward output doubles as the upstream gradient.
        upstream = self.attention.forward(self.hs, self.h)
        grad_hs, grad_h = self.attention.backward(upstream)
        self.assertEqual((10, 5, 4), grad_hs.shape)
        # NOTE(review): ``h`` is (10, 4) but (10, 5) is expected here --
        # confirm against what Attention.backward actually returns.
        self.assertEqual((10, 5), grad_h.shape)
def build_baseline(dataset, opt):
    """Assemble the ``BaseModel`` variant that includes question attention.

    Args:
        dataset: Provides ``dictionary.ntokens()``.
        opt: Options object (``EMB_DROPOUT``, ``NUM_HIDDEN``, ``NUM_LAYER``,
            ``BIDIRECT``, ``L_RNN_DROPOUT``, ``C3D_SIZE``, ``RES_SIZE``,
            ``MID_DIM``, ``FC_DROPOUT``, ``GLIMPSE``).

    Returns:
        The constructed ``BaseModel``.
    """
    hidden = opt.NUM_HIDDEN
    mid = opt.MID_DIM
    fc_drop = opt.FC_DROPOUT
    video_dim = opt.C3D_SIZE + opt.RES_SIZE

    w_emb = WordEmbedding(dataset.dictionary.ntokens(), 300, opt.EMB_DROPOUT)
    q_emb = QuestionEmbedding(300, hidden, opt.NUM_LAYER, opt.BIDIRECT,
                              opt.L_RNN_DROPOUT)
    v_emb = VideoEmbedding(video_dim, hidden, opt.NUM_LAYER, opt.BIDIRECT,
                           opt.L_RNN_DROPOUT)
    v_att = Attention(hidden, mid, fc_drop)
    r_att = Attention(hidden, mid, fc_drop)
    v_fc = Videofc(opt.GLIMPSE, video_dim, hidden, fc_drop)
    a_emb = AnswerEmbedding(300, hidden, opt.NUM_LAYER, opt.BIDIRECT,
                            opt.L_RNN_DROPOUT)
    rela_emb = Rela_Module(hidden * 3, hidden, hidden)
    # Single-output classifier over a 2 * hidden input.
    classifier = SimpleClassifier(hidden * 2, mid, 1, fc_drop)
    ques_att = Q_Att(hidden, mid, fc_drop)
    # vlinear=FCNet([opt.NUM_HIDDEN,opt.MID_DIM,opt.NUM_HIDDEN])
    # rlinear=FCNet([opt.NUM_HIDDEN,opt.MID_DIM,opt.NUM_HIDDEN])

    return BaseModel(w_emb, q_emb, v_emb, a_emb, v_att, v_fc, rela_emb,
                     r_att, classifier, ques_att, opt)
def main(output_path=r'D:\pyproject\data\CIFAR10-tensorflow'):
    """Train the attention head on top of a pre-trained VGG on CIFAR-10.

    A plain VGG classifier is built first so that its weights can be loaded
    from ``vgg.h5``. Three attention modules are then attached to the
    intermediate feature maps and the resulting model is trained, saving
    the checkpoint with the best validation loss as ``vgg-att.h5``.

    Args:
        output_path: Directory holding ``vgg.h5`` and receiving
            ``vgg-att.h5``.
    """
    epochs = 30
    batch_size = 128
    n_classes = 10

    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data(
        r'D:\pyproject\data\cifar-10-batches-py')
    x_train = x_train / 255.0
    x_test = x_test / 255.0
    y_train = utils.to_categorical(y_train, num_classes=10)
    y_test = utils.to_categorical(y_test, num_classes=10)

    model_path_loss = output_path + r"\vgg-att.h5"
    save_model_loss = ModelCheckpoint(model_path_loss,
                                      monitor='val_loss',
                                      save_best_only=True,
                                      verbose=2)

    # Backbone; c1..c3 are the feature maps the attention modules read.
    _inputs = Input(shape=(32, 32, 3), name='input')
    _layer = ConvBlock(64, 3)(_inputs)
    _layer = ConvBlock(128, 3)(_layer)
    c1 = ConvBlock(256, 3, pooling=True)(_layer)
    c2 = ConvBlock(512, 3, pooling=True)(c1)
    c3 = ConvBlock(512, 3, pooling=True)(c2)
    _layer = ConvBlock(512, 3, pooling=True)(c3)
    _layer = ConvBlock(512, 3, pooling=True)(_layer)
    _layer = Flatten()(_layer)
    _g = Dense(512, activation='relu')(_layer)
    _outputs = Dense(n_classes, activation='softmax')(_g)

    # Load the pre-trained backbone weights through the plain classifier.
    vgg = Model(_inputs, _outputs)
    vgg.load_weights(output_path + r"\vgg.h5")

    att1_f, att1_map = Attention((16, 16, 256), 512, method='pc')([c1, _g])
    att2_f, att2_map = Attention((8, 8, 512), 512, method='pc')([c2, _g])
    att3_f, att3_map = Attention((4, 4, 512), 512, method='pc')([c3, _g])
    f_concat = tf.concat([att1_f, att2_f, att3_f], axis=1)
    _out = Dense(n_classes, 'softmax')(f_concat)

    model = Model(_inputs, _out)
    opt = optimizers.SGD(learning_rate=0.01, momentum=0.9)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()

    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              validation_data=(x_test, y_test),
              epochs=epochs,
              verbose=1,
              callbacks=[save_model_loss])
def __init__(self, embedding_matrix):
    """Build the embedding, MLP attention and linear layers.

    Relies on the module-level ``embed_dim`` and ``polarities_dim``.

    Args:
        embedding_matrix: Pre-trained embedding weights; converted to a
            float tensor and loaded with ``nn.Embedding.from_pretrained``.
    """
    super(MemNet, self).__init__()

    pretrained = torch.tensor(embedding_matrix, dtype=torch.float)
    self.embed = nn.Embedding.from_pretrained(pretrained)
    self.attention = Attention(embed_dim, score_function='mlp')
    self.x_linear = nn.Linear(embed_dim, embed_dim)
    self.dense = nn.Linear(embed_dim, polarities_dim)
class Constructor:
    """Evolvable pipeline: input mapper -> structure -> semantic mapper ->
    attention -> type computer -> output mapper.

    All layers are configured from one flat "chromosome" (a list of
    integers) which ``evolve`` optimises with a genetic algorithm.

    The code targets Python 2 (``cPickle``, ``xrange``, pyevolve).
    """

    def __init__(self, datafile, n_objects, n_types, max_attention_depth,
                 max_attention_objects, computer_depth, n_functions,
                 test_fraction=0, data_fraction=1, stochastic=False,
                 batch_size=0, save_file="best_construct.pkl",
                 sep_features_targets=False):
        """Load the dataset and create all layers.

        :param datafile: Path of a pickle file holding the rows of the
         dataset. For every row, element 1 is used as the target and
         elements 2 onwards as the input; both are converted with ``str``.
        :param data_fraction: Fraction of the dataset to use.
        :param test_fraction: The fraction of the dataset to reserve for
         testing.
        :param n_objects: The number of objects which Structure reduces the
         data to.
        :param n_types: The number of types SemanticalMapper maps the objects
         to. This is the number of separate types as defined by their
         behavior computed by TypeComputer.
        :param max_attention_depth: The depth of reduction in the Structure
         that Attention will look for type pairs in.
        :param max_attention_objects: The number of type pairs in each layer
         of Structure that Attention will look for.
        :param computer_depth: The number of times types will be recursively
         passed through the functions of TypeComputer.
        :param n_functions: The size of the function set of TypeComputer.
        :param stochastic: When true, ``eval_func`` scores random batches
         instead of the full train/test sets.
        :param batch_size: Training batch size in stochastic mode (the test
         batch is half of it).
        :param save_file: Where ``eval_func`` pickles the best chromosome.
        :param sep_features_targets: When true, the data is re-zipped from
         its first two entries.
        """
        self.n_objects = n_objects
        self.n_types = n_types
        self.max_attention_depth = max_attention_depth
        self.max_attention_objects = max_attention_objects
        self.computer_depth = computer_depth
        self.n_functions = n_functions
        self.stochastic = stochastic
        self.batch_size = batch_size
        self.save_file = save_file
        self.best_fit = 0.0

        # NOTE: unpickling can execute arbitrary code; only load trusted
        # files. ``with`` closes the handle even if loading fails.
        with open(datafile, "rb") as df:
            self.data = cPickle.load(df)

        features = [str(row[2:]) for row in self.data]
        targets = [str(row[1]) for row in self.data]
        self.data = zip(features, targets)
        if sep_features_targets:
            self.data = zip(self.data[0], self.data[1])
        if data_fraction < 1:
            self.data = [pair for pair in self.data
                         if r.random() < data_fraction]
        print(self.data[0][0])
        print(self.data[0][1])

        train_len = int(round(len(self.data) * (1 - test_fraction)))
        self.train_data = self.data[:train_len]
        self.test_data = self.data[train_len:]
        print(len(self.train_data))
        print(len(self.test_data))

        inputs = [pair[0] for pair in self.data]
        outputs = [pair[1] for pair in self.data]
        # Every symbol of every input sequence, in order, duplicates kept.
        input_symbols = [symbol for sequence in inputs for symbol in sequence]

        # Initialize layers
        self.input_mapper = SemanticalMapper(first_layer=True,
                                             inputs=input_symbols)
        self.structure = Structure(n_objects)
        self.semantical_mapper = SemanticalMapper(n_objects, n_types)
        self.attention = Attention(max_attention_depth,
                                   max_attention_objects, n_types)
        self.type_computer = TypeComputer(
            max_attention_depth * max_attention_objects * 2,
            num_functions=self.n_functions,
            depth=computer_depth,
            n_types=self.n_types)
        self.output_mapper = OutputMapper(n_types, outputs)

    def _forward(self, inp):
        """Run one input through every layer and return the mapped outputs."""
        mapped = self.input_mapper.compute(inp)
        structure = self.structure.make(mapped)
        for i in range(len(structure)):
            structure[i] = self.semantical_mapper.compute(structure[i])
        filtered = self.attention.filter(structure)
        outputs = self.type_computer.compute(filtered)
        return self.output_mapper.compute(outputs)

    def _apply_chromosome(self, chromosome):
        """Cut the flat chromosome into per-layer segments and load them."""
        path = self.type_computer.path
        layers = (self.input_mapper, self.structure, self.semantical_mapper,
                  self.attention, self.type_computer)
        lengths = (self.input_mapper.n_symbols,
                   len(self.structure.r_map),
                   len(self.semantical_mapper.map),
                   self.max_attention_depth * self.max_attention_objects * 2,
                   len(path) * len(path[0]))

        # Slice everything first, then configure: the segment lengths must
        # be read from the layers before any of them is modified.
        segments = []
        index = 0
        for length in lengths:
            segments.append(chromosome[index:index + length])
            index += length

        for layer, segment in zip(layers, segments):
            layer.set(segment)
        # The output mapper takes whatever remains.
        self.output_mapper.set(chromosome[index:])

    def _count_errors(self, data, verbose=False):
        """Return the number of misclassified pairs in ``data`` (a float)."""
        errors = 0.0
        for pair in data:
            target = pair[1]
            outputs = self._forward(pair[0])
            # Classification: the last output symbol is the prediction.
            output = int(outputs[-1]) if len(outputs) != 0 else 0
            if verbose:
                print("Output: " + str(output))
                print("Target: " + str(target))
            if output != int(target):
                errors += 1
        return errors

    def compute(self, data):
        """Classify one input; returns 1 when the pipeline yields nothing."""
        outputs = self._forward(data)
        if len(outputs) != 0:
            return int(outputs[-1])
        return 1

    def set(self, savefile):
        """Load a pickled chromosome from ``savefile`` and apply it."""
        # NOTE: unpickling can execute arbitrary code; only load trusted
        # files. The handle used to be left open.
        with open(savefile, "rb") as cfile:
            chromosome = cPickle.load(cfile)
        self._apply_chromosome(chromosome)

    def eval_func(self, chromosome, report_test=True):
        """Fitness function: training accuracy of ``chromosome``.

        Applies the chromosome and counts classification errors on the
        training data (or a random batch in stochastic mode). When
        ``report_test`` is true and test data exists, the test accuracy is
        printed, and a chromosome beating the best test accuracy seen so
        far is pickled to ``self.save_file``.

        :param chromosome: Flat sequence of genes for all layers.
        :param report_test: Whether to evaluate and report on test data.
        :return: Fraction of correctly classified training pairs.
        """
        self._apply_chromosome(chromosome)

        if self.stochastic:
            indexes = [r.randint(0, len(self.train_data) - 1)
                       for x in range(self.batch_size)]
            train_data = [self.train_data[x] for x in indexes]
            if self.test_data:
                indexes = [r.randint(0, len(self.test_data) - 1)
                           for x in range(self.batch_size // 2)]
                test_data = [self.test_data[x] for x in indexes]
            else:
                # randint(0, -1) would raise on an empty test set.
                test_data = []
        else:
            train_data = self.train_data
            test_data = self.test_data

        error = self._count_errors(train_data)

        # With the default test_fraction=0 there is no test data; skip the
        # report instead of dividing by zero.
        if report_test and len(test_data) != 0:
            test_error = self._count_errors(test_data, verbose=True)
            test_acc = (len(test_data) - test_error) / len(test_data)
            if test_acc > self.best_fit:
                # Remember the new best so that weaker chromosomes do not
                # overwrite the saved one later.
                self.best_fit = test_acc
                with open(self.save_file, "wb") as outfile:
                    cPickle.dump(list(chromosome), outfile)
            print("Test error acc.: " + str(test_acc))

        return (len(train_data) - error) / len(train_data)

    def evolve(self, n_generations):
        """Run the genetic algorithm for ``n_generations`` generations."""
        print("Initializing evolution...")

        path = self.type_computer.path
        # (gene count, low, high) per layer, in chromosome order; this must
        # match the segment order used by _apply_chromosome.
        allele_specs = (
            (self.input_mapper.n_symbols, 0, self.input_mapper.n_symbols),
            (len(self.structure.r_map), 0, self.n_objects),
            (len(self.semantical_mapper.map), 0, self.n_types),
            (self.max_attention_depth * self.max_attention_objects * 2,
             0, self.n_types),
            (len(path) * len(path[0]), 0, self.n_functions - 1),
            (self.n_types + 1, 0, self.output_mapper.n_symbols),
        )

        # Genome instance
        setOfAlleles = GAllele.GAlleles()
        for count, low, high in allele_specs:
            for _ in xrange(count):
                setOfAlleles.add(GAllele.GAlleleRange(low, high))

        genome = G1DList.G1DList(len(setOfAlleles))
        genome.setParams(allele=setOfAlleles)

        # The evaluator function (objective function)
        genome.evaluator.set(self.eval_func)
        genome.mutator.set(Mutators.G1DListMutatorAllele)
        genome.initializator.set(Initializators.G1DListInitializatorAllele)

        # Genetic Algorithm Instance
        ga = GSimpleGA.GSimpleGA(genome)
        ga.minimax = Consts.minimaxType["maximize"]
        ga.selector.set(Selectors.GRankSelector)
        ga.setGenerations(n_generations)

        print("Evolving...")
        # Do the evolution, with stats dump
        # frequency of 1 generations
        ga.evolve(freq_stats=1)

        print(ga.bestIndividual())
def __init__(self, datafile, n_objects, n_types, max_attention_depth, max_attention_objects, computer_depth,
             n_functions, test_fraction=0, data_fraction=1, stochastic=False, batch_size=0,
             save_file="best_construct.pkl", sep_features_targets=False):
    """
    Load the dataset, split it into train and test parts and build the layers.

    :param datafile: Path of a pickle file holding the dataset. For every record, element 1 becomes the
                     target and elements 2 onward become the input; both are converted with str().
    :param data_fraction: Fraction of the dataset to use. For values below 1 each pair is kept with that
                          probability, so the resulting size is random.
    :param test_fraction: The fraction of the dataset to reserve for testing (taken from the end of the data).
    :param n_objects: The number of objects which Structure reduces the data to.
    :param n_types: The number of types SemanticalMapper maps the objects to. This is the number of separate
                    types as defined by their behavior computed by TypeComputer.
    :param max_attention_depth: The depth of reduction in the Structure that Attention will look for type
                                pairs in.
    :param max_attention_objects: The number of type pairs in each layer of Structure that Attention will
                                  look for.
    :param computer_depth: The number of times types will be recursively passed through the functions of
                           TypeComputer.
    :param n_functions: The size of the function set of TypeComputer.
    :param stochastic: Stored as self.stochastic; not used by the constructor itself.
    :param batch_size: Stored as self.batch_size; not used by the constructor itself.
    :param save_file: Stored as self.save_file; not used by the constructor itself.
    :param sep_features_targets: If true, the paired data is replaced by zip(self.data[0], self.data[1]).
    """
    self.n_objects = n_objects
    self.n_types = n_types
    self.max_attention_depth = max_attention_depth
    self.max_attention_objects = max_attention_objects
    self.computer_depth = computer_depth
    self.n_functions = n_functions
    self.stochastic = stochastic
    self.batch_size = batch_size
    self.save_file = save_file
    self.best_fit = 0.0

    # The context manager closes the file even when unpickling fails.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(datafile, "rb") as df:
        raw_records = cPickle.load(df)

    features = [str(record[2:]) for record in raw_records]
    targets = [str(record[1]) for record in raw_records]
    self.data = zip(features, targets)

    if sep_features_targets:
        # NOTE(review): at this point self.data already holds (features, target) pairs, so this pairs up
        # the first two records element-wise. Kept as in the original; confirm it is intended.
        self.data = zip(self.data[0], self.data[1])

    if data_fraction < 1:
        # Random subsample: each pair survives independently.
        self.data = [pair for pair in self.data if r.random() < data_fraction]

    # Preview of the first input/target pair.
    print(self.data[0][0])
    print(self.data[0][1])

    # The first part of the data is used for training, the remainder for testing.
    train_len = int(round(len(self.data) * (1 - test_fraction)))
    self.train_data = self.data[:train_len]
    self.test_data = self.data[train_len:]

    print(len(self.train_data))
    print(len(self.test_data))

    inputs = [pair[0] for pair in self.data]
    outputs = [pair[1] for pair in self.data]
    # Flatten every input sequence into one list of symbols (duplicates kept).
    input_symbols = [symbol for sequence in inputs for symbol in sequence]

    # Initialize layers
    self.input_mapper = SemanticalMapper(first_layer=True, inputs=input_symbols)
    self.structure = Structure(n_objects)
    self.semantical_mapper = SemanticalMapper(n_objects, n_types)
    self.attention = Attention(max_attention_depth, max_attention_objects, n_types)
    self.type_computer = TypeComputer(max_attention_depth * max_attention_objects * 2,
                                      num_functions=self.n_functions,
                                      depth=computer_depth,
                                      n_types=self.n_types)
    self.output_mapper = OutputMapper(n_types, outputs)