Example #1
    def __init__(self, batch_size, image_size=64, z_dim=100, conv_dim=64):
        super(Generator, self).__init__()
        self.imsize = image_size

        repeat_num = int(np.log2(self.imsize)) - 3
        mult = 2**repeat_num  # 8
        self.l1 = nn.Sequential(
            SpectralNorm(nn.ConvTranspose2d(z_dim, conv_dim * mult, 4)),
            nn.BatchNorm2d(conv_dim * mult), nn.ReLU())

        curr_dim = conv_dim * mult
        self.l2 = nn.Sequential(
            SpectralNorm(nn.ConvTranspose2d(curr_dim, curr_dim // 2, 4, 2, 1)),
            nn.BatchNorm2d(curr_dim // 2), nn.ReLU())

        curr_dim //= 2
        self.l3 = nn.Sequential(
            SpectralNorm(nn.ConvTranspose2d(curr_dim, curr_dim // 2, 4, 2, 1)),
            nn.BatchNorm2d(curr_dim // 2), nn.ReLU())

        if self.imsize == 64:
            curr_dim //= 2
            self.l4 = nn.Sequential(
                SpectralNorm(
                    nn.ConvTranspose2d(curr_dim, curr_dim // 2, 4, 2, 1)),
                nn.BatchNorm2d(curr_dim // 2), nn.ReLU())
            curr_dim //= 2

        self.last = nn.Sequential(nn.ConvTranspose2d(curr_dim, 3, 4, 2, 1),
                                  nn.Tanh())

        self.attn1 = SelfAttention(128, 'relu')
        self.attn2 = SelfAttention(64, 'relu')
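Only the constructor is shown; a hedged forward sketch for this SAGAN-style generator, assuming (as in the upstream repository) that SelfAttention returns a (features, attention_map) pair:

    def forward(self, z):
        # Reshape the latent vector into a 4-D tensor for ConvTranspose2d.
        z = z.view(z.size(0), z.size(1), 1, 1)
        out = self.l1(z)           # 4x4, conv_dim*8 channels
        out = self.l2(out)         # 8x8, conv_dim*4
        out = self.l3(out)         # 16x16, conv_dim*2 (128 for conv_dim=64)
        out, p1 = self.attn1(out)  # attention over the 128-channel maps
        out = self.l4(out)         # 32x32, conv_dim (64)
        out, p2 = self.attn2(out)  # attention over the 64-channel maps
        out = self.last(out)       # 64x64, 3 channels, Tanh -> [-1, 1]
        return out, p1, p2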
Example #2
 def __init__(self, config: Config, checkpoint_path: Optional[str] = None):
     super(Model, self).__init__()
     regularizer = keras.regularizers.l2(
         config.embedding_regularization_coef)
     if config.use_pretrained_embeddings:
         weights = _load_character_embeddings()
         self.embedding_layer = Embedding(config.vocab_size,
                                          config.embedding_size,
                                          weights=[weights],
                                          trainable=False,
                                          mask_zero=True)
     else:
         self.embedding_layer = Embedding(
             config.vocab_size,
             config.embedding_size,
             embeddings_regularizer=regularizer,
             mask_zero=True)
     dense_regularizer = keras.regularizers.l2(
         config.dense_regularization_coef)
     if config.use_word_level_embeddings:
         if checkpoint_path is None:
             weights = _load_word_embeddings(config.glove_vocab_size)[1]
             embed_init = Constant(weights)
         else:
             embed_init = None
         self.word_embedding_layer = Embedding(
             config.glove_vocab_size + 1,
             300,
             embeddings_initializer=embed_init,
             trainable=False,
             mask_zero=True)
         self.word_embedding_dropout = Dropout(config.dense_dropout)
         self.word_embedding_attention = SelfAttention(2, 64)
         self.word_dense_h_1 = Dense(config.lstm_size,
                                     activation='relu',
                                     kernel_regularizer=dense_regularizer)
         self.word_dense_h_2 = Dense(config.lstm_size,
                                     activation='tanh',
                                     kernel_regularizer=dense_regularizer)
         self.word_dense_c_1 = Dense(config.lstm_size,
                                     activation='relu',
                                     kernel_regularizer=dense_regularizer)
         self.word_dense_c_2 = Dense(config.lstm_size,
                                     activation='tanh',
                                     kernel_regularizer=dense_regularizer)
     self.recurrent_layer = LSTM(config.lstm_size,
                                 recurrent_dropout=config.lstm_dropout,
                                 return_state=not config.use_attention,
                                 return_sequences=config.use_attention)
     if config.use_attention:
         self.attention_layer = SelfAttention(config.attention_num_heads,
                                              config.attention_head_size)
     self.attention_dropout = Dropout(config.dense_dropout)
     self.dense_layer = Dense(config.num_classes * 8,
                              kernel_regularizer=dense_regularizer,
                              activation='relu')
     self.output_layer = Dense(config.num_classes, activation=tf.nn.softmax)
     self.config = config
     if checkpoint_path is not None:
         self.load_weights(checkpoint_path)
Example #3
    def __init__(self, batch_size=64, image_size=64, conv_dim=64):
        super(Discriminator, self).__init__()
        self.imsize = image_size

        self.l1 = nn.Sequential(SpectralNorm(nn.Conv2d(3, conv_dim, 4, 2, 1)),
                                nn.LeakyReLU(0.1))
        curr_dim = conv_dim

        self.l2 = nn.Sequential(
            SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)),
            nn.LeakyReLU(0.1))
        curr_dim *= 2

        self.l3 = nn.Sequential(
            SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)),
            nn.LeakyReLU(0.1))
        curr_dim *= 2

        if self.imsize == 64:
            self.l4 = nn.Sequential(
                SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)),
                nn.LeakyReLU(0.1))
            curr_dim *= 2
        self.last = nn.Sequential(nn.Conv2d(curr_dim, 1, 4))

        self.attn1 = SelfAttention(256, 'relu')
        self.attn2 = SelfAttention(512, 'relu')
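Again only the constructor is shown; a matching hedged forward sketch, with the same assumption about SelfAttention's return value:

    def forward(self, x):
        out = self.l1(x)           # 32x32, conv_dim (64)
        out = self.l2(out)         # 16x16, conv_dim*2 (128)
        out = self.l3(out)         # 8x8, conv_dim*4 (256)
        out, p1 = self.attn1(out)  # attention over the 256-channel maps
        out = self.l4(out)         # 4x4, conv_dim*8 (512)
        out, p2 = self.attn2(out)  # attention over the 512-channel maps
        out = self.last(out)       # 1x1 realness score
        return out.squeeze(), p1, p2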
Example #4
    def __init__(self):
        super(Discriminator, self).__init__()

        self.conv1 = SpectralNorm(
            nn.Conv2d(channels, 64, 3, stride=1, padding=(2, 2)))

        self.conv2 = SpectralNorm(
            nn.Conv2d(64, 64, 4, stride=2, padding=(1, 1)))
        self.conv3 = SpectralNorm(
            nn.Conv2d(64, 128, 3, stride=1, padding=(1, 1)))
        self.conv4 = SpectralNorm(
            nn.Conv2d(128, 128, 4, stride=2, padding=(1, 1)))

        self.conv5 = SpectralNorm(
            nn.Conv2d(128, 256, 3, stride=1, padding=(1, 1)))
        self.conv6 = SpectralNorm(
            nn.Conv2d(256, 256, 4, stride=2, padding=(1, 1)))

        self.attention_size = 32
        self.att = SelfAttention(256, self.attention_size)
        self.att_post = SelfAttentionPost(256, self.attention_size)

        self.conv7 = SpectralNorm(
            nn.Conv2d(256, 512, 3, stride=1, padding=(1, 1)))

        self.embed = SpectralNorm(nn.Linear(num_classes, w_g * w_g * 512))

        self.fc = SpectralNorm(nn.Linear(w_g * w_g * 512, 1))
Example #5
 def __init__(self, embed_size, heads, forward_expansion, dropout, device):
     super(DecoderBlock, self).__init__()
     self.attention = SelfAttention(embed_size, heads)
     self.norm = nn.LayerNorm(embed_size)
     self.transformer_block = TransformerBlock(
         embed_size, heads, dropout, forward_expansion)
     self.dropout = nn.Dropout(dropout)
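The snippet shows only __init__; a hedged forward in the standard decoder pattern this block implies (masked self-attention over the target, then cross-attention and feed-forward via the shared TransformerBlock):

 def forward(self, x, value, key, src_mask, trg_mask):
     # Assumed forward, not part of the original snippet.
     attention = self.attention(x, x, x, trg_mask)
     query = self.dropout(self.norm(attention + x))
     out = self.transformer_block(value, key, query, src_mask)
     return out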
Example #6
    def __init__(self, image_size=64, z_dim=100, conv_dim=64):
        # not used: the network must be built first and cannot change during forward
        super(Generator, self).__init__()
        self.imsize = image_size
        self.watch_list1 = [0]  # a list used to store attention map
        self.watch_list2 = [0]

        layers = []

        repeat_num = int(np.log2(self.imsize)) - 3  # 3
        mult = 2**repeat_num  # 8; multiplier to conv_dim
        curr_dim = z_dim  # initial dim equals z_dim
        tar_dim = conv_dim * mult  # initial tar_dim

        layers.append(
            SpectralNorm(nn.ConvTranspose2d(curr_dim, conv_dim * mult, 4)))
        layers.append(nn.BatchNorm2d(conv_dim *
                                     mult))  # batch norm before non-linearity
        layers.append(nn.ReLU())
        curr_dim = tar_dim
        tar_dim = int(tar_dim / 2)

        for i in range(repeat_num):
            layers.append(
                SpectralNorm(nn.ConvTranspose2d(curr_dim, tar_dim, 4, 2,
                                                1)))  # transpose
            layers.append(nn.BatchNorm2d(tar_dim))
            layers.append(nn.ReLU())
            curr_dim = tar_dim
            tar_dim = int(tar_dim / 2)

            if curr_dim == 64:
                self.attn1 = SelfAttention(64, self.watch_list1)
                layers.append(self.attn1)
            if curr_dim == 128:
                self.attn2 = SelfAttention(128, self.watch_list2)
                layers.append(self.attn2)

        layers.append(
            nn.ConvTranspose2d(curr_dim, 3, kernel_size=4, stride=2,
                               padding=1))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)
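A minimal usage sketch, assuming forward simply applies self.main:

G = Generator(image_size=64, z_dim=100, conv_dim=64)
z = torch.randn(8, 100, 1, 1)  # ConvTranspose2d expects a 4-D latent
fake = G.main(z)               # -> (8, 3, 64, 64), Tanh output in [-1, 1]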
Example #7
File: model.py Project: Misakaaa/IDSF
    def __init__(self,
                 vocab_size,
                 embed_size,
                 hidden_size,
                 slot_size,
                 intent_size,
                 dropout=0.3,
                 pad_idx=0):
        super(SDEN, self).__init__()

        self.pad_idx = pad_idx
        self.embed = nn.Embedding(vocab_size,
                                  embed_size,
                                  padding_idx=self.pad_idx)
        self.bigru_m = nn.GRU(embed_size,
                              hidden_size,
                              batch_first=True,
                              bidirectional=True)
        self.bigru_c = nn.GRU(embed_size,
                              hidden_size,
                              batch_first=True,
                              bidirectional=True)
        self.context_encoder = nn.Sequential(
            nn.Linear(hidden_size * 4, hidden_size * 2), nn.Sigmoid())
        self.Att = Attn('concat', hidden_size)
        self.context_encoder1 = nn.Sequential(
            nn.Linear(hidden_size * 8, hidden_size * 2), nn.Sigmoid())

        self.session_encoder = nn.GRU(hidden_size * 2,
                                      hidden_size * 2,
                                      batch_first=True,
                                      bidirectional=True)

        self.decoder_1 = nn.GRU(embed_size,
                                hidden_size * 2,
                                batch_first=True,
                                bidirectional=True)
        self.decoder_2 = nn.LSTM(hidden_size * 4,
                                 hidden_size * 2,
                                 batch_first=True,
                                 bidirectional=True)

        self.intent_linear = nn.Linear(hidden_size * 4, intent_size)
        self.slot_linear = nn.Linear(hidden_size * 4, slot_size)
        self.dropout = nn.Dropout(dropout)
        self.attention = SelfAttention(hidden_size)
        self.att = SelfA(hidden_size)
        self.hidden_size = hidden_size
        # self.att = Attn('concat', 64)

        for param in self.parameters():
            if len(param.size()) > 1:
                nn.init.xavier_uniform_(param)
            else:
                param.data.zero_()
Example #8
File: mypggan.py Project: Gingaless/rkg
	def get_attn_layer(self, self_attn):

		attn_layer = None
		if self.attns_mode == SelfAttention:
			if self_attn > 0:
				attn_layer = SelfAttention(self_attn)
		elif self.attns_mode == GoogleAttention:
			if self_attn is not None:
				attn_layer = GoogleAttention(self_attn)

		return attn_layer
Example #9
    def __init__(self, k, heads, mask=False):
        super().__init__()

        self.attention = SelfAttention(k, heads=heads)

        self.norm1 = nn.LayerNorm(k)
        self.norm2 = nn.LayerNorm(k)

        self.ff = nn.Sequential(nn.Linear(k, 4 * k), nn.ReLU(),
                                nn.Linear(4 * k, k))

        self.mask = mask
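A hedged forward for this block (assumed; only __init__ appears above): self-attention plus a residual connection and layer norm, then the feed-forward with its own residual and norm:

    def forward(self, x):
        # Self-attention with a residual connection, then layer norm.
        attended = self.attention(x)
        x = self.norm1(attended + x)
        # Position-wise feed-forward, again with residual + norm.
        fedforward = self.ff(x)
        return self.norm2(fedforward + x)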
Example #10
    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)

        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size)
        )

        self.dropout = nn.Dropout(dropout)
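A hedged forward for this TransformerBlock, consistent with how the DecoderBlock in Example #5 would call it (assumed, not shown in the original):

    def forward(self, value, key, query, mask):
        # Multi-head attention with a residual connection from the query.
        attention = self.attention(value, key, query, mask)
        x = self.dropout(self.norm1(attention + query))
        # Position-wise feed-forward with its own residual + norm.
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out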
Example #11
    def __init__(self, conv_dim=64, watch_on=False):
        super(Discriminator, self).__init__()
        self.watch_list1 = [0]
        self.watch_list2 = [0]
        layers = []

        curr_dim = 3  # input channels (RGB image)
        tar_dim = conv_dim  # initial tar_dim

        for i in range(4):
            layers.append(SpectralNorm(nn.Conv2d(curr_dim, tar_dim, 4, 2, 1)))
            layers.append(nn.BatchNorm2d(tar_dim))
            layers.append(nn.LeakyReLU(0.1))
            curr_dim = tar_dim
            tar_dim = curr_dim * 2

            if curr_dim == 256:
                layers.append(SelfAttention(256, self.watch_list1))
            if curr_dim == 512:
                layers.append(SelfAttention(512, self.watch_list2))
        layers.append(nn.Conv2d(curr_dim, 1, 4))
        self.main = nn.Sequential(*layers)
Example #12
 def _self_attention(self):
     """
     Add the self_attention to the result of the fuse, the fuse_p_encode's size is
     (batch_size, time, 2*dim)
     """
     dim = self.fuse_p_encodes.shape[-1]
     atten_layer = SequenceMapperSeq(VariationalDropoutLayer(0.8),
                                     ResidualLayer(SequenceMapperSeq(
                                         SelfAttention(attention=TriLinear(bias=True), merge=ConcatWithProduct()),
                                         FullyConnected(dim, activation="relu")
                                     )),
                                     VariationalDropoutLayer(0.8)
                                     )
     self.fuse_p_encodes = atten_layer.apply(self.is_train, self.fuse_p_encodes, self.p_length)
Example #13
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=2):
    """conv_block is the block that has a conv layer at shortcut

    # Arguments
        input_tensor: input tensor
        kernel_size: defualt 3, the kernel size of middle conv layer at main path
        filters: list of integers, the filterss of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.

    Note that from stage 3, the first conv layer at main path is with strides=(2,2)
    And the shortcut should have strides=(2,2) as well
    """
    filters1, filters2, filters3 = filters
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    # x = Conv2D(filters2, kernel_size, padding='same',
    #            name=conv_name_base + '2b')(x)
    x = SelfAttention(filters2, kernel_size, 8)(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    if strides != 1:
        x = AveragePooling2D((2, 2), strides=strides, padding='same')(x)

    shortcut = Conv2D(filters3, (1, 1),
                      strides=strides,
                      name=conv_name_base + '1')(input_tensor)
    shortcut = BatchNormalization(axis=bn_axis,
                                  name=bn_name_base + '1')(shortcut)

    x = layers.add([x, shortcut])
    x = Activation('relu')(x)
    return x
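For reference, a call in the style of the ResNet50 builder in Example #18 below:

x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=1)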
Example #14
    def __init__(self):
        super(Discriminator, self).__init__()

        self.first = FirstResBlockDiscriminator(channels, DISC_SIZE, stride=2)
        self.block1 = ResBlockDiscriminator(DISC_SIZE, DISC_SIZE, stride=2)
        self.block2 = ResBlockDiscriminator(DISC_SIZE, DISC_SIZE)

        self.attention_size = 16
        self.att = SelfAttention(128, self.attention_size)
        self.att_post = SelfAttentionPost(128, self.attention_size)

        self.block3 = ResBlockDiscriminator(DISC_SIZE, DISC_SIZE)
        self.pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(DISC_SIZE, 1)
        nn.init.xavier_uniform_(self.fc.weight.data, 1.)
        self.fc = SpectralNorm(self.fc)
        self.embed = SpectralNorm(nn.Linear(num_classes, DISC_SIZE))
Example #15
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """The identity block is the block that has no conv layer at shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: defualt 3, the kernel size of middle conv layer at main path
        filters: list of integers, the filterss of 3 conv layer at main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    # x = Conv2D(filters2, kernel_size,
    #            padding='same', name=conv_name_base + '2b')(x)
    x = SelfAttention(filters2, kernel_size, 8)(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    x = layers.add([x, input_tensor])
    x = Activation('relu')(x)
    return x
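Typical usage, chained after a matching conv_block so the channel counts line up for the residual add:

x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')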
Example #16
l2_regularization = 0.001
learning_rate = 0.01
n_x = 32
epochs = 20
time_steps = MAX_LENGTH

# Build model
print("Build model...")
sequence_input = Input(shape=(time_steps, ), dtype='float32')
print('Sequence input is:', sequence_input)  # (batch_size, time_steps=500)
embedded_sequences = embedding_layer(sequence_input)
print('Embedding layer is:',
      embedded_sequences)  # (batch_size, time_steps=500, embedding_dim=25)

# Self attention
self_att = SelfAttention(
    8, 16)([embedded_sequences, embedded_sequences, embedded_sequences])

L = Bidirectional(
    GRU(n_x,
        activation='tanh',
        dropout=0.2,
        recurrent_dropout=0.1,
        return_sequences=True,
        kernel_initializer='he_uniform',
        name='Pre-BiGRU'))(self_att)
print('Bi-GRU is:', L)  # (batch_size, time_steps, units=32*2)
# Original attention (kept commented out; replaced by the SelfAttention above):
# L = __attention3DBlock__(L)  # (batch_size, time_steps=500, units=32*2)
# print('Attention layer is:', L)
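The snippet ends here; a hedged completion of the classifier head (the pooling, output width, and optimizer are assumptions, though the otherwise unused l2_regularization, learning_rate, and epochs above suggest something of this shape):

# GlobalMaxPooling1D, Dense, Model, Adam, regularizers assumed imported from keras.
L = GlobalMaxPooling1D()(L)  # (batch_size, units=32*2); assumed pooling
preds = Dense(1, activation='sigmoid',
              kernel_regularizer=regularizers.l2(l2_regularization))(L)
model = Model(sequence_input, preds)
model.compile(optimizer=Adam(learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])
# epochs above presumably feeds a model.fit(...) call not shown.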
Example #17
def train():
    with tf.device('/cpu:0'):
        x_text, y = data_helpers.load_data_and_labels(FLAGS.train_dir)

    # Build vocabulary
    # Example: x_text[3] = "A misty ridge uprises from the surge."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # =>
    # [27 39 40 41 42  1 43  0  0 ... 0]
    # dimension = FLAGS.max_sentence_length
    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(FLAGS.max_sentence_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))

    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = SelfAttention(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                hidden_size=FLAGS.hidden_size,
                d_a_size=FLAGS.d_a_size,
                r_size=FLAGS.r_size,
                fc_size=FLAGS.fc_size,
                p_coef=FLAGS.p_coef
            )

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(model.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.word2vec:
                # initial matrix with random uniform
                initW = np.random.uniform(-0.25, 0.25, (len(vocab_processor.vocabulary_), FLAGS.embedding_dim))
                # load any vectors from the word2vec
                print("Load word2vec file {0}".format(FLAGS.word2vec))
                with open(FLAGS.word2vec, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in range(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1).decode('latin-1')
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            initW[idx] = np.fromstring(f.read(binary_len), dtype='float32')
                        else:
                            f.read(binary_len)
                sess.run(model.W_text.assign(initW))
                print("Success to load pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)

                # Train
                feed_dict = {
                    model.input_text: x_batch,
                    model.input_y: y_batch
                }

                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, model.loss, model.accuracy], feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    # Generate batches
                    batches_dev = data_helpers.batch_iter(
                        list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
                    # Evaluation loop. For each batch...
                    loss_dev = 0
                    accuracy_dev = 0
                    cnt = 0
                    for batch_dev in batches_dev:
                        x_batch_dev, y_batch_dev = zip(*batch_dev)

                        feed_dict_dev = {
                            model.input_text: x_batch_dev,
                            model.input_y: y_batch_dev
                        }

                        summaries_dev, loss, accuracy = sess.run(
                            [dev_summary_op, model.loss, model.accuracy], feed_dict_dev)
                        dev_summary_writer.add_summary(summaries_dev, step)

                        loss_dev += loss
                        accuracy_dev += accuracy
                        cnt += 1

                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss_dev / cnt, accuracy_dev / cnt))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #18
def ResNet50(include_top=True,
             input_shape=(224, 224, 3),
             pooling=None,
             classes=1000,
             stem="SA",
             repeat=[1, 2, 4, 1]):
    """Instantiates the ResNet50 architecture.

    Optionally loads weights pre-trained
    on ImageNet. Note that when using TensorFlow,
    for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model and the weights are compatible with both
    TensorFlow and Theano. The data format
    convention used by the model is the one
    specified in your Keras config file.

    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization)
            or "imagenet" (pre-training on ImageNet).
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 244)` (with `channels_first` data format).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 197.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """

    img_input = Input(input_shape)
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1

    if stem == 'conv':
        x = Conv2D(64, (7, 7), strides=(1, 1), name='conv1',
                   padding="same")(img_input)
        x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((3, 3), strides=(1, 1))(x)
    elif stem == 'SA':
        x = SelfAttention(hidden_dim=64,
                          k_size=4,
                          Nh=1,
                          strides=1,
                          padding='SAME',
                          m_for_stem=4)(img_input)

        #x = MaxPooling2D((4, 4), strides=(4, 4))(x)

    x = conv_block(x, 7, [64, 64, 256], stage=2, block='a', strides=1)
    for i in range(repeat[0]):
        x = identity_block(x, 7, [64, 64, 256], stage=2, block=chr(98 + i))

    x = conv_block(x, 7, [128, 128, 512], stage=3, block='a')
    for i in range(repeat[1]):
        x = identity_block(x, 7, [128, 128, 512], stage=3, block=chr(98 + i))

    x = conv_block(x, 7, [256, 256, 1024], stage=4, block='a')
    for i in range(repeat[2]):
        x = identity_block(x, 7, [256, 256, 1024], stage=4, block=chr(98 + i))

    x = conv_block(x, 7, [512, 512, 2048], stage=5, block='a')
    for i in range(repeat[3]):
        x = identity_block(x, 7, [512, 512, 2048], stage=5, block=chr(98 + i))

    x = AveragePooling2D((4, 4), name='avg_pool')(x)

    if include_top:
        x = Flatten()(x)
        x = Dense(classes, activation='softmax', name='fc1000')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    inputs = img_input
    # Create model.
    model = Model(inputs, x, name='resnet50')

    return model
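A hedged usage sketch covering both stem variants:

model_sa = ResNet50(input_shape=(224, 224, 3), classes=1000, stem='SA')
model_conv = ResNet50(input_shape=(224, 224, 3), classes=1000, stem='conv')
model_sa.summary()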
Example #19
def build_model(word_index):
    embedding_matrix = get_embedding_matrix(word_index)
    print('Building model...')
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=True)
    # model
    # ------ news encoder -------
    title_input = Input(shape=(MAX_TITLE_LENGTH, ), dtype='int32')
    title_embedded_sequences = embedding_layer(title_input)
    title_embedded_sequences = Dropout(0.2)(title_embedded_sequences)
    title_selfattention = SelfAttention(16, 16)([
        title_embedded_sequences, title_embedded_sequences,
        title_embedded_sequences
    ])
    title_selfattention = Dropout(0.2)(title_selfattention)
    news_r = Attention(200)(title_selfattention)

    news_encoder = Model([title_input], news_r, name='news_encoder')
    # from tensorflow.keras.utils import plot_model
    # plot_model(news_encoder, to_file='news_encoder.png', show_shapes=True)

    # ----- user encoder -----
    browsed_title_input = Input((
        MAX_BROWSED,
        MAX_TITLE_LENGTH,
    ),
                                dtype='int32',
                                name='b_t')
    browsed_news = TimeDistributed(news_encoder)(browsed_title_input)

    user_input = Input((
        MAX_BROWSED,
        256,
    ), name='user_input')
    user_r = SelfAttention(16, 16)([user_input, user_input, user_input])
    user_r = Dropout(0.2)(user_r)
    user_r = Attention(200)(user_r)
    user_encoder = Model(user_input, user_r, name='user_encoder')

    train_user_r = user_encoder(browsed_news)
    test_user_r = Input((256, ), name='test_user_r')
    # ----- candidate_news -----
    candidate_title_input = Input((
        1 + NEG_SAMPLE,
        MAX_TITLE_LENGTH,
    ),
                                  dtype='int32',
                                  name='c_t')
    candidate_r = TimeDistributed(news_encoder)(candidate_title_input)

    candidate_one_r = Input((256, ), name="c_t_1")

    # ----- click predictor -----
    pred = Dot(axes=-1)([train_user_r, candidate_r])
    pred = Activation(activation='softmax')(pred)
    model = Model([browsed_title_input, candidate_title_input], pred)

    pred_one = Dot(axes=-1)([test_user_r, candidate_one_r])
    pred_one = Activation(activation='sigmoid')(pred_one)
    model_test = Model([test_user_r, candidate_one_r], pred_one)

    return news_encoder, user_encoder, model, model_test
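A hedged training sketch; the loss and optimizer are assumptions consistent with the softmax over 1 + NEG_SAMPLE candidates:

news_encoder, user_encoder, model, model_test = build_model(word_index)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# model_test scores a single (user, candidate) pair with a sigmoid.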