Example #1
def ACM(x, blockname, groups=32):
    b, w, h, c = K.int_shape(x)
    # Global context: per-channel spatial mean, kept as a (b, 1, 1, c) map.
    mu = tf.reduce_mean(x, axis=[1, 2], name=blockname + '_mu')
    mu = tf.expand_dims(mu, axis=1)
    mu = tf.expand_dims(mu, axis=1)
    # Channel-gating branch: bottleneck 1x1 grouped convs on mu, then sigmoid.
    P = Conv2D(c // 2,
               1,
               padding='same',
               groups=groups,
               name=blockname + '_P1')(mu)
    P = relu(P)
    P = Conv2D(c, 1, padding='same', groups=groups, name=blockname + '_P2')(P)
    P = sigmoid(P)

    # Key/query attention over the mean-centered features.
    x_mu = x - mu
    k = Conv2D(c, 1, padding='same', groups=groups,
               name=blockname + '_K')(x_mu)
    q = Conv2D(c, 1, padding='same', groups=groups,
               name=blockname + '_Q')(x_mu)
    k = softmax(k)
    q = softmax(q)
    k = x_mu * k
    q = x_mu * q
    # Keep the spatial dims so k_q broadcasts against x below
    # (without keepdims the addition would fail for typical shapes).
    k = K.sum(k, axis=[1, 2], keepdims=True)
    q = K.sum(q, axis=[1, 2], keepdims=True)
    k_q = k - q
    y = x + k_q
    y = y * P

    return y
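
A minimal usage sketch for the block above, assuming the imports the snippet itself relies on and a channels-last feature map whose channel count is divisible by `groups` (shapes here are illustrative, not from the original source):

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.activations import relu, sigmoid, softmax
from tensorflow.keras.layers import Conv2D, Input
from tensorflow.keras.models import Model

inputs = Input(shape=(32, 32, 64))                 # 64 channels, divisible by groups=32
outputs = ACM(inputs, blockname='acm1', groups=32)
model = Model(inputs, outputs)
y = model(tf.random.normal((2, 32, 32, 64)))       # -> (2, 32, 32, 64)
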
Example #2
    def get_mixture_coef(self, out_tensor):
        """ Parses the output tensor to appropriate mixture density coefficients"""
        # This uses eqns 18 -> 23 of http://arxiv.org/abs/1308.0850.

        # Pen states:
        z_pen_logits = out_tensor[:, :, 0:3]
        # Process outputs into MDN parameters
        M = self.hps['num_mixture']
        dist_params = [
            out_tensor[:, :, (3 + M * (n - 1)):(3 + M * n)]
            for n in range(1, 7)
        ]
        z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr = dist_params

        # Softmax all the pi's and pen states:
        z_pi = softmax(z_pi)
        z_pen = softmax(z_pen_logits)

        # Exponent the sigmas and also make corr between -1 and 1.
        z_sigma1 = K.exp(z_sigma1)
        z_sigma2 = K.exp(z_sigma2)
        z_corr = tanh(z_corr)

        r = [
            z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr, z_pen, z_pen_logits
        ]
        return r
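
The slicing above assumes a fixed layout for the network output; a quick width check (with an assumed `num_mixture`) makes that layout explicit:

# With M mixture components the last dimension must be 3 + 6*M:
# 3 pen-state logits, then M values for each of pi, mu1, mu2, sigma1, sigma2, corr.
M = 20                 # assumed value of hps['num_mixture']
width = 3 + 6 * M      # -> 123
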
Example #3
    def call(self, x, mask=None):

        # Get input tensor static shape
        _, N, C = x.get_shape().as_list()
        head_dim = C // self.num_heads

        x_qkv = self.qkv(x)
        x_qkv = tf.reshape(x_qkv, shape=(-1, N, 3, self.num_heads, head_dim))
        x_qkv = tf.transpose(x_qkv, perm=(2, 0, 3, 1, 4))
        q, k, v = x_qkv[0], x_qkv[1], x_qkv[2]

        # Query rescaling
        q = q * self.scale

        # multi-headed self-attention
        k = tf.transpose(k, perm=(0, 1, 3, 2))
        attn = (q @ k)

        # Add the learned relative position bias
        num_window_elements = self.window_size[0] * self.window_size[1]
        relative_position_index_flat = tf.reshape(self.relative_position_index,
                                                  shape=(-1, ))
        relative_position_bias = tf.gather(self.relative_position_bias_table,
                                           relative_position_index_flat)
        relative_position_bias = tf.reshape(relative_position_bias,
                                            shape=(num_window_elements,
                                                   num_window_elements, -1))
        relative_position_bias = tf.transpose(relative_position_bias,
                                              perm=(2, 0, 1))
        attn = attn + tf.expand_dims(relative_position_bias, axis=0)

        if mask is not None:
            nW = mask.get_shape()[0]
            mask_float = tf.cast(
                tf.expand_dims(tf.expand_dims(mask, axis=1), axis=0),
                tf.float32)
            attn = tf.reshape(
                attn, shape=(-1, nW, self.num_heads, N, N)) + mask_float
            attn = tf.reshape(attn, shape=(-1, self.num_heads, N, N))
        attn = softmax(attn, axis=-1)

        # Dropout after attention
        attn = self.attn_drop(attn)

        # Apply attention to the values and merge the heads
        x_qkv = (attn @ v)
        x_qkv = tf.transpose(x_qkv, perm=(0, 2, 1, 3))
        x_qkv = tf.reshape(x_qkv, shape=(-1, N, C))

        # Linear projection
        x_qkv = self.proj(x_qkv)

        # Dropout after projection
        x_qkv = self.proj_drop(x_qkv)

        return x_qkv
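
For context, the `relative_position_index` gathered above is usually precomputed from window coordinates. A standalone sketch for a 2x2 window, following the Swin Transformer recipe (names and sizes here are assumptions, not taken from the class above):

import numpy as np

window_size = (2, 2)
coords = np.stack(np.meshgrid(np.arange(window_size[0]),
                              np.arange(window_size[1]), indexing="ij"))
coords_flat = coords.reshape(2, -1)                    # (2, Wh*Ww)
relative = coords_flat[:, :, None] - coords_flat[:, None, :]
relative = relative.transpose(1, 2, 0)                 # (Wh*Ww, Wh*Ww, 2)
relative[..., 0] += window_size[0] - 1                 # shift values to start at 0
relative[..., 1] += window_size[1] - 1
relative[..., 0] *= 2 * window_size[1] - 1             # row-major flattening
relative_position_index = relative.sum(-1)             # (4, 4) table of bias indices
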
Example #4
    def call(self, logits, mask=None):
        if mask is not None:
            m = tf.cast(mask[0], logits.dtype)
            # Pairwise mask: position (i, j) is kept only if both i and j are valid.
            mm = tf.expand_dims(m, -2) * tf.expand_dims(m, -1)

            # Push masked-out logits toward -inf so softmax assigns them ~0.
            adder = (1.0 - mm) * self._small_negative
            return logits + adder, softmax(logits + adder)

        return logits, softmax(logits)
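
A small numeric sketch of the masking trick used above, assuming `self._small_negative` is a large negative constant such as -1e9 (an assumption; the class definition is not shown):

import tensorflow as tf
from tensorflow.keras.activations import softmax

logits = tf.random.normal((1, 4, 4))
m = tf.constant([[1., 1., 1., 0.]])                  # last position is padding
mm = tf.expand_dims(m, -2) * tf.expand_dims(m, -1)   # (1, 4, 4) pairwise validity
adder = (1.0 - mm) * -1e9                            # stand-in for _small_negative
probs = softmax(logits + adder)                      # masked entries get ~0 probability
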
Example #5
    def __call__(self, inputs, training):
        x, mask = inputs

        g_x = iterative_call(self.inner_layers[0], x, training=training)
        theta_x = iterative_call(self.inner_layers[1], x, training=training)
        phi_x = iterative_call(self.inner_layers[2], x, training=training)

        f_div_C = softmax(tf.linalg.matmul(theta_x, phi_x) / self.temperature, axis=2)

        inverted_mask = 1. - mask
        mask = tf.image.resize(mask, size=x.shape[1:3], method=tf.image.ResizeMethod.BILINEAR)
        mask = tf.where(tf.greater(mask, 0.), tf.zeros_like(mask), mask)
        mask = 1. - mask

        inverted_mask = tf.image.resize(inverted_mask, size=(x.shape[1:3]), method=tf.image.ResizeMethod.BILINEAR)
        mask *= inverted_mask

        mask_expand = tf.reshape(mask, (x.shape[0], 1, -1))
        mask_expand = tf.repeat(mask_expand, x.shape[1] * x.shape[2], axis=1)
        if self.use_self:
            # TF tensors don't support item assignment; set the diagonal
            # (each position attending to itself) to 1 instead.
            diag = tf.ones((x.shape[0], x.shape[1] * x.shape[2]))
            mask_expand = tf.linalg.set_diag(mask_expand, diag)

        f_div_C = mask_expand * f_div_C
        if self.re_norm:
            f_div_C = tf.keras.utils.normalize(f_div_C, axis=2, order=1)

        y = tf.reshape(tf.linalg.matmul(f_div_C, g_x), (x.shape[0], *x.shape[1:3], self.inter_channels))

        W_y = iterative_call(self.inner_layers[3], y, training=training)
        W_y = iterative_call(self.inner_layers[4], W_y, training=training)

        assert self.mode.casefold() == 'combine'
        full_mask = tf.repeat(mask, self.inter_channels, axis=3)
        z = full_mask * x + (1 - full_mask) * W_y
        return z
Example #6
def evaluate(adj,
             x,
             labels,
             idx_train,
             idx_val,
             idx_test,
             target,
             retrain_iters=2,
             norm_x='l1'):

    classification_margins = []
    class_distrs = []
    for _ in range(retrain_iters):
        print(f"... {_+1}/{retrain_iters}")
        model = GCN(adj,
                    x,
                    labels,
                    device='GPU:0',
                    seed=123 + _,
                    norm_x=norm_x)
        model.build()
        his = model.train(idx_train, idx_val, verbose=0, epochs=100)
        logit = softmax(model.predict(target)).numpy().ravel()

        class_distrs.append(logit)
        best_second_class_before = (
            logit - np.eye(data.n_classes)[labels[target]]).argmax()
        margin = logit[labels[target]] - logit[best_second_class_before]
        classification_margins.append(margin)
        model.close()  # without the parentheses this line was a no-op

    class_distrs = np.asarray(class_distrs)
    print(classification_margins)
    return class_distrs
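
A worked illustration of the classification margin computed in the loop above (a hypothetical 3-class distribution; `data.n_classes` in the snippet is assumed to come from the enclosing module):

import numpy as np

logit = np.array([0.6, 0.3, 0.1])                      # softmaxed class distribution
true_class = 0
# Subtracting the one-hot row removes the true class from the argmax:
best_other = (logit - np.eye(3)[true_class]).argmax()  # -> 1
margin = logit[true_class] - logit[best_other]         # 0.6 - 0.3 = 0.3
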
Example #7
    def generate(self, mask_in):
        paddings = [
            [0, 0],
            [self.padding, self.padding],
            [self.padding, self.padding],
            [0, 0],
        ]
        mask = tf.pad(mask_in, paddings)
        # NOTE(brendan): this unfolding is for convolution
        mask = tf.image.extract_patches(
            mask,
            sizes=(1, (2 * self.radius) + 1, (2 * self.radius) + 1, 1),
            strides=4 * (1, ),
            rates=4 * (1, ),
            padding="VALID",
        )
        mask = tf.image.resize(mask, size=self.shape_up, method="nearest")
        mask = mask[:, :-self.step + 1, :-self.step + 1, :]
        # NOTE(brendan): convolve Gaussian weights with mask (smoothness inductive bias)
        mask = self.weight * mask

        mask = mask * softmax(self.coldness * mask, axis=-1)
        mask = tf.reduce_sum(mask, axis=-1, keepdims=True)

        m = round(self.margin)
        if self.clamp:
            mask = tf.clip_by_value(mask, clip_value_min=0, clip_value_max=1)
        cropped = mask[:, m:m + self.shape[0], m:m + self.shape[1], :]
        return cropped, mask
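
The softmax-weighted sum near the end of `generate` is a smooth maximum over the patch axis: as `coldness` grows it approaches a hard max. A tiny self-contained check with illustrative values:

import tensorflow as tf
from tensorflow.keras.activations import softmax

v = tf.constant([[0.1, 0.9, 0.4]])
smooth_max = tf.reduce_sum(v * softmax(50.0 * v), axis=-1)  # coldness = 50
# smooth_max is ~0.9, close to tf.reduce_max(v, axis=-1)
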
Example #8
    def call(self,
             queries,
             keys,
             values,
             mask=None,
             training=False,
             attentions=False):
        """
        inputs
            q: (queries_size x timesteps x input_dim)
            keys: (keys memory_size x memory_timesteps x input_dim)
            values: (values queries_size x timesteps x output_dim)
        returns
            output: context vector (queries_size x timesteps x out_dim)
            attn: attn weights (queries_size x timesteps x out_dim)
        """
        attn = tf.matmul(queries, keys, transpose_b=True)
        scale_dim = keys.shape[2]
        attn /= np.sqrt(scale_dim)
        if mask is not None:
            attn += mask
        attn = activations.softmax(attn)
        if training:
            attn = self.dropout(attn)
        output = tf.matmul(attn, values)

        if attentions:
            return output, attn
        else:
            return output
Example #9
    def build(self, z):
        input_shape, _ = z

        self.q_net = Dense(self.dout, input_shape=input_shape, use_bias=False,
                           kernel_constraint=MaskWeights(self.mask),
                           name="q_dense")
        self.k_net = Dense(self.dout, input_shape=input_shape, use_bias=False,
                           kernel_constraint=MaskWeights(self.mask),
                           name="k_dense")
        self.v_net = Dense(self.dout, input_shape=input_shape, use_bias=False,
                           kernel_constraint=MaskWeights(self.mask),
                           name="v_dense")

        self.out_net = Dense(self.dout,
                             kernel_constraint=MaskWeights(self.mask),
                             name="head_combiner")

        # Scaled dot-product attention: multiply by dk**-0.5 (dividing by
        # dk**-0.5, as before, scaled the logits up instead of down).
        self.attention = Lambda(lambda x: activations.softmax(
            tf.matmul(x[0], x[1], transpose_b=True) * (self.dk ** -0.5)
        ),
                                name='scale_dot_attention')
        self.dot_product = Lambda(lambda x: tf.matmul(x[0], x[1]),
                                  name='dot_product')

        self.head_split = Reshape((input_shape[1], self.h * self.nh, self.dk))
        self.head_swap = Lambda(lambda x: tf.transpose(x, [0, 2, 1, 3]))
        self.head_merge = Reshape((input_shape[1], self.dout))

        super(MaskedMultiHeadAttention, self).build(input_shape)
Example #10
    def call(self, inputs, mask=None, **kwargs):

        query, value = inputs
        embedding_size = query.shape[-1]
        step_size = value.shape[1]

        if self.attention_type == 'inner':
            attention_score = tf.squeeze(tf.matmul(value, query, transpose_b=True), axis=-1)
        else:
            querys = tf.tile(query, [1, step_size, 1])
            query_value = tf.concat([querys, value, querys-value, querys*value], axis=-1)
            attention_score = self.out_kernel(self.mlp(query_value))
            attention_score = tf.reshape(attention_score, (-1, step_size))

        if mask is not None:
            if mask[0] is not None:
                raise ValueError('query should not support mask')
            if mask[1] is not None:
                min_value_matrix = tf.ones_like(attention_score) * (-2**31)
                attention_score = tf.where(mask[1], attention_score, min_value_matrix)

        attention_score = tf.divide(attention_score, tf.sqrt(embedding_size*1.0))

        if self.norm:
            attention_score = activations.softmax(attention_score)
        # Compute the weighted sum outside the branch so `attention_vec`
        # is defined even when `self.norm` is False.
        attention_vec = tf.squeeze(tf.matmul(tf.expand_dims(attention_score, axis=1), value), axis=1)

        if not self.keepdims:
            out_shape = (-1, embedding_size)
        else:
            out_shape = (-1, 1, embedding_size)

        return tf.reshape(attention_vec, out_shape)
Example #11
    def random_sample(self, sample_epochs=20, disable=False):
        best_loss = -np.inf
        best_s = None
        s = tf.linalg.band_part(self.adj_changes, 0, -1) - tf.linalg.band_part(
            self.adj_changes, 0, 0)
        for _ in tqdm(range(sample_epochs),
                      desc='Random Sampling',
                      disable=disable):
            random_matrix = tf.random.uniform(shape=(self.n_nodes,
                                                     self.n_nodes),
                                              minval=0.,
                                              maxval=1.)
            sampled = tf.where(s > random_matrix, 1., 0.)
            if tf.reduce_sum(sampled) > self.n_perturbations:
                continue

            with tf.device(self.device):
                self.adj_changes.assign(sampled)
                adj = self.get_perturbed_adj()
                adj_norm = normalize_adj_tensor(adj)
                logit = self.surrogate([self.tf_x, adj_norm, self.idx_attack])
                logit = softmax(logit)
                loss = self.compute_loss(logit)

            if best_loss < loss:
                best_loss = loss
                best_s = sampled

        return best_s.numpy()
Example #12
    def _read_inputs(self, inputs):
        """
        Applies transformations to `inputs` to get control for this module.
        Computes elements in the interface vector 𝜉
        """

        def linear(name, first_dim=None, second_dim=None):
            """
            Returns a linear transformation of `inputs`. If first_dim and second_dim
            are provide, reshape the resulting Tensor
            """
            linear = self._layers[name](inputs)
            if first_dim and second_dim:
                linear = tf.reshape(linear, [-1, first_dim, second_dim])
            return linear

        # v_t^i - The vectors to write to memory, for each write head `i`.
        write_vectors = linear('write_vectors', self._num_writes, self._word_size)

        # e_t^i - Amount to erase the memory by before writing, for each write head.
        erase_vectors = linear('erase_vectors', self._num_writes, self._word_size)

        # f_t^j - Amount that the memory at the locations read from at the previous
        # time step can be declared unused, for each read head `j`.
        free_gate = linear('free_gate')

        # g_t^{a, i} - Interpolation between writing to unallocated memory and
        # content-based lookup, for each write head `i`. Note: `a` is simply used to
        # identify this gate with allocation vs writing (as defined below).
        allocation_gate = linear('allocation_gate')

        # g_t^{w, i} - Overall gating of write amount for each write head.
        write_gate = linear('write_gate')

        # 𝜋_t^j - Mixing between "backwards" and "forwards" positions (for
        # each write head), and content-based lookup, for each read head.
        num_read_modes = 1 + 2 * self._num_writes
        read_mode = linear('read_mode', self._num_reads, num_read_modes)
        read_mode = activations.softmax(read_mode)

        # Parameters for the (read / write) "weights by content matching" modules.
        write_keys = linear('write_keys', self._num_writes, self._word_size)
        write_strengths = linear('write_strengths')

        read_keys = linear('read_keys', self._num_reads, self._word_size)
        read_strengths = linear('read_strengths')
        
        result = dict(
            read_keys=read_keys,
            read_strengths=read_strengths,
            write_keys=write_keys,
            write_strengths=write_strengths,
            write_vectors=write_vectors,
            erase_vectors=erase_vectors,
            free_gate=free_gate,
            allocation_gate=allocation_gate,
            write_gate=write_gate,
            read_mode=read_mode,
        )
        return result
Example #13
 def getAction(self, game):
     logits, _ = self.model(tf.convert_to_tensor(self.extractState(game)))
     probs = activations.softmax(logits)
     action = np.random.choice(self.action_size, p=probs.numpy()[0])
     # Rejection-sample until the action indexes a legal move
     while action >= len(game.getMoves()):
         action = np.random.choice(self.action_size, p=probs.numpy()[0])
     return game.getMoves()[action]
Example #14
def build_model(w, h, num_classes, dropout=.5, l2_reg=0., conv_type='ds'):
	X = Input(shape=(h,w,3),name='X')
	conv1 = conv_block(X,conv_type,64,3,1, name='conv1',num_conv_layers=2,l2_reg=l2_reg,use_bn=True)
	conv2 = conv_block(conv1,conv_type,128,3,1,name='conv2',num_conv_layers=2,l2_reg=l2_reg,use_bn=True)
	conv3 = conv_block(conv2,conv_type,256,3,1,name='conv3',l2_reg=l2_reg,use_bn=True)
	conv4 = conv_block(conv3,conv_type,512,3,1,name='conv4',l2_reg=l2_reg,use_bn=True)
	conv5 = conv_block(conv4,conv_type,512,3,1,name='conv5',l2_reg=l2_reg,use_bn=True)

	fc1 = fc_block(conv5,conv_type,4096,(7,7), strides=(1,1), dropout=dropout, name='fc1',l2_reg=l2_reg,use_dropout=True,use_bn=True)
	fc2 = fc_block(fc1,conv_type,4096,(1,1), strides=(1,1), dropout=dropout, name='fc2',l2_reg=l2_reg,use_dropout=True,use_bn=True)

	score32 = score_block(fc2,conv_type,num_classes,name='score32',l2_reg=l2_reg)
	score16 = score_block(conv4,conv_type,num_classes,name='score16',l2_reg=l2_reg)
	score8 = score_block(conv3,conv_type,num_classes,name='score8',l2_reg=l2_reg)

	upscore32 = upsample_block(score32,num_classes,4,2,name='upscore32',l2_reg=l2_reg)
	upscore32c = crop(score16,name='upscore32c')(upscore32)
	fuse1 = Add(name='fuse1')([score16, upscore32c])

	upscore16 = upsample_block(fuse1,num_classes,4,2,name='upscore16',l2_reg=l2_reg)
	upscore16c = crop(score8,name='upscore16c')(upscore16)
	fuse2 = Add(name='fuse2')([upscore16c, score8])

	upscore8 = UpSampling2D((8,8),name='upscore8')(fuse2)
	upscore8c = crop(X,name='upscore8c')(upscore8)
	classifier = Lambda(lambda x: softmax(x))(upscore8c)

	fcn8 = Model(inputs=X, outputs=classifier, name='FCN8')
	return fcn8
Example #15
    def call(self, inputs):
        X, A = inputs

        N = K.shape(A)[-1]
        # Check if the layer is operating in mixed or batch mode
        mode = ops.autodetect_mode(X, A)
        self.reduce_loss = mode in (modes.MIXED, modes.BATCH)

        # Get normalized adjacency
        if K.is_sparse(A):
            I_ = tf.sparse.eye(N, dtype=A.dtype)
            A_ = tf.sparse.add(A, I_)
        else:
            I_ = tf.eye(N, dtype=A.dtype)
            A_ = A + I_
        fltr = ops.normalize_A(A_)

        # Node embeddings
        Z = K.dot(X, self.kernel_emb)
        Z = ops.modal_dot(fltr, Z)
        if self.activation is not None:
            Z = self.activation(Z)

        # Compute cluster assignment matrix
        S = K.dot(X, self.kernel_pool)
        S = ops.modal_dot(fltr, S)
        S = activations.softmax(S, axis=-1)  # softmax applied row-wise

        # Link prediction loss
        S_gram = ops.modal_dot(S, S, transpose_b=True)
        if mode == modes.MIXED:
            A = tf.sparse.to_dense(A)[None, ...]
        if K.is_sparse(A):
            LP_loss = tf.sparse.add(A, -S_gram)  # A/tf.norm(A) - S_gram/tf.norm(S_gram)
        else:
            LP_loss = A - S_gram
        LP_loss = tf.norm(LP_loss, axis=(-1, -2))
        if self.reduce_loss:
            LP_loss = K.mean(LP_loss)
        self.add_loss(LP_loss)

        # Entropy loss
        entr = tf.negative(
            tf.reduce_sum(tf.multiply(S, K.log(S + K.epsilon())), axis=-1)
        )
        entr_loss = K.mean(entr, axis=-1)
        if self.reduce_loss:
            entr_loss = K.mean(entr_loss)
        self.add_loss(entr_loss)

        # Pooling
        X_pooled = ops.modal_dot(S, Z, transpose_a=True)
        A_pooled = ops.matmul_at_b_a(S, A)

        output = [X_pooled, A_pooled]

        if self.return_mask:
            output.append(S)

        return output
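
A tiny numeric sketch of the link-prediction term ||A - S S^T||_F used above (an assumed 3-node path graph with 2 clusters; not part of the layer itself):

import tensorflow as tf

A = tf.constant([[0., 1., 0.],
                 [1., 0., 1.],
                 [0., 1., 0.]])                       # path graph on 3 nodes
S = tf.nn.softmax(tf.random.normal((3, 2)), axis=-1)  # row-stochastic assignments
S_gram = S @ tf.transpose(S)
LP_loss = tf.norm(A - S_gram, axis=(-1, -2))          # Frobenius norm
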
Example #16
def convModel_APIHandler():
    """
    This function represents a Flask API endpoint that serves a Keras image classifier.
    If the input data is valid, this function will return the models top prediction for the
    image, along with all of the probabilities for the classes. 
    """
    try:
        # the image is expected to be received as a base64-encoded string, which is
        # decoded, converted to a tensor, and then preprocessed
        base64_image_string = request.form['image_data_buffer']
        base64_decoded = base64.b64decode(base64_image_string)
        image = Image.open(io.BytesIO(base64_decoded))
        preprocessed_img = prepare_image(image)
        raw_logits_tensor = model(preprocessed_img)
        prob_distribution = softmax(raw_logits_tensor)
        topPrediction = class_indices[prob_distribution.numpy().argmax()]
        allProbsDict = get_all_probs(prob_distribution)
        resp = jsonify(MostLikelyClass=topPrediction, allProbs=allProbsDict)
        resp.status_code = 200
        return resp
    except Exception:
        resp = jsonify({
            'message':
            'There was an error processing your image by the server. Try a different image?'
        })
        resp.status_code = 500
        return resp
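
A hypothetical client for the endpoint above, using the `requests` package: base64-encode an image file and POST it as the `image_data_buffer` form field (the URL and route are assumptions; they depend on how the Flask app registers this handler):

import base64
import requests

with open("example.jpg", "rb") as f:
    payload = {"image_data_buffer": base64.b64encode(f.read())}

resp = requests.post("http://localhost:5000/predict", data=payload)
print(resp.status_code, resp.json())
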
Example #17
def squeeze_net(shape):
    inp = Input(shape=shape)
    conv1 = Conv2D(filters=96,
                   kernel_size=(7, 7),
                   strides=(2, 2),
                   activation='relu',
                   padding='same')(inp)
    pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(conv1)
    fire1 = fire_module(16, 64, 64)(pool1)
    fire2 = fire_module(16, 64, 64)(fire1)
    fire3 = fire_module(32, 128, 128)(fire2)
    pool2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(fire3)
    fire4 = fire_module(32, 128, 128)(pool2)
    fire5 = fire_module(48, 192, 192)(fire4)
    fire6 = fire_module(48, 192, 192)(fire5)
    fire7 = fire_module(64, 256, 256)(fire6)
    pool3 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(fire7)
    fire8 = fire_module(64, 256, 256)(pool3)
    drop1 = Dropout(.5)(fire8)

    # conv2 should have the same number of filters as the desired number of outputs
    conv2 = Conv2D(filters=1000,
                   kernel_size=(1, 1),
                   strides=(1, 1),
                   activation='relu')(drop1)
    pool4 = GlobalAveragePooling2D()(conv2)

    # change the activation function here to switch to regression
    out = softmax(pool4)

    model = Model(inputs=inp, outputs=[out])
    return model
Example #18
def multiplicative_self_attention(units, n_hidden=None, n_output_features=None, activation=None):
    """
    Compute multiplicative self attention for time series of vectors (with batch dimension)
    the formula: score(h_i, h_j) = <W_1 h_i,  W_2 h_j>,  W_1 and W_2 are learnable matrices
    with dimensionality [n_hidden, n_input_features]

    Args:
        units: tf tensor with dimensionality [batch_size, time_steps, n_input_features]
        n_hidden: number of units in hidden representation of similarity measure
        n_output_features: number of features in output dense layer
        activation: activation at the output

    Returns:
        output: self attended tensor with dimensionality [batch_size, time_steps, n_output_features]
    """
    n_input_features = K.int_shape(units)[2]
    if n_hidden is None:
        n_hidden = n_input_features
    if n_output_features is None:
        n_output_features = n_input_features
    exp1 = Lambda(lambda x: expand_tile(x, axis=1))(units)
    exp2 = Lambda(lambda x: expand_tile(x, axis=2))(units)
    queries = Dense(n_hidden)(exp1)
    keys = Dense(n_hidden)(exp2)
    scores = Lambda(lambda x: K.sum(queries * x, axis=3, keepdims=True))(keys)
    attention = Lambda(lambda x: softmax(x, axis=2))(scores)
    mult = Multiply()([attention, exp1])
    attended_units = Lambda(lambda x: K.sum(x, axis=2))(mult)
    output = Dense(n_output_features, activation=activation)(attended_units)
    return output
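
`expand_tile` is not shown in this listing; a minimal version consistent with the shapes used above might look like the following (a sketch, not necessarily the original helper). It pairs every h_i with every h_j by tiling along a newly inserted axis:

from tensorflow.keras import backend as K

def expand_tile(units, axis):
    """Expand a (batch, time, features) tensor at `axis` (1 or 2) and tile it to time_steps."""
    assert axis in (1, 2)
    n_time_steps = K.int_shape(units)[1]
    repetitions = [1, 1, 1, 1]
    repetitions[axis] = n_time_steps
    return K.tile(K.expand_dims(units, axis), repetitions)
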
Example #19
def additive_self_attention(units, n_hidden=None, n_output_features=None, activation=None):
    """
    Compute additive self attention for time series of vectors (with batch dimension)
            the formula: score(h_i, h_j) = <v, tanh(W_1 h_i + W_2 h_j)>
            v is a learnable vector of n_hidden dimensionality,
            W_1 and W_2 are learnable [n_hidden, n_input_features] matrices

    Args:
        units: tf tensor with dimensionality [batch_size, time_steps, n_input_features]
        n_hidden: number of2784131 units in hidden representation of similarity measure
        n_output_features: number of features in output dense layer
        activation: activation at the output

    Returns:
        output: self attended tensor with dimensionality [batch_size, time_steps, n_output_features]
        """
    n_input_features = K.int_shape(units)[2]
    if n_hidden is None:
        n_hidden = n_input_features
    if n_output_features is None:
        n_output_features = n_input_features
    exp1 = Lambda(lambda x: expand_tile(x, axis=1))(units)
    exp2 = Lambda(lambda x: expand_tile(x, axis=2))(units)
    units_pairs = Concatenate(axis=3)([exp1, exp2])
    query = Dense(n_hidden, activation="tanh")(units_pairs)
    attention = Dense(1, activation=lambda x: softmax(x, axis=2))(query)
    attended_units = Lambda(lambda x: K.sum(attention * x, axis=2))(exp1)
    output = Dense(n_output_features, activation=activation)(attended_units)
    return output
Example #20
def get_default_model():
    inputs = Input(shape=(None, None, 10))
    x = Conv2D(128, 3, padding='same')(inputs)
    x = Activation('relu')(x)
    x = Conv2D(10, 1)(x)
    outputs = softmax(x, axis=3)

    return tf.keras.Model(inputs=inputs, outputs=outputs)
Example #21
    def call(self, inputs, **kwargs):
        features, relations, sx, sy = inputs

        z, _ = self.kenn_layer_1(features, relations, sx, sy)

        return softmax(z)
Example #22
    def step(self, x, states):

        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        at = K.exp(et)
        at_sum = K.sum(at, axis=1)
        at_sum_repeated = K.repeat(at_sum, self.timesteps)
        at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)
        # ~~~> calculate new hidden state
        # first calculate the "r" gate:

        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            return at, [yt, st]
        else:
            return yt, [yt, st]
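
The exp / sum / divide block in the middle of `step` is a hand-rolled softmax over the time axis; a numerically safer equivalent subtracts the max first. A sketch using the same Keras backend, with an illustrative tensor in place of the layer's `et`:

import tensorflow as tf
from tensorflow.keras import backend as K

et = tf.random.normal((4, 10, 1))                  # (batch, timesteps, 1), illustrative
at = K.exp(et - K.max(et, axis=1, keepdims=True))  # shift logits for numerical stability
at = at / K.sum(at, axis=1, keepdims=True)         # same result as the raw exp/sum above
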
Example #23
    def read(self, keys, scale=None):
        """Read from memory.

        Read the memory for given the keys. For each key in keys we will get one
        result as `r = sum_i M[i] a[i]` where `M[i]` is the memory content
        at location i and `a[i]` is the attention weight for key at location i.
        `a` is calculated as softmax of a scaled similarity between key and
        each memory content: `a[i] = exp(scale*sim[i])/(sum_i scale*sim[i])`

        Args:
            keys (Tensor): shape[-1] is dim.
              For single key read, the shape is (batch_size, dim).
              For multiple key read, the shape is (batch_szie, k, dim), where
              k is the number of keys.
            scale (None|float|Tensor): shape is () or keys.shape[:-1]. The
              cosine similarities are multiplied with `scale` before softmax
              is applied. If None, use the scale provided at constructor.
        Returns:
            resutl Tensor: shape is same as keys. result[..., i] is the read
              result for the corresponding key.

        """
        if not self._built:
            self.build(keys.shape[0])
        assert 2 <= len(keys.shape) <= 3
        assert keys.shape[0] == self._batch_size
        assert keys.shape[-1] == self.dim

        if scale is None:
            scale = self._scale
        elif not isinstance(scale, (int, float)):
            # Assume it's a Tensor; align its dims with the keys.
            scale = expand_dims_as(scale, keys)
        sim = layers.dot([keys, self._memory],
                         axes=-1,
                         normalize=self._normalize)
        sim = sim * scale

        attention = activations.softmax(sim)
        result = layers.dot([attention, self._memory], axes=(-1, 1))

        if len(sim.shape) > 2:  # multiple read keys
            usage = tf.reduce_sum(attention,
                                  axis=tf.range(1,
                                                len(sim.shape) - 1))
        else:
            usage = attention

        if self._snapshot_only:
            self._usage.assign_add(usage)
        else:
            self._usage = self._usage + usage

        return result
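
A minimal numeric sketch of the read rule `r = sum_i M[i] a[i]` described in the docstring (assumed sizes: batch 1, 4 memory slots, dim 3; plain TF ops instead of the class machinery):

import tensorflow as tf

M = tf.random.normal((1, 4, 3))        # memory: (batch, slots, dim)
key = tf.random.normal((1, 3))         # a single read key
sim = tf.einsum('bd,bnd->bn', key, M)  # similarity per memory slot
a = tf.nn.softmax(5.0 * sim, axis=-1)  # scale = 5.0 (illustrative)
r = tf.einsum('bn,bnd->bd', a, M)      # read result: (1, 3)
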
Example #24
def squeeze_net_complex_skip(shape):
    inp = Input(shape=shape)
    conv1 = Conv2D(filters=96,
                   kernel_size=(7, 7),
                   strides=(2, 2),
                   activation='relu',
                   padding='same')(inp)
    pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(conv1)
    skip_conv1 = Conv2D(filters=128,
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        activation='relu')(pool1)
    fire1 = fire_module(16, 64, 64)(pool1)
    complex_add1 = Add()([fire1, skip_conv1])
    fire2 = fire_module(16, 64, 64)(complex_add1)
    add1 = Add()([complex_add1, fire2])
    fire3 = fire_module(32, 128, 128)(add1)
    skip_conv2 = Conv2D(filters=256,
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        activation='relu')(add1)
    complex_add2 = Add()([skip_conv2, fire3])
    pool2 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(complex_add2)
    fire4 = fire_module(32, 128, 128)(pool2)
    add2 = Add()([pool2, fire4])
    skip_conv3 = Conv2D(filters=384,
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        activation='relu')(add2)
    fire5 = fire_module(48, 192, 192)(add2)
    complex_add3 = Add()([skip_conv3, fire5])
    fire6 = fire_module(48, 192, 192)(complex_add3)
    add3 = Add()([complex_add3, fire6])
    fire7 = fire_module(64, 256, 256)(add3)
    skip_conv4 = Conv2D(filters=512,
                        kernel_size=(1, 1),
                        strides=(1, 1),
                        activation='relu')(add3)
    complex_add4 = Add()([skip_conv4, fire7])
    pool3 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(complex_add4)
    fire8 = fire_module(64, 256, 256)(pool3)
    add4 = Add()([pool3, fire8])
    drop1 = Dropout(.5)(add4)

    # conv2 should have the same number of filters as the desired number of outputs
    conv2 = Conv2D(filters=1000,
                   kernel_size=(1, 1),
                   strides=(1, 1),
                   activation='relu')(drop1)
    pool4 = GlobalAveragePooling2D()(conv2)

    # change the activation function here to switch to regression
    out = softmax(pool4)

    model = Model(inputs=inp, outputs=[out])
    return model
Example #25
    def call(self, hidden, timesteps):
        hidden_transformed = self.transform_hidden(hidden)
        hidden_repeated = K.repeat(hidden_transformed, timesteps)
        input_seq_transformed = self._input_seq_shaped
        alignment_score = self.calculate_alignment(hidden_repeated,
                                                   input_seq_transformed)
        score_vector = softmax(alignment_score, 1)
        context_vector = K.sum(score_vector * self.input_seq, 1)

        return context_vector
Example #26
    def update_surrogate(self, trainable_variables, idx):
        with tf.GradientTape() as tape:
            adj = self.get_perturbed_adj()
            adj_norm = normalize_adj_tensor(adj)
            logit = self.surrogate([self.tf_x, adj_norm, idx])
            logit = softmax(logit)
            loss = self.compute_loss(logit)

        gradients = tape.gradient(loss, trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
Example #27
def head_no_fc(x):
    '''
    Drops the fully connected head; gains roughly
    +2.5 in val_place_acc.
    Without the FC layers we also curb overfitting a bit.
    '''
    x = block_no_activation(x, k=1, n_c=259, s=1, padding='same')
    x = GlobalAveragePooling2D()(x)
    x = Reshape((7, 37, 1))(x)
    return Lambda(lambda x: softmax(x, axis=-2))(x)
Example #28
 def call(self, encoder_outputs, decoder_outputs, mask=None):
     w1_e = self.W1(encoder_outputs)
     w2_d = self.W2(decoder_outputs)
     tanh_output = activations.tanh(w1_e + w2_d)
     v_dot_tanh = self.V(tanh_output)
     if mask is not None:
         # -1e9: a large negative bias so masked positions get ~0 probability
         # (multiplying by -1e-9, as before, is too small to mask anything)
         v_dot_tanh += (mask * -1e9)
     attention_weights = activations.softmax(v_dot_tanh, axis=1)
     att_shape = tf.shape(attention_weights)
     return tf.reshape(attention_weights, (att_shape[0], att_shape[1]))
Example #29
 def predict_step_on_batch(self,
                           x,
                           out_weight=None,
                           return_logits=True,
                           device="CPU"):
     with tf.device(device):
         out = self(x, training=False)
         out = gather(out, out_weight)
         if not return_logits:
             out = softmax(out)
     return out
Example #30
    def compute_gradients(self, idx):
        with tf.GradientTape() as tape:
            tape.watch(self.adj_changes)
            adj = self.get_perturbed_adj()
            adj_norm = normalize_adj_tensor(adj)
            logit = self.surrogate([self.tf_x, adj_norm, idx])
            logit = softmax(logit)
            loss = self.compute_loss(logit)

        gradients = tape.gradient(loss, self.adj_changes)
        return gradients