def build(self, input_shape):
    if isinstance(input_shape, list):
        input_shape_high, input_shape_low = input_shape
    else:
        input_shape_high, input_shape_low = input_shape, None
    if self.data_format == 'channels_first':
        channel_axis, rows_axis, cols_axis = 1, 2, 3
    else:
        rows_axis, cols_axis, channel_axis = 1, 2, 3
    if input_shape_high[channel_axis] is None:
        raise ValueError('The channel dimension of the higher spatial inputs '
                         'should be defined. Found `None`.')
    if input_shape_low is not None and input_shape_low[channel_axis] is None:
        raise ValueError('The channel dimension of the lower spatial inputs '
                         'should be defined. Found `None`.')
    if input_shape_high[rows_axis] is not None and input_shape_high[rows_axis] % self.octave != 0 or \
            input_shape_high[cols_axis] is not None and input_shape_high[cols_axis] % self.octave != 0:
        raise ValueError('The rows and columns of the higher spatial inputs should be divisible by the octave. '
                         'Found {} and {}.'.format(input_shape_high, self.octave))
    if input_shape_low is None:
        self.conv_low_to_high, self.conv_low_to_low = None, None
    if self.conv_high_to_high is not None:
        with K.name_scope(self.conv_high_to_high.name):
            self.conv_high_to_high.build(input_shape_high)
    if self.conv_low_to_high is not None:
        with K.name_scope(self.conv_low_to_high.name):
            self.conv_low_to_high.build(input_shape_low)
    if self.conv_high_to_low is not None:
        with K.name_scope(self.conv_high_to_low.name):
            self.conv_high_to_low.build(input_shape_high)
    if self.conv_low_to_low is not None:
        with K.name_scope(self.conv_low_to_low.name):
            self.conv_low_to_low.build(input_shape_low)
    super(OctaveConv2D, self).build(input_shape)
def build(self, input_shapes):
    """
    Build the weights for the layer

    Args:
        input_shapes (sequence of tuple): the shapes of all input tensors
    """
    vdim = input_shapes[0][2]
    edim = input_shapes[1][2]
    with kb.name_scope(self.name):
        with kb.name_scope("phi_v"):
            v_shapes = [[2 * vdim + edim, vdim]] * 2
            self.phi_v_weights = [
                self.add_weight(
                    shape=i,
                    initializer=self.kernel_initializer,
                    name=f"weight_v_{j}",
                    regularizer=self.kernel_regularizer,
                    constraint=self.kernel_constraint,
                )
                for j, i in enumerate(v_shapes)
            ]
            if self.use_bias:
                self.phi_v_biases = [
                    self.add_weight(
                        shape=(i[-1],),
                        initializer=self.bias_initializer,
                        name=f"bias_v_{j}",
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint,
                    )
                    for j, i in enumerate(v_shapes)
                ]
            else:
                self.phi_v_biases = None
    self.built = True
def build(self, input_shape):
    if isinstance(input_shape, list):
        input_shape_high, input_shape_low = input_shape
    else:
        input_shape_high, input_shape_low = input_shape, None
    if input_shape_high[-1] is None:
        raise ValueError('The channel dimension of the higher spatial inputs '
                         'should be defined. Found `None`.')
    if input_shape_low is not None and input_shape_low[-1] is None:
        raise ValueError('The channel dimension of the lower spatial inputs '
                         'should be defined. Found `None`.')
    if input_shape_high[-2] is not None and input_shape_high[-2] % self.octave != 0:
        raise ValueError('The length of the higher spatial inputs should be divisible by the octave. '
                         'Found {} and {}.'.format(input_shape_high, self.octave))
    if input_shape_low is None:
        self.conv_low_to_high, self.conv_low_to_low = None, None
    if self.conv_high_to_high is not None:
        with K.name_scope(self.conv_high_to_high.name):
            self.conv_high_to_high.build(input_shape_high)
    if self.conv_low_to_high is not None:
        with K.name_scope(self.conv_low_to_high.name):
            self.conv_low_to_high.build(input_shape_low)
    if self.conv_high_to_low is not None:
        with K.name_scope(self.conv_high_to_low.name):
            self.conv_high_to_low.build(input_shape_high)
    if self.conv_low_to_low is not None:
        with K.name_scope(self.conv_low_to_low.name):
            self.conv_low_to_low.build(input_shape_low)
    super(OctaveConv1D, self).build(input_shape)
def _build_conv(self, input_shape, output_shape=[]):
    # Note: the mutable default `output_shape=[]` is shared across calls; pass a
    # fresh list explicitly to avoid accumulating shapes from earlier calls.
    for i, kernel in enumerate(self.kernel_size):
        tmp_layer = []
        name_conv = 'conv1D_{}'.format(i + 1)
        with K.name_scope(name_conv):
            tmp_layer.append(
                Conv1D(filters=self.nb_filters,
                       kernel_size=kernel,
                       strides=self.strides,
                       padding=self.padding,
                       use_bias=False,
                       activation=self.activation,
                       name=name_conv))
            tmp_layer[-1].build(input_shape)
            output_shape_conv = tmp_layer[-1].compute_output_shape(input_shape)
        name_bn = 'batchNorm_{}'.format(i + 1)
        with K.name_scope(name_bn):
            tmp_layer.append(BatchNormalization(name=name_bn))
            tmp_layer[-1].build(output_shape_conv)
        self.conv_layers.append(tmp_layer)
        output_shape.append(tmp_layer[-1].compute_output_shape(output_shape_conv))
def build(self, input_shapes):
    vdim = input_shapes[0][2]
    edim = input_shapes[1][2]
    with kb.name_scope(self.name):
        with kb.name_scope('phi_v'):
            v_shapes = [[2 * vdim + edim, vdim]] * 2
            self.phi_v_weights = [
                self.add_weight(shape=i,
                                initializer=self.kernel_initializer,
                                name='weight_v_%d' % j,
                                regularizer=self.kernel_regularizer,
                                constraint=self.kernel_constraint)
                for j, i in enumerate(v_shapes)
            ]
            if self.use_bias:
                self.phi_v_biases = [
                    self.add_weight(shape=(i[-1],),
                                    initializer=self.bias_initializer,
                                    name='bias_v_%d' % j,
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
                    for j, i in enumerate(v_shapes)
                ]
            else:
                self.phi_v_biases = None
    self.built = True
def build(self, input_shape):
    with K.name_scope(self.name):  # name scope used to make sure weights get unique names
        self.layers = []
        self.res_output_shape = input_shape

        for k in range(2):
            name = 'conv1D_{}'.format(k)
            with K.name_scope(name):  # name scope used to make sure weights get unique names
                self._add_and_activate_layer(MyConv1D(filters=self.nb_filters,
                                                      kernel_size=self.kernel_size,
                                                      dilation_rate=self.dilation_rate,
                                                      padding=self.padding,
                                                      name=name,
                                                      kernel_initializer=self.kernel_initializer))

            with K.name_scope('norm_{}'.format(k)):
                if self.use_batch_norm:
                    self._add_and_activate_layer(BatchNormalization())
                elif self.use_layer_norm:
                    self._add_and_activate_layer(LayerNormalization())

            self._add_and_activate_layer(Activation('relu'))

        if self.nb_filters != input_shape[-1]:
            # 1x1 conv to match the shapes (channel dimension).
            name = 'matching_conv1D'
            with K.name_scope(name):
                # make and build this layer separately because it directly uses input_shape
                self.shape_match_conv = MyConv1D(filters=self.nb_filters,
                                                 kernel_size=1,
                                                 padding='same',
                                                 name=name,
                                                 kernel_initializer=self.kernel_initializer)
        else:
            name = 'matching_identity'
            self.shape_match_conv = Lambda(lambda x: x, name=name)

        with K.name_scope(name):
            self.shape_match_conv.build(input_shape)
            self.res_output_shape = self.shape_match_conv.compute_output_shape(input_shape)

        self.final_activation = Activation(self.activation)
        self.final_activation.build(self.res_output_shape)  # probably isn't necessary

        # this is done to force Keras to add the layers in the list to self._layers
        for layer in self.layers:
            self.__setattr__(layer.name, layer)
        self.__setattr__(self.shape_match_conv.name, self.shape_match_conv)
        self.__setattr__(self.final_activation.name, self.final_activation)

        super(ResidualBlock, self).build(input_shape)  # done to make sure self.built is set True
def _adjust_block(p, ip, filters, weight_decay=5e-5, id=None):
    '''
    Adjusts the input `p` to match the shape of `ip`, or handles situations
    where the number of output filters needs to be changed

    # Arguments:
        p: input tensor which needs to be modified
        ip: input tensor whose shape needs to be matched
        filters: number of output filters to be matched
        weight_decay: l2 regularization weight
        id: string id

    # Returns:
        an adjusted Keras tensor
    '''
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
    img_dim = 2 if K.image_data_format() == 'channels_first' else -2

    with K.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p._keras_shape[img_dim] != ip._keras_shape[img_dim]:
            with K.name_scope('adjust_reduction_block_%s' % id):
                p = Activation('relu', name='adjust_relu_1_%s' % id)(p)

                p1 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid',
                                      name='adjust_avg_pool_1_%s' % id)(p)
                p1 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_1_%s' % id,
                            kernel_initializer='he_normal')(p1)

                p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid',
                                      name='adjust_avg_pool_2_%s' % id)(p2)
                p2 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_2_%s' % id,
                            kernel_initializer='he_normal')(p2)

                p = concatenate([p1, p2], axis=channel_dim)
                p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)

        elif p._keras_shape[channel_dim] != filters:
            with K.name_scope('adjust_projection_block_%s' % id):
                p = Activation('relu')(p)
                p = Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                           name='adjust_conv_projection_%s' % id,
                           use_bias=False,
                           kernel_regularizer=l2(weight_decay),
                           kernel_initializer='he_normal')(p)
                p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)
    return p
def make_model(self):
    with k.name_scope("SVM_features"):
        svm_features = Input(shape=(self.svm_dims,), name="svm_features")
        svm_input = Dense(128, activation=None, name="svm_dense")(svm_features)
        svm_input = LeakyReLU()(svm_input)

    with k.name_scope("CNN"):
        lstm_features = Input(shape=(None, self.input_shape, 1), name="lstm_features")
        # lstm_skip = Lambda(lambda t: t[:, 0:-1:2, :])(lstm_features)
        lstm_mask = Masking(mask_value=Config.MASKING_VALUE,
                            input_shape=(self.time_steps, self.input_shape, 1))(lstm_features)  # [None, T, F]
        lstm_mask = Lambda(lambda t: t)(lstm_mask)
        conv1 = Conv2D(4, (3, 1), padding="same", name="conv1")(lstm_mask)
        conv1 = BatchNormalization()(conv1)
        conv1 = LeakyReLU()(conv1)
        conv1 = Dropout(rate=0.3)(conv1)
        conv1_pool = MaxPooling2D(pool_size=(1, 2), name="maxpooling1")(conv1)
        conv_reshape = Lambda(lambda t: tf.concat(tf.unstack(t, axis=-1), axis=-1))(conv1_pool)
        print(conv_reshape)
        # conv_reshape = Reshape(target_shape=(-1, 16))(conv1_pool)

    with k.name_scope("LSTM"):
        conv_dense = Dense(16, activation=None, name="conv_dense")(conv_reshape)
        conv_dense = LeakyReLU()(conv_dense)
        lstm_output = LSTM(Config.LSTM_UNITS, return_sequences=True, name="lstm_sequence")(conv_dense)
        lstm_output_last = LSTM(Config.LSTM_UNITS, return_sequences=False, name="lstm_last_output")(conv_dense)

    with k.name_scope("Concatenate"):
        x = concatenate([lstm_output_last, svm_input])
        x_dense = Dense(128, activation=None)(x)
        x_dense = LeakyReLU()(x_dense)
        # batchnorm1 = BatchNormalization()(x)
        # dropout1 = Dropout(rate=0.3)(batchnorm1)
        dense_2 = Dense(128, activation=None)(x_dense)
        dense_2 = LeakyReLU()(dense_2)
        batchnorm2 = BatchNormalization()(dense_2)
        dropout = Dropout(rate=0.3)(batchnorm2)
        pred = Dense(self.num_classes, activation="softmax", name="output")(dropout)

    self.model = Model(inputs=[svm_features, lstm_features], outputs=[pred])
    return self.model
def create_shared_weights(conv1, conv2, input_shape):
    with K.name_scope(conv1.name):
        conv1.build(input_shape)
    with K.name_scope(conv2.name):
        conv2.build(input_shape)
    conv2.kernel = conv1.kernel
    conv2.bias = conv1.bias
    conv2._trainable_weights = []
    conv2._trainable_weights.append(conv2.kernel)
    conv2._trainable_weights.append(conv2.bias)
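# A minimal usage sketch for create_shared_weights, assuming the standalone Keras 2
# API used by the snippet above; the layer names and shapes are illustrative only.
from keras import backend as K
from keras.layers import Input, Conv1D
from keras.models import Model

inp_a = Input(shape=(32, 8))
inp_b = Input(shape=(32, 8))
conv_a = Conv1D(16, 3, padding='same', name='shared_a')
conv_b = Conv1D(16, 3, padding='same', name='shared_b')
create_shared_weights(conv_a, conv_b, K.int_shape(inp_a))  # conv_b now reuses conv_a's kernel and bias
model = Model([inp_a, inp_b], [conv_a(inp_a), conv_b(inp_b)])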
def build(self, input_shape):
    with K.name_scope(self.name):  # name scope used to make sure weights get unique names
        self.layers = []
        self.res_output_shape = input_shape

        for k in range(2):
            with K.name_scope('conv1D_{}'.format(k)):  # conv1D_0 or conv1D_1
                self._add_and_activate_layer(Conv1D(filters=self.nb_filters,
                                                    kernel_size=self.kernel_size,
                                                    dilation_rate=self.dilation_rate,
                                                    padding=self.padding,
                                                    name='conv1D_{}'.format(k),
                                                    kernel_initializer=self.kernel_initializer))
                # self._add_and_activate_layer(MaxPooling1D(pool_size=3))

            with K.name_scope('norm_{}'.format(k)):  # norm_0 or norm_1
                if self.use_batch_norm:
                    self._add_and_activate_layer(BatchNormalization())
                    print('use batch_norm')
                elif self.use_layer_norm:
                    self._add_and_activate_layer(LayerNormalization())
                    print('use layer_norm')

            self._add_and_activate_layer(Activation('relu'))
            self._add_and_activate_layer(SpatialDropout1D(rate=self.dropout_rate))

        if not self.last_block:
            # 1x1 conv to match the shapes (channel dimension).
            name = 'conv1D_{}'.format(k + 1)
            with K.name_scope(name):
                # make and build this layer separately because it directly uses input_shape
                self.shape_match_conv = Conv1D(filters=self.nb_filters,
                                               kernel_size=1,
                                               padding='same',  # len(input) == len(output)
                                               name=name,
                                               kernel_initializer=self.kernel_initializer)
        else:
            self.shape_match_conv = Lambda(lambda x: x, name='identity')  # Lambda: wraps the function in a Layer

        self.shape_match_conv.build(input_shape)
        self.res_output_shape = self.shape_match_conv.compute_output_shape(input_shape)

        self.final_activation = Activation(self.activation)
        self.final_activation.build(self.res_output_shape)  # probably isn't necessary

        # this is done to force Keras to add the layers in the list to self._layers
        for layer in self.layers:
            self.__setattr__(layer.name, layer)

        super(ResidualBlock, self).build(input_shape)  # done to make sure self.built is set True
def make_model(self):
    with k.name_scope("SVM_features"):
        svm_features = Input(shape=(self.svm_dims,), name="svm_features")
        svm_input = Dense(128, activation=None, name="svm_dense")(svm_features)
        svm_input = LeakyReLU()(svm_input)

    with k.name_scope("LSTM_features"):
        lstm_features = Input(shape=(None, self.input_shape), name="lstm_features")
        lstm_mask = Masking(mask_value=Config.MASKING_VALUE,
                            input_shape=(self.time_steps, self.input_shape))(lstm_features)
        lstm_seq = Bidirectional(LSTM(Config.LSTM_UNITS, return_sequences=True,
                                      name="lstm_sequence"))(lstm_mask)
        lstm_output = Lambda(lambda t: tensorflow.reduce_mean(t, 1))(lstm_seq)
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
             decay=0., weight_decay=0., amsgrad=False,
             total_steps=0, warmup_proportion=0.1, min_lr=0., **kwargs):
    super(RAdam, self).__init__(name='RAdam', **kwargs)
    with K.name_scope(self.__class__.__name__):
        self._iterations = K.variable(0, dtype='int64', name='iterations')
        self._lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
        self.weight_decay = K.variable(weight_decay, name='weight_decay')
        self.total_steps = K.variable(total_steps, name='total_steps')
        self.warmup_proportion = K.variable(warmup_proportion, name='warmup_proportion')
        self.min_lr = K.variable(min_lr, name='min_lr')
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
    self.initial_weight_decay = weight_decay
    self.initial_total_steps = total_steps
    self.amsgrad = amsgrad
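# A minimal usage sketch for the RAdam constructor above, assuming the class is
# importable as `RAdam` from the surrounding module; the toy model, step count and
# warmup settings are illustrative only.
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([Dense(10, activation='softmax', input_shape=(20,))])
model.compile(optimizer=RAdam(lr=1e-3, total_steps=10000, warmup_proportion=0.1, min_lr=1e-5),
              loss='categorical_crossentropy')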
def __init__(self, lr=1e-1, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
             decay=0., amsgrad=False, partial=1. / 8., **kwargs):
    if partial < 0 or partial > 0.5:
        raise ValueError(
            "Padam: 'partial' must be a positive float with a maximum "
            "value of `0.5`, since higher values will cause divergence "
            "during training.")
    super(Padam, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.partial = partial
    self.initial_decay = decay
    self.amsgrad = amsgrad
def __init__(self, optimizer, sync_period=5, slow_step=0.5, **kwargs):
    super(Lookahead, self).__init__(**kwargs)
    self.optimizer = keras.optimizers.get(optimizer)
    with K.name_scope(self.__class__.__name__):
        self.sync_period = K.variable(sync_period, dtype='int64', name='sync_period')
        self.slow_step = K.variable(slow_step, name='slow_step')
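# A minimal usage sketch for the Lookahead wrapper above, assuming it is importable
# as `Lookahead`; because the constructor resolves its argument via
# keras.optimizers.get, either a string identifier or an optimizer instance works.
# The toy model is illustrative only.
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([Dense(1, input_shape=(4,))])
model.compile(optimizer=Lookahead('adam', sync_period=5, slow_step=0.5), loss='mse')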
def __init__(self, lr=1e-3, beta_1=0.9, beta_2=0.999, final_lr=0.1,
             epsilon=None, decay=0, amsbound=False, weight_decay=0.0, **kwargs):
    super(AdaBound, self).__init__(**kwargs)

    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')

    self.initial_decay = decay
    self.weight_decay = float(weight_decay)
    self.base_lr = float(lr)
    self.final_lr = final_lr

    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.amsbound = amsbound
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
             decay=0., amsgrad=False, accum_iters=1, **kwargs):
    if accum_iters < 1:
        raise ValueError('accum_iters must be >= 1')
    super(AdamAccumulate, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
    self.amsgrad = amsgrad
    self.accum_iters = K.variable(accum_iters, K.dtype(self.iterations))
    self.accum_iters_float = K.cast(self.accum_iters, K.floatx())
def build(self, input_shape):
    with K.name_scope(self.name):
        self.attn_block = AttentionBlock(w_dim=self.w_dim,
                                         name=f'AttentionBlock_{self.stack}_{self.dilation}')
    super(SpatialBlock, self).build(input_shape)  # done to make sure self.built is set True
def _separable_conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1),
                          weight_decay=5e-5, id=None):
    '''Adds 2 blocks of [relu-separable conv-batchnorm]

    # Arguments:
        ip: input tensor
        filters: number of output filters per layer
        kernel_size: kernel size of separable convolutions
        strides: strided convolution for downsampling
        weight_decay: l2 regularization weight
        id: string id

    # Returns:
        a Keras tensor
    '''
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('separable_conv_block_%s' % id):
        x = Activation('relu')(ip)
        x = SeparableConv2D(filters, kernel_size, strides=strides,
                            name='separable_conv_1_%s' % id, padding='same',
                            use_bias=False, kernel_initializer='he_normal',
                            kernel_regularizer=l2(weight_decay))(x)
        x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                               epsilon=_BN_EPSILON,
                               name="separable_conv_1_bn_%s" % id)(x)
        x = Activation('relu')(x)
        x = SeparableConv2D(filters, kernel_size,
                            name='separable_conv_2_%s' % id, padding='same',
                            use_bias=False, kernel_initializer='he_normal',
                            kernel_regularizer=l2(weight_decay))(x)
        x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                               epsilon=_BN_EPSILON,
                               name="separable_conv_2_bn_%s" % id)(x)
    return x
def get_updates(self, loss, params):
    if len(self.updates) > 0:
        return self.updates
    multiplies = {}
    for param in params:
        multiplier = self._get_multiplier(param.name)
        if multiplier not in multiplies:
            multiplies[multiplier] = []
        multiplies[multiplier].append(param)

    self.updates, self.weights = [], []
    origin_lr = getattr(self, self.lr_attr)
    for i, (multiplier, params) in enumerate(multiplies.items()):
        lr = origin_lr
        if callable(multiplier):
            lr = lr * multiplier(K.cast(self.optimizer.iterations, K.floatx()))
        elif multiplier != 1.0:
            lr = lr * multiplier
        setattr(self, self.lr_attr, lr)
        with K.name_scope('Group_{}'.format(i)):
            self.updates += self.optimizer.get_updates(loss, params)
        print(self.multipliers, i, self.optimizer.weights)
        for w in self.optimizer.weights:
            if w not in self.weights:
                self.weights.append(w)
    setattr(self, self.lr_attr, origin_lr)

    return self.updates
def Encoder1(input_shape, base_dim, kernel_size, num_scale, block_per_scale,
             depth_per_block, fc_dim, latent_dim, name='Encoder1'):
    with K.name_scope(name):
        dim = base_dim
        enc_input = Input(shape=input_shape)
        y = Conv2D(dim, kernel_size, padding='same', strides=2)(enc_input)
        for i in range(num_scale - 1):
            y = ScaleBlock(dim, block_per_scale, depth_per_block, kernel_size)(y)

            if i != num_scale - 1:
                dim *= 2
                y = Conv2D(dim, kernel_size, strides=2, padding='same')(y)

        y = GlobalAveragePooling2D()(y)
        y = ScaleFcBlock(fc_dim, 1, depth_per_block)(y)

        mu_z = Dense(latent_dim)(y)
        logsd_z = Dense(latent_dim)(y)
        logvar_z = 2 * logsd_z
        sd_z = tf.exp(logsd_z)
        z = mu_z + K.random_normal(shape=(K.shape(mu_z)[0], latent_dim)) * sd_z

        encoder = Model(enc_input, [mu_z, logvar_z, z])
    return encoder
def __init__(self, optimizer, steps_per_update=1, **kwargs):
    assert float(tf.__version__[:4]) <= 1.13, \
        "Please make sure that your tensorflow version is 1.13.x or lower."
    super(AccumOptimizer, self).__init__(**kwargs)
    self.optimizer = optimizer
    with K.name_scope(self.__class__.__name__):
        self.steps_per_update = steps_per_update
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.cond = K.equal(self.iterations % self.steps_per_update, 0)
        self.lr = self.optimizer.lr
        self.optimizer.lr = K.switch(self.cond, self.optimizer.lr, 0.)
        for attr in ['momentum', 'rho', 'beta_1', 'beta_2']:
            if hasattr(self.optimizer, attr):
                value = getattr(self.optimizer, attr)
                setattr(self, attr, value)
                setattr(self.optimizer, attr, K.switch(self.cond, value, 1 - 1e-7))
        for attr in self.optimizer.get_config():
            if not hasattr(self, attr):
                value = getattr(self.optimizer, attr)
                setattr(self, attr, value)

        # Cover the original get_gradients method with accumulative gradients.
        def get_gradients(loss, params):
            return [ag / self.steps_per_update for ag in self.accum_grads]

        self.optimizer.get_gradients = get_gradients
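# A minimal usage sketch for AccumOptimizer, assuming a TF 1.13-era Keras setup as
# required by the version assert above; the wrapped Adam instance, update interval
# and toy model are illustrative only.
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

model = Sequential([Dense(1, input_shape=(4,))])
# gradients are accumulated over 4 small batches before one real parameter update
model.compile(optimizer=AccumOptimizer(Adam(lr=1e-3), steps_per_update=4), loss='mse')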
def __init__(self, lr=0.001, final_lr=0.1, beta_1=0.9, beta_2=0.999, gamma=1e-3,
             epsilon=None, decay=0., amsbound=False, weight_decay=0.0, **kwargs):
    super(AdaBound, self).__init__(**kwargs)

    if not 0. <= gamma <= 1.:
        raise ValueError("Invalid `gamma` parameter. Must lie in [0, 1] range.")

    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')

    self.final_lr = final_lr
    self.gamma = gamma

    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
    self.amsbound = amsbound

    self.weight_decay = float(weight_decay)
    self.base_lr = float(lr)
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
             decay=0., amsgrad=False, clips={}, verbose=0, **kwargs):
    super(AdamwithClip, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
        self.clips = kdict(clips, 'clips')
        self.clips_val = clips
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
    self.amsgrad = amsgrad
    self.verbose = verbose
def _build_residual(self, input_shape):
    self.residual_layers.append(MaxPooling1D(pool_size=3,
                                             strides=self.strides,
                                             padding=self.padding))
    self.residual_layers[-1].build(input_shape)
    output_shape = self.residual_layers[-1].compute_output_shape(input_shape)

    name = 'conv1D_bottleneck_1'
    with K.name_scope(name):
        self.residual_layers.append(Conv1D(filters=self.nb_filters,
                                           kernel_size=1,
                                           strides=self.strides,
                                           padding=self.padding,
                                           use_bias=False,
                                           activation=self.activation,
                                           name=name))
        self.residual_layers[-1].build(output_shape)
        output_shape = self.residual_layers[-1].compute_output_shape(output_shape)

    return output_shape
def decoder(self, inputs, mid_filters=512, out_filters=256,
            activation="relu", block_name="decoder"):
    """
    Create a decoder block

    Args:
        inputs (tensorflow.python.framework.ops.Tensor): inputs to the block
        mid_filters (int): number of mid filters
        out_filters (int): number of output filters
        activation (str): activation function
        block_name (str): name of the block to use

    Returns:
        A tensorflow.python.framework.ops.Tensor object
    """
    with K.name_scope(block_name):
        if activation == "leaky_relu":
            activation = None
            conv = LeakyReLU(alpha=0.3)(self.conv_act(inputs, mid_filters, activation))
        else:
            conv = self.conv_act(inputs, mid_filters, activation)
        conv_tr = Conv2DTranspose(filters=out_filters,
                                  activation=activation,
                                  kernel_size=4,
                                  strides=2,
                                  padding="same")(conv)
    return conv_tr
def add_regularisation(self, weights):
    """
    Given a batch of multi-head attention weights of shape (r*b, l_q, l_k),
    where b is the batch size, r is the number of attention heads, l_q is the
    query (Q) length and l_k is the key (K) length, group all attention
    vectors corresponding to the same word in Q, and for each attention group
    $A$ of shape (r, l_k), calculate $| A \times A^{T} - I |$. Here $| \cdot |$
    denotes the Frobenius norm (the L2 matrix norm) and $I$ denotes the
    identity matrix of rank r.
    """
    rb, l_q, l_k = K.int_shape(weights)
    attention_groups = ops.group_attentions(self.r, weights)
    # flatten the batch axis to produce a tensor of [r, l_k] attention groups
    groups = K.reshape(attention_groups, [-1, self.r, l_k])
    # calculate $A \times A^T$ - similarity between attention weights
    similarity = K.batch_dot(groups, groups, axes=[2, 2])
    # subtract an identity matrix to enforce sparsity
    norms = ops.frobenius_norm(similarity - tf.eye(self.r, dtype=K.floatx()),
                               axes=[1, 2])
    # restore batch-structure, calculate average loss contribution across all
    # time-steps in a sequence and multiply by self.regularise
    with K.name_scope('activity_regularizer'):
        loss_contributions = (
            self.regularise * K.mean(K.reshape(norms, [-1, l_q]), axis=None))
    self.add_loss([loss_contributions], inputs=True)
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.,
             weight_decay=0.025, batch_size=1, samples_per_epoch=1, epochs=1,
             lr_mult=0.1, excluded_vars=[], **kwargs):
    super().__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
        self.weight_decay = K.variable(weight_decay, name='weight_decay')
        self.batch_size = K.variable(batch_size, name='batch_size')
        self.samples_per_epoch = K.variable(samples_per_epoch, name='samples_per_epoch')
        self.epochs = K.variable(epochs, name='epochs')
        self.lr_mult = lr_mult
        self.excluded_vars = excluded_vars
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
def Decoder1(inp_shape, latent_dim, dims, scales, kernel_size,
             block_per_scale, depth_per_block, name='Decoder1'):
    base_wh = 4
    fc_dim = base_wh * base_wh * dims[0]
    data_depth = inp_shape[-1]

    with K.name_scope(name):
        dec_input = Input(shape=(latent_dim,))
        y = Dense(fc_dim)(dec_input)
        y = Reshape((base_wh, base_wh, dims[0]))(y)

        for i in range(len(scales) - 1):
            y = Conv2DTranspose(dims[i + 1], kernel_size, strides=2, padding='same')(y)
            if not (i == len(scales) - 2):
                y = ScaleBlock(dims[i + 1], block_per_scale, depth_per_block, kernel_size)(y)

        x_hat = Conv2D(data_depth, kernel_size, 1, padding='same', activation='sigmoid')(y)
        decoder1 = Model(dec_input, x_hat)

    return decoder1
def make_model(self):
    with k.name_scope("SVM_features"):
        svm_features = Input(shape=(self.svm_dims,), name="svm_features")
        svm_input = Dense(128, activation="tanh", name="svm_dense")(svm_features)
        # svm_input = LeakyReLU()(svm_input)

    with k.name_scope("LSTM_features"):
        lstm_features = Input(shape=(None, self.input_shape), name="lstm_features")
        lstm_mask = Masking(mask_value=Config.MASKING_VALUE,
                            input_shape=(self.time_steps, self.input_shape))(lstm_features)
        lstm_output, state_h, state_c = LSTM(Config.LSTM_UNITS, return_sequences=True,
                                             return_state=True, name="lstm_sequence")(lstm_mask)
        # lstm_output_last = LSTM(Config.LSTM_UNITS, return_sequences=False, name="lstm_last_output")(lstm_mask)

    with k.name_scope("AttentionLayer_1"):
        __, lstm_output_ex_last = Lambda(lambda t: [t, t[:, :-1, :]], name="lstm_T1_Tn-1")(lstm_output)
        lstm_output_last = state_h
        attention_weights1 = dot([lstm_output_last, lstm_output_ex_last],
                                 name="attention_weights1", axes=-1)  # [B, 1, M]
        attention_weights2 = Activation("softmax", name="attention_weights2")(attention_weights1)
        lstm_attention = dot([attention_weights2, lstm_output_ex_last],
                             name="lstm_attention", axes=1)
        # final_attention = concatenate([lstm_attention, lstm_output_last])
        print(lstm_attention)

    """
    with k.name_scope("AttentionLayer_2"):
        # Attention layer 2 - attention params
        input_attention = Input(shape=(Config.ATTENTION_UNITS,), name="attention_params")
        u = Dense(Config.ATTENTION_UNITS, activation="softmax", name="attention_u")(input_attention)
        alpha = dot([u, lstm_output], axes=-1)
        alpha = Activation("softmax", name="attention_weights")(alpha)
        # weighted pool
        lstm_attention = dot([alpha, lstm_output], name="attention_output", axes=1)
    """

    with k.name_scope("Concatenate"):
        x = concatenate([lstm_attention, svm_input])
        x_dense = Dense(128, activation="tanh")(x)
        # x_dense = LeakyReLU()(x_dense)
        dense_2 = Dense(128, activation="tanh")(x_dense)
        batchnorm2 = BatchNormalization()(dense_2)
        dropout = Dropout(rate=0.3, name="dropout")(batchnorm2)
        pred = Dense(self.num_classes, activation="softmax", name="output")(dropout)

    self.model = Model(inputs=[svm_features, lstm_features], outputs=[pred])
    return self.model
def inject(self, model):
    """Inject the Lookahead algorithm for the given model.
    The following code is modified from keras's _make_train_function method.
    See: https://github.com/keras-team/keras/blob/master/keras/engine/training.py#L497
    """
    if not hasattr(model, 'train_function'):
        raise RuntimeError('You must compile your model before using it.')

    model._check_trainable_weights_consistency()

    if model.train_function is None:
        inputs = (model._feed_inputs +
                  model._feed_targets +
                  model._feed_sample_weights)
        if model._uses_dynamic_learning_phase():
            inputs += [K.learning_phase()]
        fast_params = model._collected_trainable_weights

        with K.name_scope('training'):
            with K.name_scope(model.optimizer.__class__.__name__):
                training_updates = model.optimizer.get_updates(
                    params=fast_params,
                    loss=model.total_loss)
                slow_params = [K.variable(p) for p in fast_params]
            fast_updates = (model.updates +
                            training_updates +
                            model.metrics_updates)

            slow_updates, copy_updates = [], []
            for p, q in zip(fast_params, slow_params):
                slow_updates.append(K.update(q, q + self.alpha * (p - q)))
                copy_updates.append(K.update(p, q))

            # Gets loss and metrics. Updates weights at each call.
            fast_train_function = K.function(
                inputs,
                [model.total_loss] + model.metrics_tensors,
                updates=fast_updates,
                name='fast_train_function',
                **model._function_kwargs)

            def F(inputs):
                self.count += 1
                R = fast_train_function(inputs)
                if self.count % self.k == 0:
                    K.batch_get_value(slow_updates)
                    K.batch_get_value(copy_updates)
                return R

            model.train_function = F