def get_evaluation_context_getter():
    """Return a callable that produces the context manager to evaluate under.

    Returns:
        For the TensorFlow backend, the bound ``as_default`` method of the
        current default graph. For every other backend, a factory for a
        context manager that does nothing on enter or exit.
    """
    if K.backend() == 'tensorflow':
        # Imported lazily so that other backends never need TensorFlow.
        import tensorflow as tf
        return tf.get_default_graph().as_default

    # Previously only 'theano' was handled here and any other backend fell
    # off the end of the function, returning None; calling that result then
    # failed with "'NoneType' object is not callable". The no-op context is
    # now the fallback for every non-TensorFlow backend.
    return contextmanager(lambda: (yield))
def call(self, inputs, **kwargs):
    """Max-pool `inputs` and also report where each maximum was taken from.

    Args:
        inputs: tensor handed to ``tf.nn.max_pool_with_argmax``.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        ``[output, argmax]`` where ``argmax`` has been cast to ``K.floatx()``.

    Raises:
        NotImplementedError: if the Keras backend is not TensorFlow.
    """
    padding = self.padding
    pool_size = self.pool_size
    strides = self.strides

    if K.backend() != "tensorflow":
        errmsg = "{} backend is not supported for layer {}".format(
            K.backend(), type(self).__name__)
        raise NotImplementedError(errmsg)

    # The TF op takes 4-element window / stride specs; the first and last
    # entries are fixed to 1 and the middle two come from the layer config.
    window = [1, pool_size[0], pool_size[1], 1]
    tf_padding = padding.upper()
    step = [1, strides[0], strides[1], 1]
    pooled, argmax = tf.nn.max_pool_with_argmax(
        inputs, ksize=window, strides=step, padding=tf_padding)

    return [pooled, K.cast(argmax, K.floatx())]
def __init__(self, k_1=0.01, k_2=0.03, kernel_size=3, max_value=1.0):
    """Store the objective's hyper-parameters and derived constants.

    Args:
        k_1: factor used to derive ``c_1``.
        k_2: factor used to derive ``c_2``.
        kernel_size: stored unchanged on the instance.
        max_value: scale multiplied into ``k_1`` / ``k_2`` before squaring.
    """
    self.__name__ = 'DSSIMObjective'

    # Plain hyper-parameters.
    self.kernel_size = kernel_size
    self.k_1, self.k_2 = k_1, k_2
    self.max_value = max_value

    # Derived constants: c_i = (k_i * max_value) ** 2.
    self.c_1 = (self.k_1 * self.max_value) ** 2
    self.c_2 = (self.k_2 * self.max_value) ** 2

    # Snapshot of the Keras configuration at construction time.
    self.dim_ordering = K.image_data_format()
    self.backend = K.backend()
def gather_each_row(params, indices):
    """Select one entry per row of `params`, i.e. ``params[i, indices[i]]``.

    Only the TensorFlow backend is implemented.

    Raises:
        NotImplementedError: for any backend other than TensorFlow.
    """
    num_rows = K.shape(indices)[0]
    if K.backend() != 'tensorflow':
        raise NotImplementedError

    # Pair each row number with its requested column: (row, column) pairs.
    row_ids = tf.range(num_rows)
    row_col_pairs = K.transpose(K.stack([row_ids, indices]))
    return tf.gather_nd(params, row_col_pairs)
def _moments(self, x):
    """Return ``(mean, variance)`` of `x`, reduced over all but the last axis.

    Args:
        x: backend tensor. The non-TensorFlow path reshapes it to
            ``(-1, self.dim)``, so it presumably has ``self.dim`` entries
            along its last axis (verify against callers).

    Returns:
        Tuple ``(mean, var)``.
    """
    if K.backend() == "tensorflow":
        # Pass a concrete list of ints rather than a lazy ``range`` object:
        # on Python 3 ``range`` is not a list, and TensorFlow's axis handling
        # is only documented for integer lists / tensors.
        axes = list(range(len(K.int_shape(x)) - 1))
        return tf.nn.moments(x=x, axes=axes)

    # TODO: Maybe the following can be optimized a bit?
    mean = K.mean(K.reshape(x, (-1, self.dim)), axis=0)
    var = K.var(K.reshape(x, (-1, self.dim)), axis=0)
    return mean, var
def load_weight(self):
    """Fetch the no-top ResNet50 weight file and load it into this model by name.

    Emits a warning afterwards when the TensorFlow backend is combined with
    the ``channels_first`` image data format.
    """
    RESNET50_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
    local_name = 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'

    weights_path = get_file(local_name,
                            RESNET50_WEIGHTS_PATH_NO_TOP,
                            cache_subdir='models')
    self.load_weights(weights_path, by_name=True)

    # Only the channels_first + TensorFlow combination deserves a warning.
    if K.image_data_format() != 'channels_first':
        return
    if K.backend() != 'tensorflow':
        return
    warnings.warn('You are using the TensorFlow backend, yet you '
                  'are using the Theano '
                  'image data format convention '
                  '(`image_data_format="channels_first"`). '
                  'For best performance, set '
                  '`image_data_format="channels_last"` in '
                  'your Keras config at ~/.keras/keras.json.')
def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    """Build the per-ROI classification and box-regression heads.

    Args:
        base_layers: feature-map tensor fed to the ROI pooling layer.
        input_rois: ROI tensor fed to the ROI pooling layer.
        num_rois: number of ROIs handled per pass.
        nb_classes: number of classes; the regression head emits
            ``4 * (nb_classes - 1)`` values (no target for the bg class).
        trainable: accepted but not used by this function.

    Returns:
        ``[out_class, out_regr]``.
    """
    backend_name = K.backend()

    # compile times on theano tend to be very high, so we use smaller
    # ROI pooling regions to workaround
    if backend_name == 'tensorflow':
        pooling_regions = 14
        input_shape = (num_rois, 14, 14, 1024)
    elif backend_name == 'theano':
        pooling_regions = 7
        input_shape = (num_rois, 1024, 7, 7)

    roi_pooling = RoiPoolingConv(pooling_regions, num_rois)
    out_roi_pool = roi_pooling([base_layers, input_rois])

    # NOTE(review): `trainable=True` is hard-coded below; the `trainable`
    # argument of this function is never forwarded. Confirm this is intended.
    features = classifier_layers(out_roi_pool,
                                 input_shape=input_shape,
                                 trainable=True)
    features = TimeDistributed(Flatten())(features)

    class_dense = Dense(nb_classes,
                        activation='softmax',
                        kernel_initializer='zero')
    out_class = TimeDistributed(
        class_dense, name='dense_class_{}'.format(nb_classes))(features)

    # note: no regression target for bg class
    regr_dense = Dense(4 * (nb_classes-1),
                       activation='linear',
                       kernel_initializer='zero')
    out_regr = TimeDistributed(
        regr_dense, name='dense_regress_{}'.format(nb_classes))(features)

    return [out_class, out_regr]
def set_parallelism_threads():
    """Configure TensorFlow's thread pools from environment variables.

    Does nothing unless the backend is TensorFlow and both
    ``NUM_INTRA_THREADS`` and ``NUM_INTER_THREADS`` are set. Otherwise a new
    session with those thread counts is created on the default graph and
    installed as the Keras session.
    """
    if K.backend() != 'tensorflow':
        return
    if 'NUM_INTRA_THREADS' not in os.environ:
        return
    if 'NUM_INTER_THREADS' not in os.environ:
        return

    import tensorflow as tf

    inter_threads = int(os.environ['NUM_INTER_THREADS'])
    intra_threads = int(os.environ['NUM_INTRA_THREADS'])
    session_conf = tf.ConfigProto(
        inter_op_parallelism_threads=inter_threads,
        intra_op_parallelism_threads=intra_threads)
    K.set_session(tf.Session(graph=tf.get_default_graph(),
                             config=session_conf))
def __init__(self, backbone, experiment_name, dataset=None, class_mode='binary'):
    """Set up paths and default hyper-parameters for one experiment.

    Args:
        backbone: stored as ``self.backbone``.
        experiment_name: names the model file (``<name>.h5`` under
            ``models``) and the log directory (``logs/<name>``).
        dataset: when not None, passed to ``self.setup_dataset``.
        class_mode: passed to ``self.setup_dataset`` together with `dataset`.

    Side effects:
        Clears the Keras session on the TensorFlow backend and creates the
        ``logs/<name>/metrics`` directory if it is missing.
    """
    # Clear session variables
    if K.backend() == 'tensorflow':
        K.clear_session()

    # Set up root paths
    self.models_root_path = 'models'
    self.data_root = 'datasets'
    self.logs_root_path = 'logs'

    # Set up model path
    self.model_name = experiment_name + '.h5'
    self.model_path = os.path.join(self.models_root_path, self.model_name)

    # Set up log path
    self.logs_name = experiment_name
    self.logs_path = os.path.join(self.logs_root_path, self.logs_name)

    # Set up model backbone
    self.backbone = backbone

    # Set image and data variables
    self.height = 224
    self.width = 224
    self.batch_size = 8
    self.shuffle_generator = True

    # Set default training variables
    self.init_lrate = 0.0001
    self.finetuning_layers = 20
    self.from_scratch = False
    self.augment_data = True
    self.class_weights = 'balanced'
    self.warmup_epochs = 5
    self.training_epochs = 25

    # Set default callbacks
    self.earlystopping = False
    self.earlystop_patience = 10
    self.log_to_tensorboard = False
    self.log_to_csv = True
    self.use_lr_decay = False
    self.use_lr_plateau = True

    # Create metrics log directory if not available. `exist_ok=True` avoids
    # the check-then-create race of `if not exists: makedirs`, which can
    # raise FileExistsError when two runs start at the same time.
    self.metrics_path = os.path.join(self.logs_path, 'metrics')
    os.makedirs(self.metrics_path, exist_ok=True)

    # Set up dataset with default paths
    if dataset is not None:
        self.setup_dataset(class_mode, dataset)
def block3(x, filters, kernel_size=3, stride=1, groups=32,
           conv_shortcut=True, name=None):
    """Build one grouped-convolution residual block.

    # Arguments
        x: input tensor.
        filters: integer, filters of the bottleneck layer.
        kernel_size: default 3, kernel size of the bottleneck layer.
        stride: default 1, stride of the first layer.
        groups: default 32, group size for grouped convolution.
        conv_shortcut: default True, use convolution shortcut if True,
            otherwise identity shortcut.
        name: string, block label (used as prefix of every layer name).

    # Returns
        Output tensor for the residual block.
    """
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    # Shortcut branch: identity, or 1x1 convolution followed by batch norm.
    if conv_shortcut is not True:
        shortcut = x
    else:
        shortcut_conv = layers.Conv2D((64 // groups) * filters, 1,
                                      strides=stride, use_bias=False,
                                      name=name + '_0_conv')
        shortcut = shortcut_conv(x)
        shortcut_bn = layers.BatchNormalization(axis=bn_axis,
                                                epsilon=1.001e-5,
                                                name=name + '_0_bn')
        shortcut = shortcut_bn(shortcut)

    # Stage 1: 1x1 convolution, batch norm, relu.
    x = layers.Conv2D(filters, 1, use_bias=False,
                      name=name + '_1_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                  name=name + '_1_bn')(x)
    x = layers.Activation('relu', name=name + '_1_relu')(x)

    # Stage 2: the grouped convolution, assembled from a depthwise
    # convolution whose outputs are reshaped and summed over the trailing
    # axis of size `c`.
    c = filters // groups
    x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)),
                             name=name + '_2_pad')(x)
    x = layers.DepthwiseConv2D(kernel_size, strides=stride,
                               depth_multiplier=c, use_bias=False,
                               name=name + '_2_conv')(x)
    spatial_dims = backend.int_shape(x)[1:-1]
    x = layers.Reshape(spatial_dims + (groups, c, c))(x)
    # An explicit output shape is only supplied on the Theano backend.
    if backend.backend() == 'theano':
        reduced_shape = spatial_dims + (groups, c)
    else:
        reduced_shape = None
    x = layers.Lambda(lambda x: sum([x[:, :, :, :, i] for i in range(c)]),
                      output_shape=reduced_shape,
                      name=name + '_2_reduce')(x)
    x = layers.Reshape(spatial_dims + (filters,))(x)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                  name=name + '_2_bn')(x)
    x = layers.Activation('relu', name=name + '_2_relu')(x)

    # Stage 3: 1x1 convolution and batch norm, then merge with the shortcut.
    x = layers.Conv2D((64 // groups) * filters, 1, use_bias=False,
                      name=name + '_3_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
                                  name=name + '_3_bn')(x)

    merged = layers.Add(name=name + '_add')([shortcut, x])
    return layers.Activation('relu', name=name + '_out')(merged)
def gelu(x: np.ndarray) -> np.ndarray:
    '''
    GELU activation.

    On the TensorFlow backend the erf-based form is used; on any other
    backend the tanh-based approximation is used instead.

    Args:
        x (np.ndarray): input to the function. The body applies backend
            tensor ops (``tf.math.erf`` / ``K.tanh``) to it, so callers
            presumably pass a backend tensor despite the annotation.

    Returns:
        gelu result from x
    '''
    if K.backend() != 'tensorflow':
        # tanh approximation
        inner = math.sqrt(2.0 / math.pi) * (x + 0.044715 * K.pow(x, 3))
        return 0.5 * x * (1.0 + K.tanh(inner))

    # erf form
    erf_term = tf.math.erf(x / tf.sqrt(2.0))
    return 0.5 * x * (1.0 + erf_term)
def call(self, inputs, **kwargs):
    """Add a per-task embedding to the input features.

    Args:
        inputs: a pair ``(features, tasks)``; `tasks` holds the indices used
            to look up ``self.embeddings`` and is cast to int32 if needed.
        **kwargs: accepted for interface compatibility; not used here.

    Returns:
        ``features + task_embedding``.
    """
    features, task_ids = inputs
    if K.dtype(task_ids) != 'int32':
        task_ids = K.cast(task_ids, 'int32')

    task_embed = K.gather(self.embeddings, task_ids)

    if self.mask_zero:
        # Zero out the embedding wherever the task id is 0.
        keep = K.cast(K.not_equal(task_ids, 0), K.floatx())
        task_embed = task_embed * K.expand_dims(keep, axis=-1)

    if K.backend() == 'theano':
        # On Theano the embedding is tiled explicitly along axis 1 to the
        # size of the features' axis 1 before the addition.
        task_embed = K.tile(task_embed, (1, K.shape(features)[1], 1))

    return features + task_embed
def inner_loss(y_true, y_pred):
    """Mask both tensors, then delegate to `loss_func`.

    `mask` and `loss_func` are not parameters; they are taken from the
    enclosing scope.
    """
    # Two code paths: on PlaidML the mask is multiplied in directly (its
    # concatenate is inefficient); on other backends broadcasting is avoided
    # by masking the three channel slices one at a time and re-joining them.
    if K.backend() == "plaidml.keras.backend":
        return loss_func(y_true * mask, y_pred * mask)

    def _mask_per_channel(tensor):
        # Apply `mask` to each of the 3 single-channel slices on the last axis.
        masked = [tensor[:, :, :, i:i + 1] * mask for i in range(3)]
        return K.concatenate(masked, axis=-1)

    n_true = _mask_per_channel(y_true)
    n_pred = _mask_per_channel(y_pred)
    return loss_func(n_true, n_pred)
def test_different_backends_work(self):
    """Check that a small model can be built under every listed backend.

    For both values of `use_attn_mask`, each backend returned by
    ``self.list_backends`` is activated in turn and a small model is
    created. Backends whose module is not installed are skipped. The backend
    that was active on entry is always restored, even if model creation
    raises.
    """
    for use_attn_mask in [True, False]:
        orig_backend = K.backend()
        try:
            for backend in self.list_backends(orig_backend):
                try:
                    set_keras_backend(backend)
                except ModuleNotFoundError:
                    # The backend is not installed. Skip it; the old `pass`
                    # fell through and built the model on a backend that
                    # had failed to load.
                    continue
                K.set_learning_phase(0)  # test
                model = self.create_small_model(use_attn_mask)
                del model
        finally:
            # Restore the original backend no matter how the loop ended so a
            # failure here cannot leak a different backend into other tests.
            set_keras_backend(orig_backend)
def get_initial_state(self, x):
    """Build the list of all-zero initial state tensors.

    One state is created per (state kind, layer) pair, in the order
    'r', 'c', 'e' (and 'ahat', for a single layer, when
    ``self.extrap_start_time`` is set). Each state is derived from `x` via
    zero-valued reductions and a product with a zero matrix, then reshaped
    to the layer's spatial size. When ``self.extrap_start_time`` is set, a
    final integer variable initialised to 0 is appended.

    Args:
        x: input tensor; its static shape is read via ``x.shape.as_list()``.

    Returns:
        List of initial state tensors.
    """
    #input_shape = self.input_spec[0].shape
    input_shape = x.shape.as_list()
    init_nb_row = input_shape[self.row_axis]
    init_nb_col = input_shape[self.column_axis]

    # Collapse a zeros copy of x down to 2D by summing out two image axes
    # and then axis 1.
    base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
    non_channel_axis = -1 if self.data_format == 'channels_first' else -2
    for _ in range(2):
        base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
    base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

    initial_states = []
    states_to_pass = ['r', 'c', 'e']
    # Number of layers for which each kind of state is produced.
    nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
    if self.extrap_start_time is not None:
        states_to_pass.append('ahat')  # pass prediction in states so can use as actual for t+1 when extrapolating
        nlayers_to_pass['ahat'] = 1
    for u in states_to_pass:
        for l in range(nlayers_to_pass[u]):
            # Spatial size is halved (integer division) for each layer.
            ds_factor = 2**l
            nb_row = init_nb_row // ds_factor
            nb_col = init_nb_col // ds_factor
            if u in ['r', 'c']:
                stack_size = self.R_stack_sizes[l]
            elif u == 'e':
                stack_size = 2 * self.stack_sizes[l]
            elif u == 'ahat':
                stack_size = self.stack_sizes[l]
            output_size = stack_size * nb_row * nb_col  # flattened size

            # Multiplying by a zero matrix produces a zero tensor of the
            # flattened size with the same leading dimension as x.
            reducer = K.zeros((input_shape[self.channel_axis], output_size))  # (nb_channels, output_size)
            initial_state = K.dot(base_initial_state, reducer)  # (samples, output_size)
            if self.data_format == 'channels_first':
                output_shp = (-1, stack_size, nb_row, nb_col)
            else:
                output_shp = (-1, nb_row, nb_col, stack_size)
            initial_state = K.reshape(initial_state, output_shp)
            initial_states += [initial_state]

    if self.extrap_start_time is not None:
        # The dtype is given as the string 'int32' on TensorFlow and as the
        # Python `int` type on other backends.
        initial_states += [
            K.variable(0, int if K.backend() != 'tensorflow' else 'int32')
        ]  # the last state will correspond to the current timestep
    return initial_states
def make_asserts(architecture, kernel_size, total_depth, num_computations,
                 num_channels, boundary, dataset, batch_size):
    """Validate the experiment constants, failing on the first violation.

    Checks, in this order:
        1. kernel_size, num_channels, total_depth, batch_size are exactly
           of type ``int``
        2. architecture is 'vanilla', 'bn_ff' or 'bn_res'
        3. dataset is 'cifar10' or 'mnist'
        4. boundary is 'periodic', 'symmetric' or 'zero_padding'
        5. a 'symmetric' boundary is only used with an odd kernel size
        6. total_depth is a multiple of num_computations
        7. the Keras image data format is 'channels_last'
        8. the Keras backend is 'tensorflow' or 'theano'

    Raises:
        AssertionError: carrying the message of the first failed check.
    """
    int_constants = (kernel_size, num_channels, total_depth, batch_size)
    assert all(type(value) is int for value in int_constants), \
        "kernel_size, num_channels, total_depth, batch_size must be integers"

    assert architecture in ('vanilla', 'bn_ff', 'bn_res'), \
        "architecture must be 'vanilla' or 'bn_ff' or 'bn_res'"

    assert dataset in ('cifar10', 'mnist'), \
        "dataset must be 'cifar10' or 'mnist'"

    assert boundary in ('periodic', 'symmetric', 'zero_padding'), \
        "boundary must be 'periodic' or 'symmetric' or 'zero_padding'"

    # Either the boundary is not symmetric, or the kernel size must be odd.
    assert boundary != 'symmetric' or kernel_size % 2 != 0, \
        "'symmetric' boundary only compatible with odd kernel size"

    assert total_depth % num_computations == 0, \
        "total depth must be a multiple of the number of moment computations"

    assert K.image_data_format() == 'channels_last', \
        "data format must be 'channels_last'"

    assert K.backend() == 'tensorflow' or K.backend() == 'theano', \
        "keras backend must be 'tensorflow' or 'theano'"
def classifier(base_layers, input_rois, num_rois, nb_classes=21, trainable=False):
    """Build the fully connected per-ROI classification and regression heads.

    Args:
        base_layers: feature-map tensor fed to the ROI pooling layer.
        input_rois: ROI tensor fed to the ROI pooling layer.
        num_rois: number of ROIs handled per pass.
        nb_classes: number of classes; the regression head emits
            ``4 * (nb_classes - 1)`` values (no target for the bg class).
        trainable: accepted but not used by this function.

    Returns:
        ``[out_class, out_regr]``.
    """
    # Both supported backends pool to 7x7 regions. The per-backend
    # `input_shape` tuple of the original was never read and is omitted.
    if K.backend() in ('tensorflow', 'theano'):
        pooling_regions = 7

    x = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])

    # pool inputs to save memory.
    x = TimeDistributed(AveragePooling2D((7, 7)), name='avg_pool')(x)

    out = TimeDistributed(Flatten(name='flatten'))(x)
    # Two identical Dense(4096, relu) + Dropout(0.5) stages.
    for fc_name in ('fc1', 'fc2'):
        out = TimeDistributed(
            Dense(4096, activation='relu', name=fc_name))(out)
        out = TimeDistributed(Dropout(0.5))(out)

    class_dense = Dense(nb_classes,
                        activation='softmax',
                        kernel_initializer='zero')
    out_class = TimeDistributed(
        class_dense, name='dense_class_{}'.format(nb_classes))(out)

    # note: no regression target for bg class
    regr_dense = Dense(4 * (nb_classes - 1),
                       activation='linear',
                       kernel_initializer='zero')
    out_regr = TimeDistributed(
        regr_dense, name='dense_regress_{}'.format(nb_classes))(out)

    return [out_class, out_regr]
def dot_product(x, kernel):
    """Backend-aware dot product of `x` with `kernel`.

    On TensorFlow, `kernel` gets an extra trailing axis before ``K.dot`` and
    that axis is squeezed out of the result; on other backends ``K.dot`` is
    called directly.

    Args:
        x: input tensor.
        kernel: weights.

    Returns:
        The product tensor.
    """
    if K.backend() != 'tensorflow':
        return K.dot(x, kernel)

    kernel_as_column = K.expand_dims(kernel)
    product = K.dot(x, kernel_as_column)
    return K.squeeze(product, axis=-1)
def call(self, x, mask=None):
    """Apply the parametric exponential-linear activation.

    For the positive part: ``relu(x) * alpha / beta``.
    For the negative part: ``alpha * (exp(-relu(-x) / beta) - 1)``.
    On Theano, ``alpha`` and ``beta`` are first passed through
    ``K.pattern_broadcast`` with ``self.param_broadcast``.

    Args:
        x: input tensor.
        mask: unused.

    Returns:
        Sum of the negative and positive parts.
    """
    if K.backend() != 'theano':
        pos = K.relu(x) * self.alpha / self.beta
        neg = self.alpha * (K.exp((-K.relu(-x)) / self.beta) - 1)
        return neg + pos

    def _bcast(param):
        # Theano needs the broadcast pattern of the parameter made explicit.
        return K.pattern_broadcast(param, self.param_broadcast)

    pos = K.relu(x) * (_bcast(self.alpha) / _bcast(self.beta))
    neg = _bcast(self.alpha) * (
        K.exp((-K.relu(-x)) / _bcast(self.beta)) - 1)
    return neg + pos
def train_model(self, model):
    """Fit `model` up to three times and summarise its test metrics.

    Data comes from ``self.load_data()``. Training and validation sets are
    wrapped in batched ``tf.data.Dataset`` objects (batch size 128,
    incomplete final batch dropped). After each round the model is evaluated
    on the test set; if the first round scores below 0.5 accuracy, the
    remaining rounds are skipped.

    Args:
        model: compiled Keras model whose ``evaluate`` returns
            ``(loss, accuracy, f1_score)``.

    Returns:
        Tuple ``(mean accuracy, std accuracy, mean f1, std f1)`` over the
        rounds that were run.
    """
    batch_size = 128
    accuracies, f1_scores = [], []
    train_images, train_labels, test_images, \
        test_labels, validation_images, validation_labels = self.load_data()

    train_ds = tf.data.Dataset.from_tensor_slices(
        (train_images, train_labels)).batch(batch_size, drop_remainder=True)
    validation_ds = tf.data.Dataset.from_tensor_slices(
        (validation_images, validation_labels)).batch(batch_size,
                                                      drop_remainder=True)

    # Train three times
    # NOTE(review): the same `model` instance is fitted in every round, so
    # later rounds continue from the earlier rounds' weights. Confirm that
    # this is intended.
    for i in range(3):
        # To free memory on google colab.
        if K.backend() == 'tensorflow':
            K.clear_session()
        print('Trainning %s of 3' % (i + 1))

        # Early Stop when bad networks are identified
        es = callbacks.EarlyStopping(monitor='val_accuracy',
                                     mode='max',
                                     verbose=1,
                                     patience=10,
                                     baseline=0.5)

        # `batch_size` must not be passed to fit() for dataset inputs: the
        # datasets above are already batched, and Keras either rejects the
        # argument or ignores it depending on the version.
        model.fit(train_ds,
                  epochs=70,
                  verbose=0,
                  validation_data=validation_ds,
                  callbacks=[es])

        loss, accuracy, f1_score = model.evaluate(test_images,
                                                  test_labels,
                                                  verbose=1)
        accuracies.append(accuracy)
        f1_scores.append(f1_score)

        # A first round below 0.5 accuracy marks a bad network; stop early.
        if i == 0 and accuracy < 0.5:
            break

    return np.mean(accuracies), np.std(accuracies), np.mean(
        f1_scores), np.std(f1_scores)
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Compute `y . w + b` for each temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: optional dropout rate; applied only when strictly between
            0 and 1, with the same mask for every temporal slice.
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    # Fill in any dimension the caller did not provide.
    input_dim = input_dim or K.shape(x)[2]
    timesteps = timesteps or K.shape(x)[1]
    output_dim = output_dim or K.int_shape(w)[1]

    use_dropout = dropout is not None and 0. < dropout < 1.
    if use_dropout:
        # Draw one mask from the first timestep's slice and repeat it along
        # the time axis so every timestep sees the same pattern.
        first_step = K.reshape(x[:, 0, :], (-1, input_dim))
        mask = K.dropout(K.ones_like(first_step), dropout)
        mask_over_time = K.repeat(mask, timesteps)
        x = K.in_train_phase(x * mask_over_time, x, training=training)

    # collapse time dimension and batch dimension together
    flat = K.reshape(x, (-1, input_dim))
    flat = K.dot(flat, w)
    if b is not None:
        flat = K.bias_add(flat, b)

    # reshape to 3D tensor
    if K.backend() != 'tensorflow':
        return K.reshape(flat, (-1, timesteps, output_dim))

    result = K.reshape(flat, K.stack([-1, timesteps, output_dim]))
    result.set_shape([None, None, output_dim])
    return result
def multihead_attention(x, attn_mask, n_head: int, n_state: int,
                        attention_dropout: float, neg_inf: float):
    """Run multi-head scaled dot-product attention over a packed q/k/v tensor.

    The last axis of `x` is sliced into query (first `n_state` entries), key
    (next `n_state`) and value (last `n_state`). Each part is split into
    heads, attended with the backend-specific implementation, and merged.

    Args:
        x: packed tensor, sliced along its third axis.
        attn_mask: passed through to the attention implementation.
        n_head: number of heads.
        n_state: width of each of the q / k / v slices.
        attention_dropout: passed through to the attention implementation.
        neg_inf: passed through to the attention implementation.

    Returns:
        The result of ``merge_heads`` on the attention output.
    """
    query_part = x[:, :, :n_state]
    key_part = x[:, :, n_state:2 * n_state]
    value_part = x[:, :, -n_state:]

    q = split_heads(query_part, n_head)  # B, H, L, C//H
    k = split_heads(key_part, n_head, k=True)  # B, H, C//H, L
    v = split_heads(value_part, n_head)  # B, H, L, C//H

    # Pick the implementation that matches the active backend.
    if K.backend() == 'tensorflow':
        attend = scaled_dot_product_attention_tf
    else:
        attend = scaled_dot_product_attention_th
    attended = attend(q, k, v, attn_mask, attention_dropout, neg_inf)
    return merge_heads(attended)
def convert(in_path, out_path, no_tests=False):
    """Load a stored Keras model and write it in frugally-deep JSON format.

    Requires the TensorFlow backend, float32 and 'channels_last' (asserted).

    Args:
        in_path: path of the stored Keras model.
        out_path: path of the JSON file to write.
        no_tests: forwarded to ``model_to_fdeep_json``.
    """
    assert K.backend() == "tensorflow"
    assert K.floatx() == "float32"
    assert K.image_data_format() == 'channels_last'

    print(f'loading {in_path}')
    json_output = model_to_fdeep_json(load_model(in_path), no_tests)

    print(f'writing {out_path}')
    serialized = json.dumps(json_output,
                            allow_nan=False,
                            indent=2,
                            sort_keys=True)
    write_text_file(out_path, serialized)
def call(self, x, mask=None):
    """Convolve `x` with norm-scaled weights and divide by an input norm.

    The kernel ``self.W`` is divided by ``Wnorm`` (square root of the sum of
    its squares over ``kernel_sum_axes``, plus the squared bias and epsilon).
    The convolution output is divided by ``xnorm`` (square root of the
    convolution of ``x**2`` with ``self.kernel_norm``, plus ``xb`` and
    epsilon). When a bias is used it is scaled by both norms and added.

    Args:
        x: input tensor.
        mask: unused.

    Returns:
        ``self.activation`` applied to the normalised convolution output.

    Raises:
        ValueError: if a bias is used and ``self.data_format`` is neither
            'channels_first' nor 'channels_last'.
    """
    # Without a bias, both the bias term and its input-side counterpart
    # contribute nothing to the norms.
    b, xb = 0., 0.
    if self.data_format == 'channels_first':
        kernel_sum_axes = [1, 2, 3]
        if self.use_bias:
            b = K.reshape(self.b, (self.filters, 1, 1, 1))
            xb = 1.
    elif self.data_format == 'channels_last':
        kernel_sum_axes = [0, 1, 2]
        if self.use_bias:
            b = K.reshape(self.b, (1, 1, 1, self.filters))
            xb = 1.

    # Weight norm: sqrt(sum(W^2) + b^2 + eps).
    tmp = K.sum(K.square(self.W), axis=kernel_sum_axes, keepdims=True)
    Wnorm = K.sqrt(tmp + K.square(b) + K.epsilon())

    # Input norm: sqrt(conv(x^2, kernel_norm) + xb + eps).
    # NOTE(review): presumably `self.kernel_norm` is an all-ones kernel so
    # that this sums x^2 over each receptive field. Confirm where it is
    # built.
    tmp = KC.conv2d(K.square(x), self.kernel_norm, strides=self.strides,
                    padding=self.padding,
                    data_format=self.data_format,
                    filter_shape=self.kernel_norm_shape)
    xnorm = K.sqrt(tmp + xb + K.epsilon())

    W = self.W / Wnorm

    output = KC.conv2d(x, W, strides=self.strides,
                       padding=self.padding,
                       data_format=self.data_format,
                       filter_shape=self.kernel_shape)

    if K.backend() == 'theano':
        # Mark axis 1 of xnorm as broadcastable for the divisions below.
        xnorm = K.pattern_broadcast(xnorm, [False, True, False, False])

    output /= xnorm

    if self.use_bias:
        # Scale the bias by the weight norm while it still has the
        # kernel-aligned shape, then reshape it to align with the output.
        b /= Wnorm
        if self.data_format == 'channels_first':
            b = K.reshape(b, (1, self.filters, 1, 1))
        elif self.data_format == 'channels_last':
            b = K.reshape(b, (1, 1, 1, self.filters))
        else:
            raise ValueError('Invalid data_format:', self.data_format)
        b /= xnorm
        output += b
    output = self.activation(output)
    return output
def __init__(self,
             name: str,
             checkpoint_folder: str,
             weight_folder: str,
             logs_folder: str,
             make_generator_model=make_generator_model,
             make_discriminator_model=None,
             patchsize=(32, 32, 32),
             *args,
             **kwargs):
    """Create the generator, its optimizer and the checkpoint machinery.

    Args:
        name: model name; also the sub-directory created inside each of the
            three folders below.
        checkpoint_folder: existing directory for checkpoints.
        weight_folder: existing directory for weights.
        logs_folder: existing directory for logs.
        make_generator_model: factory called as
            ``make_generator_model("gen", patchsize, 4)``.
        make_discriminator_model: accepted but not used here.
        patchsize: stored and forwarded to the generator factory.
        *args, **kwargs: accepted but not used here.

    Raises:
        Exception: if any of the three folders is not an existing directory.
    """
    self.name = name
    self.patchsize = patchsize

    if K.backend() == "tensorflow":
        # Print the devices TensorFlow can see.
        from tensorflow.python.client import device_lib
        print(device_lib.list_local_devices())

    # Each base folder must already exist; a per-model sub-folder named
    # after the model is then obtained from `get_and_create_dir`.
    if not isdir(checkpoint_folder):
        raise Exception(
            f"Checkpoint's folder unknow : {checkpoint_folder}")
    checkpoint_dir = normpath(join(checkpoint_folder, name))
    self.checkpoint_folder = get_and_create_dir(checkpoint_dir)

    if not isdir(weight_folder):
        raise Exception(f"Weight's folder unknow : {weight_folder}")
    weight_dir = normpath(join(weight_folder, name))
    self.weight_folder = get_and_create_dir(weight_dir)

    if not isdir(logs_folder):
        raise Exception(f" Logs's folder unknow : {logs_folder}")
    logs_dir = normpath(join(logs_folder, name))
    self.logs_folder = get_and_create_dir(logs_dir)

    self.optimizer_gen = keras.optimizers.Adam()
    self.generator = make_generator_model("gen", self.patchsize, 4)
    self.generator.summary()

    # The checkpoint tracks the epoch counter, the optimizer and the model;
    # the manager keeps at most 3 checkpoints in the checkpoint folder.
    epoch_counter = tf.Variable(0, name='epoch')
    self.checkpoint = tf.train.Checkpoint(epoch=epoch_counter,
                                          optimizer_G=self.optimizer_gen,
                                          model=self.generator)
    self.checkpoint_manager = tf.train.CheckpointManager(
        checkpoint=self.checkpoint,
        directory=self.checkpoint_folder,
        max_to_keep=3)
def convert(in_path, out_path):
    """Load a Keras model and write it in frugally-deep JSON format.

    Requires the TensorFlow backend, float32 and 'channels_last' (asserted).
    The JSON holds the architecture, image data format, measured offsets of
    convolution and pooling operations, input shapes, one generated test
    case, the trainable parameters and a hash of the model.

    Args:
        in_path: path of the stored Keras model.
        out_path: path of the JSON file to write.
    """
    assert K.backend() == "tensorflow"
    assert K.floatx() == "float32"
    assert K.image_data_format() == 'channels_last'

    print('loading {}'.format(in_path))
    model = load_model(in_path)

    # Force creation of underlying functional model.
    # see: https://github.com/fchollet/tensorflow.keras/issues/8136
    # Loss and optimizer type do not matter, since we do not train the model.
    model.compile(loss='mse', optimizer='sgd')
    model = convert_sequential_to_model(model)

    test_data = gen_test_data(model)

    json_output = {}
    json_output['architecture'] = json.loads(model.to_json())
    json_output['image_data_format'] = K.image_data_format()

    paddings = ('valid', 'same')

    # Convolution offsets, measured for depths 1 and 2.
    conv_ops = (('conv2d', offset_conv2d_eval),
                ('separable_conv2d', offset_sep_conv2d_eval))
    for depth in range(1, 3):
        for op_name, eval_func in conv_ops:
            for padding in paddings:
                key = '{}_{}_offset_depth_{}'.format(op_name, padding, depth)
                json_output[key] = check_operation_offset(
                    depth, eval_func, padding)

    # Pooling offsets, measured at depth 1 only.
    pool_ops = (('max_pooling_2d', conv2d_offset_max_pool_eval),
                ('average_pooling_2d', conv2d_offset_average_pool_eval))
    for op_name, eval_func in pool_ops:
        for padding in paddings:
            key = '{}_{}_offset'.format(op_name, padding)
            json_output[key] = check_operation_offset(1, eval_func, padding)

    input_layers = get_model_input_layers(model)
    json_output['input_shapes'] = [
        get_layer_input_shape_shape5(layer) for layer in input_layers
    ]
    json_output['tests'] = [test_data]
    json_output['trainable_params'] = get_all_weights(model)
    json_output['hash'] = calculate_hash(model)

    print('writing {}'.format(out_path))
    serialized = json.dumps(json_output,
                            allow_nan=False,
                            indent=2,
                            sort_keys=True)
    write_text_file(out_path, serialized)
def save_layer_outputs(input_img, model, layer_name, temp_folder, input_path):
    """Evaluate one layer on `input_img` and save each output channel.

    Args:
        input_img: input passed to the layer-output function.
        model: model containing the layer.
        layer_name: name of the layer whose outputs are saved.
        temp_folder: forwarded to ``save_layer_img``.
        input_path: forwarded to ``save_layer_img``.

    Returns:
        List with the result of ``save_layer_img`` for every channel.
    """
    with get_evaluation_context():
        outputs_fn = get_outputs_generator(model, layer_name)
        layer_outputs = outputs_fn(input_img)[0]

        if K.backend() == 'theano':
            # Theano puts channels first; move them to the last axis to
            # match the TensorFlow layout indexed below.
            layer_outputs = np.rollaxis(layer_outputs, 0, 3)

        saved = []
        for channel in range(layer_outputs.shape[2]):
            channel_img = layer_outputs[:, :, channel]
            saved.append(save_layer_img(channel_img, layer_name, channel,
                                        temp_folder, input_path))
        return saved
def __init__(self, shape, axis=0, dtype=None):
    """Initialise a non-trainable, mask-supporting layer.

    Args:
        shape: stored as ``self.matrix_shape``.
        axis: stored as ``self.axis``.
        dtype: forwarded to the parent constructor.

    Raises:
        RuntimeError: if the Keras backend is not TensorFlow.
    """
    super().__init__(dtype=dtype)

    self.trainable = False
    self.supports_masking = True

    self.matrix_shape = shape
    self.axis = axis

    # Check backend
    if K.backend() == "tensorflow":
        return
    raise RuntimeError(
        "SqueezedSparseConversion only supports the Tensorflow backend"
    )
def swish(x):
    """Swish activation function: x * sigmoid(x).

    Uses TensorFlow's native ``tf.nn.swish`` when the backend is TensorFlow
    and the op is available, and the explicit product otherwise.

    Reference: [Searching for Activation Functions](https://arxiv.org/abs/1710.05941)
    """
    if backend.backend() != 'tensorflow':
        return x * backend.sigmoid(x)

    try:
        # The native TF implementation has a more
        # memory-efficient gradient implementation
        return backend.tf.nn.swish(x)
    except AttributeError:
        # Native op not reachable: fall back to the generic formula.
        return x * backend.sigmoid(x)
def gather_nd(x, indices):
    """Works as TensorFlow's gather_nd.

    Raises:
        NotImplementedError: on every backend other than TensorFlow.
    """
    if K.backend() != "tensorflow":
        # todo: add theano function.
        # todo: add cntk
        raise NotImplementedError()

    # no global import => do not break if module is not present
    import tensorflow
    return tensorflow.gather_nd(x, indices)
def viterbi_decoding(self, X, mask=None):
    """Decode the best label path for `X` and return it one-hot encoded.

    Steps: compute the input energy from `X`; optionally add boundary
    energies; run ``self.recursion`` to obtain the argmin tables; then walk
    those tables backwards with ``K.rnn`` to recover the best index at every
    step.

    Args:
        X: input tensor; ``K.int_shape(X)[1]`` is used as the sequence
            length for ``K.rnn``.
        mask: optional mask forwarded to the boundary-energy and recursion
            helpers.

    Returns:
        ``K.one_hot`` of the best path with ``self.units`` classes.
    """
    input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        input_energy = self.add_boundary_energy(input_energy, mask,
                                                self.left_boundary,
                                                self.right_boundary)

    argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
    argmin_tables = K.cast(argmin_tables, 'int32')

    # backward to find best path, `initial_best_idx` can be any,
    # as all elements in the last argmin_table are the same
    argmin_tables = K.reverse(argmin_tables, 1)
    # matrix instead of vector is required by tf `K.rnn`
    initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
    if K.backend() == 'theano':
        from theano import tensor as T
        initial_best_idx = [T.unbroadcast(initial_best_idx[0], 1)]

    def gather_each_row(params, indices):
        # Select params[i, indices[i]] for every row i.
        n = K.shape(indices)[0]
        if K.backend() == 'theano':
            from theano import tensor as T
            return params[T.arange(n), indices]
        elif K.backend() == 'tensorflow':
            import tensorflow as tf
            # Build (row, column) pairs for gather_nd.
            indices = K.transpose(K.stack([tf.range(n), indices]))
            return tf.gather_nd(params, indices)
        else:
            raise NotImplementedError

    def find_path(argmin_table, best_idx):
        # Step function for K.rnn: look up, in the current table, the entry
        # selected by the best index carried in the state.
        next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
        next_best_idx = K.expand_dims(next_best_idx)
        if K.backend() == 'theano':
            from theano import tensor as T
            next_best_idx = T.unbroadcast(next_best_idx, 1)
        # The looked-up index is both this step's output and the new state.
        return next_best_idx, [next_best_idx]

    _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
                             input_length=K.int_shape(X)[1],
                             unroll=self.unroll)
    # The tables were reversed above, so flip the path back to forward
    # order and drop the trailing singleton axis added for K.rnn.
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)

    return K.one_hot(best_paths, self.units)