def huber_loss(y_true, y_pred, delta=1.0):
  """Computes Huber loss value.

  For each value x in `error = y_pred - y_true`, the following is calculated:

  ```
  0.5 * x^2                  if |x| <= d
  0.5 * d^2 + d * (|x| - d)  if |x| > d
  ```

  where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    delta: A float, the point where the Huber loss function changes from a
      quadratic to linear.

  Returns:
    Tensor with one scalar loss entry per sample.
  """
  y_pred = math_ops.cast(y_pred, dtype=K.floatx())
  y_true = math_ops.cast(y_true, dtype=K.floatx())
  error = math_ops.subtract(y_pred, y_true)
  abs_error = math_ops.abs(error)
  quadratic = math_ops.minimum(abs_error, delta)
  linear = math_ops.subtract(abs_error, quadratic)
  return math_ops.add(
      math_ops.multiply(
          ops.convert_to_tensor(0.5, dtype=quadratic.dtype),
          math_ops.multiply(quadratic, quadratic)),
      math_ops.multiply(delta, linear))
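# Hedged usage sketch (not part of the original source): the same Huber
# computation expressed with the public TF 2.x API instead of the internal
# `math_ops`/`ops` modules used above.
import tensorflow as tf

y_true = tf.constant([[0.0], [2.0]])
y_pred = tf.constant([[0.5], [0.0]])
delta = 1.0
abs_error = tf.abs(y_pred - y_true)
quadratic = tf.minimum(abs_error, delta)  # capped at delta
linear = abs_error - quadratic            # excess beyond delta
loss = 0.5 * quadratic ** 2 + delta * linear
# -> [[0.125], [1.5]]: quadratic below delta, linear above it.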
def test_on_batch(model, inputs, targets, sample_weights=None):
  """Calculates the loss for one input batch.

  Arguments:
      model: Model whose loss has to be calculated.
      inputs: Input batch data.
      targets: Target batch data.
      sample_weights: Sample weight batch data.

  Returns:
      total loss, loss and metrics associated with each output.
  """
  if len(inputs) and not tensor_util.is_tensor(inputs[0]):
    inputs = [
        ops.convert_to_tensor(val, dtype=backend.floatx()) for val in inputs
    ]
    targets = [
        ops.convert_to_tensor(val, dtype=backend.floatx()) for val in targets
    ]
  if sample_weights:
    sample_weights = [
        ops.convert_to_tensor(val, dtype=backend.floatx())
        if val is not None else None for val in sample_weights
    ]
  outs, loss, loss_metrics = _model_loss(
      model, inputs, targets, sample_weights=sample_weights, training=False)
  if not isinstance(outs, list):
    outs = [outs]
  metrics_results = _eager_metrics_fn(model, outs, targets)
  if not isinstance(loss, list):
    loss = [loss]
  return loss + loss_metrics + metrics_results
def weighted(y_true, y_pred, weights, mask=None):
  """Wrapper function.

  Arguments:
      y_true: `y_true` argument of `fn`.
      y_pred: `y_pred` argument of `fn`.
      weights: Weights tensor.
      mask: Mask tensor.

  Returns:
      Scalar tensor.
  """
  # score_array has ndim >= 2
  score_array = fn(y_true, y_pred)
  if mask is not None:
    # Cast the mask to floatX to avoid float64 upcasting in theano
    mask = math_ops.cast(mask, K.floatx())
    # mask should have the same shape as score_array
    score_array *= mask
    # the loss per batch should be proportional
    # to the number of unmasked samples.
    score_array /= K.mean(mask)

  # apply sample weighting
  if weights is not None:
    # reduce score_array to same ndim as weight array
    ndim = K.ndim(score_array)
    weight_ndim = K.ndim(weights)
    score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))
    score_array *= weights
    score_array /= K.mean(
        math_ops.cast(math_ops.not_equal(weights, 0), K.floatx()))
  return K.mean(score_array)
def sparse_categorical_accuracy(y_true, y_pred):
  y_true = math_ops.reduce_max(y_true, axis=-1)
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def sparse_categorical_accuracy(y_true, y_pred):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the expected labels are float, we need to cast the int returned by
  # argmax to compare.
  if K.dtype(y_true) == K.floatx():
    y_pred = math_ops.cast(y_pred, K.floatx())

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
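# Hedged sketch of the squeeze-then-argmax logic above, using the public TF
# API: float labels of shape (batch, 1) are squeezed so their rank matches
# the rank of argmax(y_pred) before the equality comparison.
import tensorflow as tf

y_true = tf.constant([[1.0], [0.0]])            # shape (2, 1)
y_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])  # shape (2, 2)
y_true = tf.squeeze(y_true, axis=[-1])
matches = tf.cast(
    tf.equal(y_true, tf.cast(tf.argmax(y_pred, axis=-1), tf.float32)),
    tf.float32)
# -> [1., 1.]: both argmax predictions agree with the labels.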
def __init__(self,
             x,
             y,
             image_data_generator,
             batch_size=32,
             shuffle=False,
             sample_weight=None,
             seed=None,
             data_format=None,
             save_to_dir=None,
             save_prefix='',
             save_format='png',
             subset=None,
             dtype=None):
  if data_format is None:
    data_format = backend.image_data_format()
  kwargs = {}
  if 'dtype' in tf_inspect.getfullargspec(
      image.NumpyArrayIterator.__init__)[0]:
    if dtype is None:
      dtype = backend.floatx()
    kwargs['dtype'] = dtype
  super(NumpyArrayIterator, self).__init__(
      x, y, image_data_generator,
      batch_size=batch_size,
      shuffle=shuffle,
      sample_weight=sample_weight,
      seed=seed,
      data_format=data_format,
      save_to_dir=save_to_dir,
      save_prefix=save_prefix,
      save_format=save_format,
      subset=subset,
      **kwargs)
def array_to_img(x, data_format=None, scale=True, dtype=None):
  """Converts a 3D Numpy array to a PIL Image instance.

  Arguments:
      x: Input Numpy array.
      data_format: Image data format, either "channels_first" or
          "channels_last".
      scale: Whether to rescale image values to be within `[0, 255]`.
      dtype: Dtype to use.

  Returns:
      A PIL Image instance.

  Raises:
      ImportError: if PIL is not available.
      ValueError: if invalid `x` or `data_format` is passed.
  """
  if data_format is None:
    data_format = backend.image_data_format()
  kwargs = {}
  if 'dtype' in tf_inspect.getfullargspec(image.array_to_img)[0]:
    if dtype is None:
      dtype = backend.floatx()
    kwargs['dtype'] = dtype
  return image.array_to_img(x, data_format=data_format, scale=scale, **kwargs)
def build(self, input_shape):
  dtype = dtypes.as_dtype(self.dtype or K.floatx())
  if not (dtype.is_floating or dtype.is_complex):
    raise TypeError('Unable to build `Dense` layer with non-floating point '
                    'dtype %s' % (dtype,))
  input_shape = tensor_shape.TensorShape(input_shape)
  if tensor_shape.dimension_value(input_shape[-1]) is None:
    raise ValueError('The last dimension of the inputs to `Dense` '
                     'should be defined. Found `None`.')
  last_dim = tensor_shape.dimension_value(input_shape[-1])
  self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})
  self.kernel = self.add_weight(
      'kernel',
      shape=[last_dim, self.units],
      initializer=self.kernel_initializer,
      regularizer=self.kernel_regularizer,
      constraint=self.kernel_constraint,
      dtype=self.dtype,
      trainable=True)
  if self.use_bias:
    self.bias = self.add_weight(
        'bias',
        shape=[self.units,],
        initializer=self.bias_initializer,
        regularizer=self.bias_regularizer,
        constraint=self.bias_constraint,
        dtype=self.dtype,
        trainable=True)
  else:
    self.bias = None
  self.built = True
def _apply_scores(self, scores, value, scores_mask=None):
  """Applies attention scores to the given value tensor.

  To use this method in your attention layer, follow the steps:

  * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape
    `[batch_size, Tv]` to calculate the attention `scores`.
  * Pass `scores` and `value` tensors to this method. The method applies
    `scores_mask`, calculates `attention_distribution = softmax(scores)`,
    then returns `matmul(attention_distribution, value)`.
  * Apply `query_mask` and return the result.

  Args:
    scores: Scores float tensor of shape `[batch_size, Tq, Tv]`.
    value: Value tensor of shape `[batch_size, Tv, dim]`.
    scores_mask: A boolean mask `Tensor` of shape `[batch_size, 1, Tv]` or
      `[batch_size, Tq, Tv]`. If given, scores at positions where
      `scores_mask==False` do not contribute to the result. It must contain
      at least one `True` value in each line along the last dimension.

  Returns:
    Tensor of shape `[batch_size, Tq, dim]`.
  """
  if scores_mask is not None:
    padding_mask = math_ops.logical_not(scores_mask)
    # Bias so padding positions do not contribute to attention distribution.
    scores -= 1.e9 * math_ops.cast(padding_mask, dtype=K.floatx())
  attention_distribution = nn.softmax(scores)
  return math_ops.matmul(attention_distribution, value)
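# Hedged numeric sketch of the masking trick above (public TF API, made-up
# shapes): subtracting 1e9 at masked positions drives their softmax weight
# to ~0 before the weighted sum over `value`.
import tensorflow as tf

scores = tf.constant([[[1.0, 2.0, 3.0]]])      # [batch=1, Tq=1, Tv=3]
value = tf.constant([[[1.0], [2.0], [3.0]]])   # [batch=1, Tv=3, dim=1]
scores_mask = tf.constant([[[True, True, False]]])  # last position is padding
scores -= 1.e9 * tf.cast(tf.logical_not(scores_mask), tf.float32)
attention_distribution = tf.nn.softmax(scores)      # ~[0.27, 0.73, 0.0]
result = tf.matmul(attention_distribution, value)   # shape [1, 1, 1]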
def apply_attention_scores(self, scores, value, value_mask=None):
  """Applies attention scores to the given value tensor.

  To use this method in your attention layer, follow the steps:

  * Use `query` tensor of shape `[batch_size, Tq]` and `key` tensor of shape
    `[batch_size, Tv]` to calculate the attention `scores`.
  * Pass `scores` and `value` tensors to this method. The method applies
    `value_mask`, calculates `attention_distribution = softmax(scores)`,
    then returns `matmul(attention_distribution, value)`.
  * Apply `query_mask` and return the result.

  Args:
    scores: Scores float tensor of shape `[batch_size, Tq, Tv]`.
    value: Value tensor of shape `[batch_size, Tv, dim]`.
    value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`. If
      given, will apply the mask such that values at positions where
      `mask==False` do not contribute to the result.

  Returns:
    Tensor of shape `[batch_size, Tq, dim]`.
  """
  if value_mask is not None:
    # Mask of shape [batch_size, 1, Tv] that is True in padding position.
    padding_mask = array_ops.expand_dims(
        math_ops.logical_not(value_mask), axis=1)
    # Bias so padding positions do not contribute to attention distribution.
    scores -= 1.e9 * math_ops.cast(padding_mask, dtype=K.floatx())
  attention_distribution = nn.softmax(scores)
  return math_ops.matmul(attention_distribution, value)
def __init__(self,
             input_dim,
             output_dim,
             embeddings_initializer='uniform',
             embeddings_regularizer=None,
             activity_regularizer=None,
             embeddings_constraint=None,
             mask_zero=False,
             input_length=None,
             **kwargs):
  if 'input_shape' not in kwargs:
    if input_length:
      kwargs['input_shape'] = (input_length,)
    else:
      kwargs['input_shape'] = (None,)
  dtype = kwargs.pop('dtype', K.floatx())
  super(Embedding, self).__init__(dtype=dtype, **kwargs)

  self.input_dim = input_dim
  self.output_dim = output_dim
  self.embeddings_initializer = initializers.get(embeddings_initializer)
  self.embeddings_regularizer = regularizers.get(embeddings_regularizer)
  self.activity_regularizer = regularizers.get(activity_regularizer)
  self.embeddings_constraint = constraints.get(embeddings_constraint)
  self.mask_zero = mask_zero
  self.supports_masking = mask_zero
  self.input_length = input_length
def opt_variable(value, dtype=None, name=None, constraint=None):
  """Instantiates a variable and returns it."""
  if dtype is None:
    dtype = backend.floatx()

  variables = []
  for i in range(num_replicas):
    # Keras holds the variables in optimizer class instance, so the name
    # does not matter here. ResourceVariable constructor will find a unique
    # name (including name=None) for each replica.
    with ops.device("device:TPU:{}".format(i)):
      v = resource_variable_ops.ResourceVariable(
          value,
          dtype=dtypes_module.as_dtype(dtype),
          name=name,
          constraint=constraint)
    variables.append(v)
  name = "replicate_{}_{}".format("variable" if name is None else name,
                                  ops.uid())
  v = ReplicatedVariable(name, variables)

  # pylint: disable=protected-access
  if isinstance(value, np.ndarray):
    v._keras_shape = value.shape
  elif hasattr(value, "shape"):
    v._keras_shape = backend.int_shape(value)
  v._uses_learning_phase = False
  backend.track_variable(v)
  return v
def _set_inputs_and_outputs(self, input_shape=None, tensor=None):
  """Set model's input and output specs based on the input received.

  If `tensor` is provided, `input_shape` is not required.

  Args:
    input_shape: Optional shape of input.
    tensor: Optional existing tensor to wrap into the `Input` layer.
  """
  if not self.inputs:
    dtype = K.floatx()
    if tensor is not None:
      batch_shape = (None,) + tuple(tensor.get_shape().as_list()[1:])
      x = Input(dtype=dtype, name=self.name + '_input', tensor=tensor)
    elif input_shape is not None:
      batch_shape = tuple(input_shape)
      x = Input(
          batch_shape=batch_shape, dtype=dtype, name=self.name + '_input')
    self.inputs = [x]
    for layer in self._layers:
      x = layer(x)
    self.outputs = [x]
    # Make sure that the model's input shape will be preserved during
    # serialization.
    if self._layers:
      self._layers[0]._batch_input_shape = batch_shape

  if self.inputs:
    self._init_graph_network(self.inputs, self.outputs, name=self.name)
    self.built = True
  if self._layers:
    self._track_layers(self._layers)
def categorical_crossentropy(y_true,
                             y_pred,
                             from_logits=False,
                             label_smoothing=0):
  """Computes the categorical crossentropy loss.

  Args:
    y_true: tensor of true targets.
    y_pred: tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By
      default, we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.

  Returns:
    Categorical crossentropy loss value.
  """
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=K.floatx())

  def _smooth_labels():
    num_classes = math_ops.cast(array_ops.shape(y_true)[1], y_pred.dtype)
    return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)

  y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)
  return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
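# Hedged numeric sketch of `_smooth_labels` above: with label_smoothing=0.1
# and num_classes=4, a one-hot row [0, 1, 0, 0] becomes
# y_true * (1 - 0.1) + 0.1 / 4 = [0.025, 0.925, 0.025, 0.025].
import numpy as np

y_true = np.array([0., 1., 0., 0.])
label_smoothing = 0.1
num_classes = 4
smoothed = y_true * (1.0 - label_smoothing) + label_smoothing / num_classes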
def train_on_batch(model, inputs, targets, sample_weights=None):
  """Calculates the loss and gradient updates for one input batch.

  Arguments:
      model: Model whose loss has to be calculated.
      inputs: Input batch data.
      targets: Target batch data.
      sample_weights: Sample weight batch data.

  Returns:
      total loss and the loss associated with each output.
  """
  if isinstance(inputs, collections.Sequence):
    if len(inputs) and tensor_util.is_tensor(inputs[0]):
      inputs = training_utils.cast_if_floating_dtype(inputs)
      targets = training_utils.cast_if_floating_dtype(targets)
    else:
      inputs = [
          ops.convert_to_tensor(val, dtype=backend.floatx()) for val in inputs
      ]
      targets = [
          ops.convert_to_tensor(val, dtype=backend.floatx())
          for val in targets
      ]
    if sample_weights:
      sample_weights = [
          ops.convert_to_tensor(val, dtype=backend.floatx())
          if val is not None else None for val in sample_weights
      ]
  outs, loss, loss_metrics, _, masks = _process_single_batch(
      model, inputs, targets, sample_weights=sample_weights, training=True)
  if not isinstance(outs, list):
    outs = [outs]
  metrics_results = _eager_metrics_fn(
      model,
      outs,
      targets,
      sample_weights=sample_weights,
      masks=masks,
      return_stateful_result=False)
  loss = generic_utils.to_list(loss)

  return [
      tensor_util.constant_value(v)
      for v in loss + loss_metrics + metrics_results
  ]
def __call__(self, x):
  if self.l1 or self.l2:
    regularization = ops.convert_to_tensor(0., dtype=K.floatx())
    if self.l1:
      regularization += math_ops.reduce_sum(self.l1 * math_ops.abs(x))
    if self.l2:
      regularization += math_ops.reduce_sum(self.l2 * math_ops.square(x))
    return regularization
  return None
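# Hedged usage sketch (public TF API, made-up coefficients): the penalty an
# L1/L2 regularizer with l1=0.01, l2=0.001 would return for a small weight
# tensor, mirroring the two reduce_sum terms above.
import tensorflow as tf

l1, l2 = 0.01, 0.001
x = tf.constant([[1.0, -2.0]])
regularization = (tf.reduce_sum(l1 * tf.abs(x)) +
                  tf.reduce_sum(l2 * tf.square(x)))
# 0.01 * (1 + 2) + 0.001 * (1 + 4) = 0.035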
def __init__(self,
             from_logits=False,
             label_smoothing=0,
             reduction=losses_impl.ReductionV2.SUM_OVER_BATCH_SIZE,
             name=None):
  super(BinaryCrossentropy, self).__init__(reduction=reduction, name=name)
  self.from_logits = from_logits
  self.label_smoothing = ops.convert_to_tensor(
      label_smoothing, dtype=K.floatx())
def batch_predict_loop(model, inputs, batch_size, verbose=0):
  """Predict function for eager execution when input is arrays or tensors.

  Arguments:
      model: Instance of `Model`.
      inputs: List of input arrays.
      batch_size: Integer batch size.
      verbose: Verbosity mode.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions (if the model has multiple outputs).
  """
  outs = []
  num_samples = training_utils.check_num_samples(inputs, batch_size)
  if verbose == 1:
    progbar = generic_utils.Progbar(target=num_samples)
  batches = generic_utils.make_batches(num_samples, batch_size)
  index_array = np.arange(num_samples)
  for batch_index, (batch_start, batch_end) in enumerate(batches):
    batch_ids = index_array[batch_start:batch_end]
    inputs_batch = slice_arrays(inputs, batch_ids)
    inputs_batch = [
        ops.convert_to_tensor(val, dtype=backend.floatx())
        for val in inputs_batch
    ]
    if len(inputs_batch) == 1:
      if model._expects_training_arg:
        batch_outs = model.call(inputs_batch[0], training=False)
      else:
        batch_outs = model.call(inputs_batch[0])
    else:
      if model._expects_training_arg:
        batch_outs = model.call(inputs_batch, training=False)
      else:
        batch_outs = model.call(inputs_batch)
    if not isinstance(batch_outs, list):
      batch_outs = [batch_outs]
    if batch_index == 0:
      # Pre-allocate the results arrays.
      for batch_out in batch_outs:
        dims = batch_out.shape[1:].dims
        dims_list = [d.value for d in dims]
        shape = (num_samples,) + tuple(dims_list)
        outs.append(np.zeros(shape, dtype=batch_out.dtype.as_numpy_dtype))
    for i, batch_out in enumerate(batch_outs):
      outs[i][batch_start:batch_end] = batch_out
    if verbose == 1:
      progbar.update(batch_end)
  if len(outs) == 1:
    return outs[0]
  return outs
def test_single_thing(self):
  a = np.ones(10)
  model_inputs = training_utils.ModelInputs(a)
  self.assertEqual(['input_1'], model_inputs.get_input_names())
  vals = model_inputs.get_symbolic_inputs()
  self.assertTrue(tensor_util.is_tensor(vals))
  vals = model_inputs.get_symbolic_inputs(return_single_as_list=True)
  self.assertEqual(1, len(vals))
  self.assertTrue(tensor_util.is_tensor(vals[0]))
  self.assertEqual(backend.floatx(), vals[0].dtype)
def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a tensor encoding a batch of images.

  Arguments:
      x: Input tensor, 3D or 4D.
      data_format: Data format of the image tensor.
      mode: One of "caffe", "tf" or "torch".
          - caffe: will convert the images from RGB to BGR,
              then will zero-center each color channel with
              respect to the ImageNet dataset, without scaling.
          - tf: will scale pixels between -1 and 1, sample-wise.
          - torch: will scale pixels between 0 and 1 and then
              will normalize each channel with respect to the
              ImageNet dataset.

  Returns:
      Preprocessed tensor.
  """
  global _IMAGENET_MEAN

  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  if mode == 'torch':
    x /= 255.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
  else:
    if data_format == 'channels_first':
      # 'RGB'->'BGR'
      if K.ndim(x) == 3:
        x = x[::-1, ...]
      else:
        x = x[:, ::-1, ...]
    else:
      # 'RGB'->'BGR'
      x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None

  if _IMAGENET_MEAN is None:
    _IMAGENET_MEAN = constant_op.constant(-np.array(mean), dtype=K.floatx())

  # Zero-center by mean pixel
  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
    x = K.bias_add(x, math_ops.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
  else:
    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
  if std is not None:
    x /= std
  return x
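# Hedged NumPy sketch of the 'torch' branch above: scale pixels to [0, 1],
# then normalize per channel. The single expression below is equivalent to
# the separate bias_add and divide steps in the function.
import numpy as np

x = np.random.uniform(0, 255, size=(2, 4, 4, 3)).astype('float32')
x /= 255.
mean = np.array([0.485, 0.456, 0.406], dtype='float32')
std = np.array([0.229, 0.224, 0.225], dtype='float32')
x = (x - mean) / std  # broadcasts over the channels_last axis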
def __init__(self,
             featurewise_center=False,
             samplewise_center=False,
             featurewise_std_normalization=False,
             samplewise_std_normalization=False,
             zca_whitening=False,
             zca_epsilon=1e-6,
             rotation_range=0,
             width_shift_range=0.,
             height_shift_range=0.,
             brightness_range=None,
             shear_range=0.,
             zoom_range=0.,
             channel_shift_range=0.,
             fill_mode='nearest',
             cval=0.,
             horizontal_flip=False,
             vertical_flip=False,
             rescale=None,
             preprocessing_function=None,
             data_format=None,
             validation_split=0.0,
             dtype=None):
  if data_format is None:
    data_format = backend.image_data_format()
  kwargs = {}
  if 'dtype' in tf_inspect.getfullargspec(
      image.ImageDataGenerator.__init__)[0]:
    if dtype is None:
      dtype = backend.floatx()
    kwargs['dtype'] = dtype
  super(ImageDataGenerator, self).__init__(
      featurewise_center=featurewise_center,
      samplewise_center=samplewise_center,
      featurewise_std_normalization=featurewise_std_normalization,
      samplewise_std_normalization=samplewise_std_normalization,
      zca_whitening=zca_whitening,
      zca_epsilon=zca_epsilon,
      rotation_range=rotation_range,
      width_shift_range=width_shift_range,
      height_shift_range=height_shift_range,
      brightness_range=brightness_range,
      shear_range=shear_range,
      zoom_range=zoom_range,
      channel_shift_range=channel_shift_range,
      fill_mode=fill_mode,
      cval=cval,
      horizontal_flip=horizontal_flip,
      vertical_flip=vertical_flip,
      rescale=rescale,
      preprocessing_function=preprocessing_function,
      data_format=data_format,
      validation_split=validation_split,
      **kwargs)
def get_input_values(self):
  """Returns input values passed in."""
  if context.executing_eagerly():
    for i in range(len(self._flattened_inputs)):
      v = self._flattened_inputs[i]
      if tensor_util.is_tensor(v):
        v = cast_single_tensor(v)
      else:
        v = ops.convert_to_tensor(v, dtype=K.floatx())
      self._flattened_inputs[i] = v
  return self._get(return_single_as_list=False)
def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):  # pylint: disable=missing-docstring
  y_pred = ops.convert_to_tensor(y_pred)
  y_true = math_ops.cast(y_true, y_pred.dtype)
  label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=K.floatx())

  def _smooth_labels():
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing

  y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                 lambda: y_true)
  return K.mean(
      K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
def sparse_categorical_accuracy(y_true, y_pred):
  # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
  if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
    y_true = array_ops.squeeze(y_true, [-1])
  y_pred = math_ops.argmax(y_pred, axis=-1)

  # If the predicted output and actual output types don't match, force cast
  # them to match.
  if K.dtype(y_pred) != K.dtype(y_true):
    y_pred = math_ops.cast(y_pred, K.dtype(y_true))

  return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
def call(self, inputs, mask=None):
  steps_axis = 1 if self.data_format == 'channels_last' else 2
  if mask is not None:
    mask = math_ops.cast(mask, backend.floatx())
    input_shape = inputs.shape.as_list()
    broadcast_shape = [-1, input_shape[steps_axis], 1]
    mask = array_ops.reshape(mask, broadcast_shape)
    inputs *= mask
    return backend.sum(inputs, axis=steps_axis) / math_ops.reduce_sum(
        mask, axis=steps_axis)
  else:
    return backend.mean(inputs, axis=steps_axis)
def cast_if_floating_dtype(x):
  """Casts the given data tensors to the default floating point type.

  Casts only if the input is already a floating point type.

  Args:
    x: tensor or list/tuple of tensors.

  Returns:
    Converted input.

  Raises:
    RuntimeError: if data isn't tensors.
  """
  if not has_tensors(x):
    raise RuntimeError(
        'Please provide tensors for casting, got: {x}'.format(x=x))

  if isinstance(x, (list, tuple)):
    return [
        math_ops.cast(val, dtype=K.floatx())
        if tensor_util.is_tensor(val) and val.dtype.is_floating else val
        for val in x
    ]
  return math_ops.cast(x, dtype=K.floatx()) if x.dtype.is_floating else x
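# Hedged sketch of the casting rule above (public TF API): floating tensors
# are cast to the default float type, everything else passes through.
import tensorflow as tf

vals = [tf.constant([1.0], dtype=tf.float64), tf.constant([1], dtype=tf.int32)]
casted = [
    tf.cast(v, tf.keras.backend.floatx()) if v.dtype.is_floating else v
    for v in vals
]
# float64 -> float32 (the default floatx); int32 is left untouched.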
def build(self, input_shape=None):
  if input_shape and not self.inputs:
    batch_shape = tuple(input_shape)
    dtype = K.floatx()
    x = Input(
        batch_shape=batch_shape, dtype=dtype, name=self.name + '_input')
    self.inputs = [x]
    for layer in self._layers:
      x = layer(x)
    self.outputs = [x]
  if self.inputs:
    self._init_graph_network(self.inputs, self.outputs, name=self.name)
    self.built = True
  self._track_layers(self._layers)
def convert(in_path, out_path):
    """Convert any Keras model to the frugally-deep model format."""

    assert K.backend() == "tensorflow"
    assert K.floatx() == "float32"
    assert K.image_data_format() == 'channels_last'

    print('loading {}'.format(in_path))
    model = load_model(in_path)

    # Force creation of underlying functional model.
    # see: https://github.com/fchollet/keras/issues/8136
    # Loss and optimizer type do not matter, since we don't train the model.
    model.compile(loss='mse', optimizer='sgd')

    model = convert_sequential_to_model(model)
    test_data = gen_test_data(model)

    json_output = {}
    json_output['architecture'] = json.loads(model.to_json())
    json_output['image_data_format'] = K.image_data_format()
    for depth in range(1, 3, 1):
        json_output['conv2d_valid_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_conv2d_eval, 'valid')
        json_output['conv2d_same_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_conv2d_eval, 'same')
        json_output['separable_conv2d_valid_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_sep_conv2d_eval, 'valid')
        json_output['separable_conv2d_same_offset_depth_' + str(depth)] =\
            check_operation_offset(depth, offset_sep_conv2d_eval, 'same')
    json_output['max_pooling_2d_valid_offset'] =\
        check_operation_offset(1, conv2d_offset_max_pool_eval, 'valid')
    json_output['max_pooling_2d_same_offset'] =\
        check_operation_offset(1, conv2d_offset_max_pool_eval, 'same')
    json_output['average_pooling_2d_valid_offset'] =\
        check_operation_offset(1, conv2d_offset_average_pool_eval, 'valid')
    json_output['average_pooling_2d_same_offset'] =\
        check_operation_offset(1, conv2d_offset_average_pool_eval, 'same')
    json_output['input_shapes'] = get_shapes(test_data['inputs'])
    json_output['output_shapes'] = get_shapes(test_data['outputs'])
    json_output['tests'] = [test_data]
    json_output['trainable_params'] = get_all_weights(model)

    print('writing {}'.format(out_path))
    write_text_file(out_path, json.dumps(
        json_output, allow_nan=False, indent=2, sort_keys=True))
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, K.floatx())

  # Due to the recommendations in [2], i.e. warming momentum schedule
  momentum_cache_t = self.beta_1 * (
      1. - 0.5 *
      (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
  momentum_cache_t_1 = self.beta_1 * (
      1. - 0.5 *
      (math_ops.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
  m_schedule_new = self.m_schedule * momentum_cache_t
  m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
  self.updates.append((self.m_schedule, m_schedule_new))

  shapes = [K.int_shape(p) for p in params]
  ms = [K.zeros(shape) for shape in shapes]
  vs = [K.zeros(shape) for shape in shapes]

  self.weights = [self.iterations, self.m_schedule] + ms + vs

  for p, g, m, v in zip(params, grads, ms, vs):
    # the following equations given in [1]
    g_prime = g / (1. - m_schedule_new)
    m_t = self.beta_1 * m + (1. - self.beta_1) * g
    m_t_prime = m_t / (1. - m_schedule_next)
    v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
    v_t_prime = v_t / (1. - math_ops.pow(self.beta_2, t))
    m_t_bar = (1. - momentum_cache_t) * g_prime + (
        momentum_cache_t_1 * m_t_prime)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))

    p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = []

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (  # pylint: disable=g-no-augmented-assignment
        1. /
        (1. +
         self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

  with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
    t = math_ops.cast(self.iterations, K.floatx())
  lr_t = lr * (
      K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
      (1. - math_ops.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
    if self.amsgrad:
      vhat_t = math_ops.maximum(vhat, v_t)
      p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
      self.updates.append(state_ops.assign(vhat, vhat_t))
    else:
      p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

    self.updates.append(state_ops.assign(m, m_t))
    self.updates.append(state_ops.assign(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(state_ops.assign(p, new_p))
  return self.updates
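# Hedged numeric sketch of the bias correction above: the effective step size
# lr_t = lr * sqrt(1 - beta_2^t) / (1 - beta_1^t) counteracts the zero
# initialization of the moment estimates.
import math

lr, beta_1, beta_2 = 0.001, 0.9, 0.999
for t in (1, 10, 1000):
    lr_t = lr * math.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t)
    print(t, lr_t)  # t=1: ~3.2e-4; t=10: ~1.5e-4; t=1000: ~8.0e-4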
def __init__(self,
             input_shape=None,
             batch_size=None,
             dtype=None,
             input_tensor=None,
             sparse=False,
             name=None,
             ragged=False,
             **kwargs):
  strategy = distribution_strategy_context.get_strategy()
  if strategy and batch_size is not None and \
      distributed_training_utils.global_batch_size_supported(strategy):
    if batch_size % strategy.num_replicas_in_sync != 0:
      raise ValueError('The `batch_size` argument ({}) must be divisible by '
                       'the number of replicas ({})'.format(
                           batch_size, strategy.num_replicas_in_sync))
    batch_size = batch_size // strategy.num_replicas_in_sync

  if 'batch_input_shape' in kwargs:
    batch_input_shape = kwargs.pop('batch_input_shape')
    if input_shape and batch_input_shape:
      raise ValueError('Only provide the input_shape OR '
                       'batch_input_shape argument to '
                       'InputLayer, not both at the same time.')
    batch_size = batch_input_shape[0]
    input_shape = batch_input_shape[1:]
  if kwargs:
    raise ValueError('Unrecognized keyword arguments:', kwargs.keys())

  if sparse and ragged:
    raise ValueError(
        'Cannot set both sparse and ragged to True in a Keras input.')

  if not name:
    prefix = 'input'
    name = prefix + '_' + str(backend.get_uid(prefix))

  if not dtype:
    if input_tensor is None:
      dtype = backend.floatx()
    else:
      dtype = backend.dtype(input_tensor)
  elif input_tensor is not None and input_tensor.dtype != dtype:
    raise ValueError('`input_tensor.dtype` differs from `dtype`: %s vs. %s' %
                     (input_tensor.dtype, dtype))
  super(InputLayer, self).__init__(dtype=dtype, name=name)
  self.built = True
  self.sparse = sparse
  self.ragged = ragged
  self.batch_size = batch_size
  self.supports_masking = True

  if isinstance(input_shape, tensor_shape.TensorShape):
    input_shape = tuple(input_shape.as_list())
  elif isinstance(input_shape, int):
    input_shape = (input_shape,)

  if input_tensor is None:
    if input_shape is not None:
      batch_input_shape = (batch_size,) + tuple(input_shape)
    else:
      batch_input_shape = None
    graph = backend.get_graph()
    with graph.as_default():
      input_tensor = backend.placeholder(
          shape=batch_input_shape,
          dtype=dtype,
          name=self.name,
          sparse=sparse,
          ragged=ragged)

    self.is_placeholder = True
    self._batch_input_shape = batch_input_shape
  else:
    raise_eager_tensor_error = False
    if keras_tensor.keras_tensors_enabled():
      if not isinstance(input_tensor, keras_tensor.KerasTensor):
        raise_eager_tensor_error = True
    else:
      if not tf_utils.is_symbolic_tensor(input_tensor):
        raise_eager_tensor_error = True
    if raise_eager_tensor_error:
      raise ValueError('You should not pass an EagerTensor to `Input`. '
                       'For example, instead of creating an '
                       'InputLayer, you should instantiate your model and '
                       'directly call it on your input.')
    self.is_placeholder = False
    try:
      self._batch_input_shape = tuple(input_tensor.shape.as_list())
    except ValueError:
      # If the shape cannot be represented as a tuple (e.g. unknown rank)
      self._batch_input_shape = None

  # Create an input node.
  input_tensor._keras_mask = None
  node_module.Node(layer=self, outputs=input_tensor)

  # Store type spec
  if isinstance(input_tensor,
                (composite_tensor.CompositeTensor, keras_tensor.KerasTensor)):
    self._type_spec = input_tensor._type_spec  # pylint: disable=protected-access
  else:
    self._type_spec = tensor_spec.TensorSpec(
        shape=input_tensor.shape, dtype=input_tensor.dtype, name=self.name)
def _get_batches_of_transformed_samples(self, index_array):
    if self.data_format == 'channels_first':
        batch_x = np.zeros((len(index_array), self.x.shape[1],
                            self.frames_per_batch,
                            self.x.shape[3], self.x.shape[4]))
        if self.y is not None:
            batch_y = np.zeros((len(index_array), self.y.shape[1],
                                self.frames_per_batch,
                                self.y.shape[3], self.y.shape[4]))
    else:
        batch_x = np.zeros(tuple([len(index_array), self.frames_per_batch] +
                                 list(self.x.shape)[2:]))
        if self.y is not None:
            batch_y = np.zeros(tuple([len(index_array),
                                      self.frames_per_batch] +
                                     list(self.y.shape)[2:]))

    for i, j in enumerate(index_array):
        if self.y is not None:
            y = self.y[j]

        # Sample along the time axis
        last_frame = self.x.shape[self.time_axis] - self.frames_per_batch
        time_start = np.random.randint(0, high=last_frame)
        time_end = time_start + self.frames_per_batch
        if self.time_axis == 1:
            x = self.x[j, time_start:time_end, ...]
            if self.y is not None:
                y = self.y[j, time_start:time_end, ...]
        elif self.time_axis == 2:
            x = self.x[j, :, time_start:time_end, ...]
            if self.y is not None:
                y = self.y[j, :, time_start:time_end, ...]

        if self.y is not None:
            x, y = self.movie_data_generator.random_transform(
                x.astype(K.floatx()), y=y)
            x = self.movie_data_generator.standardize(x)
            batch_y[i] = y
        else:
            x = self.movie_data_generator.random_transform(
                x.astype(K.floatx()))

        batch_x[i] = x

    if self.save_to_dir:
        time_axis = 2 if self.data_format == 'channels_first' else 1
        for i, j in enumerate(index_array):
            for frame in range(batch_x.shape[time_axis]):
                if time_axis == 2:
                    img = array_to_img(batch_x[i, :, frame],
                                       self.data_format, scale=True)
                else:
                    img = array_to_img(batch_x[i, frame],
                                       self.data_format, scale=True)
                fname = '{prefix}_{index}_{hash}.{format}'.format(
                    prefix=self.save_prefix,
                    index=j,
                    hash=np.random.randint(1e4),
                    format=self.save_format)
                img.save(os.path.join(self.save_to_dir, fname))

                if self.y is not None:
                    # Save argmax of y batch
                    if self.time_axis == 2:
                        img_channel_axis = 0
                        img_y = batch_y[i, :, frame]
                    else:
                        img_channel_axis = -1
                        img_y = batch_y[i, frame]
                    img_y = np.argmax(img_y, axis=img_channel_axis)
                    img_y = np.expand_dims(img_y, axis=img_channel_axis)
                    img = array_to_img(img_y, self.data_format, scale=True)
                    fname = 'y_{prefix}_{index}_{hash}.{format}'.format(
                        prefix=self.save_prefix,
                        index=j,
                        hash=np.random.randint(1e4),
                        format=self.save_format)
                    img.save(os.path.join(self.save_to_dir, fname))

    if self.y is None:
        return batch_x

    if self.skip is not None:
        batch_y = [batch_y] * (self.skip + 1)

    return batch_x, batch_y
def call(self, inputs, count_weights=None):
  if isinstance(inputs, (list, np.ndarray)):
    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
  if inputs.shape.rank == 1:
    inputs = array_ops.expand_dims(inputs, 1)

  if count_weights is not None and self._output_mode != COUNT:
    raise ValueError(
        "count_weights is not used in `output_mode='tf-idf'`, "
        "or `output_mode='binary'`. Please pass a single input.")
  self._called = True
  if self._max_tokens is None:
    raise RuntimeError(
        "If you construct a `CategoryEncoding` layer with "
        "`max_tokens=None`, you need to call `adapt()` "
        "on it before using it")
  else:
    out_depth = self._max_tokens

  if self._output_mode == TFIDF:
    # If the input is a sparse tensor, we densify it with the default value
    # of -1. Because -1 is ignored by one_hot, this effectively drops the
    # non-set positions from the output encoding.
    if self._sparse:
      raise ValueError("`sparse=True` with `output_mode=tfidf` "
                       "is not supported.")
    if isinstance(inputs, sparse_tensor.SparseTensor):
      inputs = sparse_ops.sparse_tensor_to_dense(inputs, default_value=-1)
    one_hot_data = array_ops.one_hot(inputs, depth=out_depth)
    counts = math_ops.reduce_sum(one_hot_data, axis=1)
    tf_idf_data = math_ops.multiply(counts, self.tf_idf_weights)
    tf_idf_data.set_shape(tensor_shape.TensorShape((None, out_depth)))
    return tf_idf_data

  binary_output = (self._output_mode == BINARY)
  if isinstance(inputs, sparse_tensor.SparseTensor):
    max_value = math_ops.reduce_max(inputs.values)
  else:
    max_value = math_ops.reduce_max(inputs)
  condition = math_ops.greater_equal(
      math_ops.cast(out_depth, max_value.dtype), max_value)
  control_flow_ops.Assert(
      condition, ["Input must be less than max_token {}".format(out_depth)])
  if self._sparse:
    result = bincount_ops.sparse_bincount(
        inputs,
        weights=count_weights,
        minlength=out_depth,
        maxlength=out_depth,
        axis=-1,
        binary_output=binary_output)
    result = math_ops.cast(result, K.floatx())
    batch_size = array_ops.shape(result)[0]
    result = sparse_tensor.SparseTensor(
        indices=result.indices,
        values=result.values,
        dense_shape=[batch_size, out_depth])
    return result
  else:
    result = bincount_ops.bincount(
        inputs,
        weights=count_weights,
        minlength=out_depth,
        maxlength=out_depth,
        dtype=K.floatx(),
        axis=-1,
        binary_output=binary_output)
    result.set_shape(tensor_shape.TensorShape((None, out_depth)))
    return result
def train_model_conv_sample(model=None, dataset=None, optimizer=None,
                            expt="", it=0, batch_size=1, n_epoch=100,
                            direc_save="/home/vanvalen/ImageAnalysis/DeepCell2/trained_networks/",
                            direc_data="/home/vanvalen/ImageAnalysis/DeepCell2/training_data_npz/",
                            lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                            rotation_range=0, flip=True, shear=0,
                            class_weights=None):

    training_data_file_name = os.path.join(direc_data, dataset + ".npz")
    todays_date = datetime.datetime.now().strftime("%Y-%m-%d")

    file_name_save = os.path.join(
        direc_save,
        todays_date + "_" + dataset + "_" + expt + "_" + str(it) + ".h5")
    file_name_save_loss = os.path.join(
        direc_save,
        todays_date + "_" + dataset + "_" + expt + "_" + str(it) + ".npz")

    train_dict, (X_test, Y_test) = get_data(training_data_file_name,
                                            mode='conv_sample')
    class_weights = class_weights  # train_dict["class_weights"]

    # the data, shuffled and split between train and test sets
    print('Training data shape:', train_dict["channels"].shape)
    print('Training labels shape:', train_dict["labels"].shape)
    print('Testing data shape:', X_test.shape)
    print('Testing labels shape:', Y_test.shape)

    # determine the number of classes
    output_shape = model.layers[-1].output_shape
    n_classes = output_shape[-1]
    print(output_shape, n_classes)

    class_weights = np.array([1, 1, 1], dtype=K.floatx())

    def loss_function(y_true, y_pred):
        return sample_categorical_crossentropy(
            y_true, y_pred, axis=3,
            class_weights=class_weights, from_logits=False)

    model.compile(loss=loss_function,
                  optimizer=optimizer,
                  metrics=['accuracy'])

    print('Using real-time data augmentation.')

    # this will do preprocessing and realtime data augmentation
    datagen = ImageFullyConvDataGenerator(
        rotation_range=rotation_range,  # randomly rotate images by 0 to rotation_range degrees
        shear_range=shear,  # randomly shear images in the range (radians, -shear_range to shear_range)
        horizontal_flip=flip,  # randomly flip images
        vertical_flip=flip)  # randomly flip images

    x, y = datagen.flow(train_dict, batch_size=1).next()

    Y_test = np.rollaxis(Y_test, 1, 4)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        datagen.flow(train_dict, batch_size=batch_size),
        steps_per_epoch=train_dict["labels"].shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=(X_test, Y_test),
        validation_steps=X_test.shape[0] // batch_size,
        callbacks=[
            ModelCheckpoint(file_name_save, monitor='val_loss', verbose=1,
                            save_best_only=True, mode='auto'),
            LearningRateScheduler(lr_sched)
        ])

    model.save_weights(file_name_save)
    np.savez(file_name_save_loss, loss_history=loss_history.history)

    data_location = '/home/vanvalen/Data/RAW_40X_tube/set1/'
    channel_names = ["channel004", "channel001"]
    image_list = get_images_from_directory(data_location, channel_names)
    image = image_list[0]
    for j in range(image.shape[1]):
        image[0, j, :, :] = process_image(image[0, j, :, :], 30, 30, False)

    pred = model.predict(image)
    for j in range(3):
        save_name = 'feature_' + str(j) + '.tiff'
        tiff.imsave(save_name, pred[0, :, :, j])

    return model
def test_anchors_for_shape_values(self):
    sizes = [12]
    strides = [8]
    ratios = np.array([1, 2], K.floatx())
    scales = np.array([1, 2], K.floatx())
    anchor_params = utils.AnchorParameters(sizes, strides, ratios, scales)

    pyramid_levels = [3]
    image_shape = (16, 16)
    all_anchors = utils.anchors_for_shape(image_shape,
                                          pyramid_levels=pyramid_levels,
                                          anchor_params=anchor_params)

    # Anchors are generated per grid center (cy outer, cx inner), then per
    # (ratio, scale) pair, with x extents scaled by 1/sqrt(ratio) and y
    # extents by sqrt(ratio). Using assertAllClose for floating point
    # imprecisions. This loop checks the same 16 anchors the original
    # spelled out one assert at a time.
    idx = 0
    for cy in (strides[0] / 2, strides[0] * 3 / 2):
        for cx in (strides[0] / 2, strides[0] * 3 / 2):
            for ratio in ratios:
                for scale in scales:
                    half_w = (sizes[0] * scale / np.sqrt(ratio)) / 2
                    half_h = (sizes[0] * scale * np.sqrt(ratio)) / 2
                    self.assertAllClose(all_anchors[idx, :],
                                        [cx - half_w, cy - half_h,
                                         cx + half_w, cy + half_h])
                    idx += 1
def compute_output_signature(self, input_spec):
  output_shape = self.compute_output_shape(input_spec.shape.as_list())
  output_dtype = K.floatx() if self._output_mode == TFIDF else dtypes.int64
  return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype)
def iterator_fit_loop(model,
                      inputs,
                      class_weight,
                      steps_per_epoch,
                      callback_model,
                      out_labels,
                      epoch_logs,
                      val_inputs=None,
                      val_targets=None,
                      val_sample_weights=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      callback_metrics=None,
                      validation_steps=None,
                      do_validation=False,
                      batch_size=None):
  """Fit function for eager execution when input is given as dataset iterator.

  Updates the given epoch logs.

  Arguments:
      model: Instance of the `Model`.
      inputs: Input dataset iterator.
      class_weight: Optional class-weight array to weight the importance of
          samples in `inputs` based on the class they belong to, as conveyed
          by the targets from the `inputs` iterator.
      steps_per_epoch: Total number of steps (batches of samples) before
          declaring one epoch finished and starting the next epoch.
      callback_model: Instance of `Model` to callback.
      out_labels: Output labels generated from model metric names.
      epoch_logs: Dictionary of logs from every epoch.
      val_inputs: Input data for validation.
      val_targets: Target data for validation.
      val_sample_weights: Sample weight data for validation.
      epochs: Number of times to iterate over the data.
      verbose: Verbosity mode, 0, 1 or 2.
      callbacks: List of callbacks to be called during training.
      callback_metrics: List of strings, the display names of the metrics
          passed to the callbacks. They should be the concatenation of the
          display names of the outputs of `f` and the list of display names
          of the outputs of `f_val`.
      validation_steps: Number of steps to run validation for (only if doing
          validation from data tensors). Ignored with the default value of
          `None`.
      do_validation: Boolean value indicating whether we should do validation.
      batch_size: int, val_inputs and val_targets will be evaled batch by
          batch with size batch_size if they are array.

  Raises:
      ValueError: In case of mismatch between given number of inputs and
          expectations of the model.
  """
  assert isinstance(inputs, iterator_ops.EagerIterator)

  # make sure either x,y or x,y,sample_weights is provided
  if (not isinstance(inputs.output_shapes, (list, tuple)) or
      len(inputs.output_shapes) not in (2, 3)):
    raise ValueError('Please provide either inputs and targets '
                     'or inputs, targets, and sample_weights')

  for step_index in range(steps_per_epoch):
    batch_logs = {'batch': step_index, 'size': 1}
    callbacks.on_batch_begin(step_index, batch_logs)

    # Get data from the iterator.
    try:
      next_element = inputs.get_next()
    except errors.OutOfRangeError:
      logging.warning(
          'Your dataset iterator ran out of data; interrupting training. '
          'Make sure that your dataset can generate at least '
          '`steps_per_epoch * epochs` batches (in this case, %d batches).' %
          (steps_per_epoch * epochs))
      break

    if len(inputs.output_shapes) == 2:
      x, y = next_element
      sample_weights = None
    else:
      x, y, sample_weights = next_element

    # Validate and standardize data.
    x, y, sample_weights = model._standardize_user_data(
        x, y, sample_weight=sample_weights, class_weight=class_weight)
    x = training_utils.cast_if_floating_dtype(x)
    y = training_utils.cast_if_floating_dtype(y)
    if sample_weights:
      sample_weights = [
          training_utils.cast_if_floating_dtype(
              ops.convert_to_tensor(val, dtype=backend.floatx()))
          if val is not None else None for val in sample_weights
      ]

    if step_index == 0 and not callback_metrics:
      out_labels = model.metrics_names
      if do_validation:
        callback_metrics = copy.copy(out_labels) + [
            'val_' + n for n in out_labels
        ]
      else:
        callback_metrics = copy.copy(out_labels)
      callbacks.set_params({
          'epochs': epochs,
          'steps': steps_per_epoch,
          'verbose': verbose,
          'do_validation': do_validation,
          'metrics': callback_metrics or [],
      })

    # Train model.
    outs, loss, loss_metrics = _process_single_batch(
        model, x, y, sample_weights=sample_weights, training=True)
    if not isinstance(outs, list):
      outs = [outs]

    # Calculate metrics.
    for l, o in zip(out_labels, outs):
      batch_logs[l] = o
    # Required for eager execution
    metrics_results = _eager_metrics_fn(model, outs, y)
    batch_logs['loss'] = tensor_util.constant_value(backend.mean(loss))

    for k, v in zip(model.metrics_names,
                    [backend.mean(loss)] + loss_metrics + metrics_results):
      batch_logs[k] = tensor_util.constant_value(v)
    callbacks.on_batch_end(step_index, batch_logs)
    if callback_model.stop_training:
      break

    if step_index == steps_per_epoch - 1:
      if do_validation:
        val_outs = test_loop(
            model,
            val_inputs,
            val_targets,
            sample_weights=val_sample_weights,
            steps=validation_steps,
            verbose=0,
            batch_size=batch_size)
        if not isinstance(val_outs, list):
          val_outs = [val_outs]
        # Same labels assumed.
        for l, o in zip(out_labels, val_outs):
          epoch_logs['val_' + l] = o
def __init__(self,
             max_tokens,
             num_oov_indices,
             mask_token,
             oov_token,
             vocabulary=None,
             invert=False,
             output_mode=INT,
             sparse=False,
             pad_to_max_tokens=False,
             **kwargs):
  # If max_tokens is set, the value must be greater than 1 - otherwise we
  # are creating a 0-element vocab, which doesn't make sense.
  if max_tokens is not None and max_tokens <= 1:
    raise ValueError("If set, `max_tokens` must be greater than 1. "
                     "You passed {}".format(max_tokens))

  if num_oov_indices < 0:
    raise ValueError("`num_oov_indices` must be greater than or equal to 0. "
                     "You passed {}".format(num_oov_indices))

  # 'output_mode' must be one of (INT, BINARY, COUNT, TFIDF)
  layer_utils.validate_string_arg(
      output_mode,
      allowable_strings=(INT, BINARY, COUNT, TFIDF),
      layer_name=self.__class__.__name__,
      arg_name="output_mode")

  if invert and output_mode != INT:
    raise ValueError("`output_mode` must be {} when `invert` is true. You "
                     "passed {}".format(INT, output_mode))

  self.invert = invert
  self.max_tokens = max_tokens
  self.num_oov_indices = num_oov_indices
  self.oov_token = oov_token
  self.mask_token = mask_token
  self.output_mode = output_mode
  self.sparse = sparse
  self.pad_to_max_tokens = pad_to_max_tokens
  self._called = False
  self._vocab_size = 0
  # We need to keep track of our current vocab size outside of our layer
  # weights to support a static output shape when `output_mode != INT`. The
  # bincount ops do not set shape on their outputs, which means we have to
  # set it ourselves. We persist the current vocab size as a hidden part of
  # the config when serializing our model.
  if "vocabulary_size" in kwargs:
    self._vocab_size = kwargs["vocabulary_size"]
    del kwargs["vocabulary_size"]

  if max_tokens is not None:
    available_vocab_size = max_tokens - self._token_start_index()
  else:
    available_vocab_size = None

  super(IndexLookup, self).__init__(
      combiner=_IndexLookupCombiner(
          vocab_size=available_vocab_size,
          mask_value=mask_token,
          oov_value=oov_token,
          compute_idf=(output_mode == TFIDF)),
      **kwargs)

  # We need to save the key dtype so that we know if we're expecting int64
  # keys. If we are, we will cast int32 inputs to int64 as well.
  if invert:
    self._key_dtype = dtypes.int64
    self._value_dtype = self.dtype
    self._mask_key = 0
    self._mask_value = mask_token
    key_index = lookup_ops.TextFileIndex.LINE_NUMBER
    value_index = lookup_ops.TextFileIndex.WHOLE_LINE
    default_value = self.oov_token
    oov_indices = None
  else:
    self._key_dtype = self.dtype
    self._value_dtype = dtypes.int64
    self._mask_key = mask_token
    key_index = lookup_ops.TextFileIndex.WHOLE_LINE
    value_index = lookup_ops.TextFileIndex.LINE_NUMBER
    # Masks should map to 0 for int output and be dropped otherwise. Max ints
    # will be dropped from the bincount op.
    self._mask_value = 0 if self.output_mode == INT else dtypes.int64.max
    oov_start = self._oov_start_index()
    token_start = self._token_start_index()
    if self.num_oov_indices == 0:
      # If there are no OOV indices, we map OOV tokens to -1 for int output
      # and drop them from bagged output. Max ints will be dropped from the
      # bincount op.
      default_value = -1 if self.output_mode == INT else dtypes.int64.max
      oov_indices = None
    elif self.num_oov_indices == 1:
      # If there is only one OOV index, we can set that index as the default
      # value of the index_lookup table.
      default_value = oov_start
      oov_indices = None
    else:
      # If we have multiple OOV values, we need to do a further hashing step;
      # to make this easier, we set the OOV value to -1. (This lets us do a
      # vectorized add and cast to boolean to determine locations where we
      # need to do extra hashing.)
      default_value = -1
      oov_indices = list(range(oov_start, token_start))

  if vocabulary is not None and isinstance(vocabulary, str):
    if not os.path.exists(vocabulary):
      raise ValueError("Vocabulary file %s does not exist." % vocabulary)
    total_offset = 0 if mask_token is None else 1
    total_offset += num_oov_indices
    initializer = lookup_ops.TextFileInitializer(
        filename=vocabulary,
        key_dtype=self._key_dtype,
        key_index=key_index,
        value_dtype=self._value_dtype,
        value_index=value_index,
        value_index_offset=total_offset)

    self._table = self._static_table_class()(
        initializer, default_value=default_value)
    self._table_handler = table_utils.TableHandler(
        table=self._table,
        mask_token=self._mask_key,
        mask_value=self._mask_value,
        oov_tokens=oov_indices,
        use_v1_apis=self._use_v1_apis())
    self.max_tokens = (
        self._table_handler.table_size() + self.num_oov_indices +
        (0 if mask_token is None else 1))
  else:
    self._table = lookup_ops.MutableHashTable(
        key_dtype=self._key_dtype,
        value_dtype=self._value_dtype,
        default_value=default_value,
        name=(self._name + "_index_table"))
    self._table_handler = table_utils.TableHandler(
        table=self._table,
        oov_tokens=oov_indices,
        use_v1_apis=self._use_v1_apis())
    if vocabulary is not None:
      self.set_vocabulary(vocabulary)

  if self.output_mode == TFIDF:
    # The TF-IDF weight may have a (None,) tensorshape. This creates
    # a 1D variable with arbitrary shape, which we can assign any weight to
    # so long as it has 1 dimension. In order to properly initialize this
    # weight in Keras, we need to provide a custom callable initializer which
    # does not depend on the shape of the weight (as all other initializers
    # do) since the weight is not known. Hence the lambda shape, dtype: [0].
    if not self.pad_to_max_tokens or max_tokens is None:
      initializer = lambda shape, dtype: [0]
    else:
      initializer = init_ops.zeros_initializer

    # We are adding these here instead of in build() since they do not depend
    # on the input shape at all.
    idf_shape = (max_tokens,) if self.pad_to_max_tokens else (None,)
    self.tf_idf_weights = self._add_state_variable(
        name="idf",
        shape=tensor_shape.TensorShape(idf_shape),
        dtype=K.floatx(),
        initializer=initializer)

  tracked_table = self._add_trackable(self._table, trainable=False)
  # This is a workaround for summary() on this layer. Because the table is
  # not mutable during training, the effective number of parameters (and so
  # the weight shape) is 0; we add this as an attr so that the parameter
  # counting code in the Model object doesn't throw an attribute error.
  tracked_table.shape = tensor_shape.TensorShape((0,))
def reshape_movie(X, y, reshape_size=256):
    """Reshape tensor of dimension 5 to have x and y of size reshape_size.
    Adds overlapping slices to batches.
    E.g. reshape_size of 256 yields (1, 5, 1024, 1024, 1) -> (16, 5, 256, 256, 1)

    Args:
        X (numpy.array): raw 5D image tensor
        y (numpy.array): label mask of 5D image tensor
        reshape_size (int): size of the square output tensor

    Returns:
        numpy.array: reshaped X and y tensors with x and y of size
            (reshape_size, reshape_size)

    Raises:
        ValueError: X.ndim is not 5
        ValueError: y.ndim is not 5
    """
    is_channels_first = K.image_data_format() == 'channels_first'
    if X.ndim != 5:
        raise ValueError('reshape_movie expects X dim to be 5, got {}'.format(X.ndim))
    elif y.ndim != 5:
        raise ValueError('reshape_movie expects y dim to be 5, got {}'.format(y.ndim))

    image_size_x, image_size_y = X.shape[3:] if is_channels_first else X.shape[2:4]
    rep_number = int(np.ceil(float(image_size_x) / float(reshape_size)))
    new_batch_size = X.shape[0] * (rep_number) ** 2

    if is_channels_first:
        new_X_shape = (new_batch_size, X.shape[1], X.shape[2],
                       reshape_size, reshape_size)
        new_y_shape = (new_batch_size, y.shape[1], y.shape[2],
                       reshape_size, reshape_size)
    else:
        new_X_shape = (new_batch_size, X.shape[1],
                       reshape_size, reshape_size, X.shape[4])
        new_y_shape = (new_batch_size, y.shape[1],
                       reshape_size, reshape_size, y.shape[4])

    new_X = np.zeros(new_X_shape, dtype=K.floatx())
    new_y = np.zeros(new_y_shape, dtype='int32')

    counter = 0
    row_axis = 3 if is_channels_first else 2
    col_axis = 4 if is_channels_first else 3
    for b in range(X.shape[0]):
        for i in range(rep_number):
            for j in range(rep_number):
                if i != rep_number - 1:
                    x_start, x_end = i * reshape_size, (i + 1) * reshape_size
                else:
                    x_start, x_end = -reshape_size, X.shape[row_axis]
                if j != rep_number - 1:
                    y_start, y_end = j * reshape_size, (j + 1) * reshape_size
                else:
                    y_start, y_end = -reshape_size, y.shape[col_axis]

                if is_channels_first:
                    new_X[counter] = X[b, :, :, x_start:x_end, y_start:y_end]
                    new_y[counter] = relabel_movie(
                        y[b, :, :, x_start:x_end, y_start:y_end])
                else:
                    new_X[counter] = X[b, :, x_start:x_end, y_start:y_end, :]
                    new_y[counter] = relabel_movie(
                        y[b, :, x_start:x_end, y_start:y_end, :])

                counter += 1

    print('Reshaped feature data from {} to {}'.format(y.shape, new_y.shape))
    print('Reshaped training data from {} to {}'.format(X.shape, new_X.shape))
    return new_X, new_y
def compute_output_signature(self, input_spec):
    output_shape = self.compute_output_shape(input_spec.shape.as_list())
    output_dtype = self._value_dtype if self.output_mode == INT else K.floatx()
    return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype)
def inner_distance_transform_3d(mask, bins=None,
                                erosion_width=None,
                                alpha=0.1, beta=1,
                                sampling=[0.5, 0.217, 0.217]):
    """Transform a label mask for a z-stack with an inner distance transform.

    inner_distance = 1 / (1 + beta * alpha * distance_to_center)

    Args:
        mask (numpy.array): A label mask (y data).
        bins (int): The number of transformed distance classes.
            Defaults to None.
        erosion_width (int): Number of pixels to erode edges of each label.
        alpha (float, str): Coefficient to reduce the magnitude of the
            distance value. If 'auto', determines alpha for each cell based
            on the cell area. Defaults to 0.1.
        beta (float): Scale parameter that is used when alpha is set to auto.
            Defaults to 1.
        sampling (list): Spacing of pixels along each dimension.
            Defaults to [0.5, 0.217, 0.217].

    Returns:
        numpy.array: A mask of the same shape as the input mask, with each
            label being a distance class from 0 to bins - 1.

    Raises:
        ValueError: alpha is a string but not set to "auto".
    """
    # Check input to alpha
    if isinstance(alpha, str):
        if alpha.lower() != 'auto':
            raise ValueError('alpha must be set to "auto"')

    mask = np.squeeze(mask)
    mask = erode_edges(mask, erosion_width)

    distance = ndimage.distance_transform_edt(mask, sampling=sampling)
    distance = distance.astype(K.floatx())

    label_matrix = label(mask)

    inner_distance = np.zeros(distance.shape, dtype=K.floatx())
    for prop in regionprops(label_matrix, distance):
        coords = prop.coords
        center = prop.weighted_centroid
        distance_to_center = (coords - center) * np.array(sampling)
        distance_to_center = np.sum(distance_to_center ** 2, axis=1)

        # Determine alpha to use
        if str(alpha).lower() == 'auto':
            _alpha = 1 / np.cbrt(prop.area)
        else:
            _alpha = float(alpha)

        center_transform = 1 / (1 + beta * _alpha * distance_to_center)
        coords_z = coords[:, 0]
        coords_x = coords[:, 1]
        coords_y = coords[:, 2]
        inner_distance[coords_z, coords_x, coords_y] = center_transform

    if bins is None:
        return inner_distance

    # divide into bins
    min_dist = np.amin(inner_distance.flatten())
    max_dist = np.amax(inner_distance.flatten())
    distance_bins = np.linspace(min_dist - K.epsilon(),
                                max_dist + K.epsilon(),
                                num=bins + 1)
    inner_distance = np.digitize(inner_distance, distance_bins, right=True)
    return inner_distance - 1  # minimum distance should be 0, not 1
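The inner-distance formula above is easy to sanity-check in plain numpy; this small sketch evaluates 1 / (1 + beta * alpha * distance_to_center) at the default alpha=0.1 and beta=1:

import numpy as np

alpha, beta = 0.1, 1.0
d = np.array([0.0, 1.0, 4.0, 25.0])  # squared, sampling-scaled distances to the centroid
print(1.0 / (1.0 + beta * alpha * d))
# -> [1.         0.90909091 0.71428571 0.28571429]
# The transform equals 1 at the cell center and decays toward 0 with distance.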
def _cast_tensor_to_floatx(x): """Cast tensor to keras's floatx dtype if it is not already the same dtype.""" if x.dtype == K.floatx(): return x else: return math_ops.cast(x, K.floatx())
def categorical_accuracy(y_true, y_pred): return math_ops.cast( math_ops.equal(math_ops.argmax(y_true, axis=-1), math_ops.argmax(y_pred, axis=-1)), K.floatx())
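A plain-numpy sketch of what categorical_accuracy computes, for a concrete trace: compare the argmax of the one-hot targets with the argmax of the predictions.

import numpy as np

y_true = np.array([[0, 1, 0], [1, 0, 0]])              # one-hot classes: 1, 0
y_pred = np.array([[0.1, 0.8, 0.1], [0.3, 0.6, 0.1]])  # argmax classes: 1, 1
acc = (y_true.argmax(axis=-1) == y_pred.argmax(axis=-1)).astype('float32')
print(acc)  # -> [1. 0.], i.e. one of the two samples is classified correctly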
def cast_single_tensor(x): if tensor_util.is_tensor(x) and x.dtype.is_floating: return math_ops.cast(x, dtype=K.floatx()) return x
def reshape_matrix(X, y, reshape_size=256):
    """Reshape matrix of dimension 4 to have x and y of size reshape_size.

    Adds overlapping slices to batches.
    E.g. reshape_size of 256 yields (1, 1024, 1024, 1) -> (16, 256, 256, 1)

    The input image is divided into subimages of side length reshape_size,
    with the last row and column of subimages overlapping the one before the
    last if the original image side lengths are not divisible by reshape_size.

    Args:
        X (numpy.array): raw 4D image tensor
        y (numpy.array): label mask of 4D image data
        reshape_size (int, list): size of the output tensor.
            If input is an int, output images are square with side length
            equal to reshape_size. If it is a list of 2 ints, the output
            image size is reshape_size[0] x reshape_size[1].

    Returns:
        numpy.array: reshaped X and y 4D tensors with
            shape[1:3] = (reshape_size, reshape_size) if reshape_size is an
            int, and shape[1:3] = reshape_size if it is a list of length 2.

    Raises:
        ValueError: X.ndim is not 4
        ValueError: y.ndim is not 4
    """
    is_channels_first = K.image_data_format() == 'channels_first'
    if X.ndim != 4:
        raise ValueError('reshape_matrix expects X dim to be 4, got {}'.format(X.ndim))
    elif y.ndim != 4:
        raise ValueError('reshape_matrix expects y dim to be 4, got {}'.format(y.ndim))

    if isinstance(reshape_size, int):
        reshape_size_x = reshape_size_y = reshape_size
    elif len(reshape_size) == 2 and all(isinstance(x, int) for x in reshape_size):
        reshape_size_x, reshape_size_y = reshape_size
    else:
        raise ValueError('reshape_size must be an integer or an iterable containing 2 integers.')

    image_size_x, image_size_y = X.shape[2:] if is_channels_first else X.shape[1:3]
    rep_number_x = int(np.ceil(float(image_size_x) / float(reshape_size_x)))
    rep_number_y = int(np.ceil(float(image_size_y) / float(reshape_size_y)))
    new_batch_size = X.shape[0] * rep_number_x * rep_number_y

    if is_channels_first:
        new_X_shape = (new_batch_size, X.shape[1], reshape_size_x, reshape_size_y)
        new_y_shape = (new_batch_size, y.shape[1], reshape_size_x, reshape_size_y)
    else:
        new_X_shape = (new_batch_size, reshape_size_x, reshape_size_y, X.shape[3])
        new_y_shape = (new_batch_size, reshape_size_x, reshape_size_y, y.shape[3])

    new_X = np.zeros(new_X_shape, dtype=K.floatx())
    new_y = np.zeros(new_y_shape, dtype='int32')

    counter = 0
    for b in range(X.shape[0]):
        for i in range(rep_number_x):
            for j in range(rep_number_y):
                _axis = 2 if is_channels_first else 1
                if i != rep_number_x - 1:
                    x_start, x_end = i * reshape_size_x, (i + 1) * reshape_size_x
                else:
                    x_start, x_end = -reshape_size_x, X.shape[_axis]
                if j != rep_number_y - 1:
                    y_start, y_end = j * reshape_size_y, (j + 1) * reshape_size_y
                else:
                    y_start, y_end = -reshape_size_y, y.shape[_axis + 1]

                if is_channels_first:
                    new_X[counter] = X[b, :, x_start:x_end, y_start:y_end]
                    new_y[counter] = y[b, :, x_start:x_end, y_start:y_end]
                else:
                    new_X[counter] = X[b, x_start:x_end, y_start:y_end, :]
                    new_y[counter] = y[b, x_start:x_end, y_start:y_end, :]
                new_y[counter] = relabel_movie(new_y[counter])
                counter += 1

    print('Reshaped feature data from {} to {}'.format(X.shape, new_X.shape))
    print('Reshaped label data from {} to {}'.format(y.shape, new_y.shape))
    return new_X, new_y
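A usage sketch mirroring the docstring example, with hypothetical data; it assumes channels_last ordering and that the module's relabel_movie helper is importable:

import numpy as np

X = np.zeros((1, 1024, 1024, 1), dtype='float32')
y = np.zeros((1, 1024, 1024, 1), dtype='int32')
new_X, new_y = reshape_matrix(X, y, reshape_size=256)
assert new_X.shape == (16, 256, 256, 1)  # 4 x 4 tiles per input image
assert new_y.shape == (16, 256, 256, 1)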
def __init__(self, name=None, dtype=None): super(Metric, self).__init__(name=name, dtype=dtype) self.stateful = True # All metric layers are stateful. self.built = True self._dtype = K.floatx() if dtype is None else dtypes.as_dtype( dtype).name
def __init__(self, max_tokens=None, standardize=LOWER_AND_STRIP_PUNCTUATION, split=SPLIT_ON_WHITESPACE, ngrams=None, output_mode=INT, output_sequence_length=None, pad_to_max_tokens=True, **kwargs): # This layer only applies to string processing, and so should only have # a dtype of 'string'. if "dtype" in kwargs and kwargs["dtype"] != dtypes.string: raise ValueError( "TextVectorization may only have a dtype of string.") elif "dtype" not in kwargs: kwargs["dtype"] = dtypes.string # 'standardize' must be one of (None, LOWER_AND_STRIP_PUNCTUATION, callable) _validate_string_arg(standardize, allowable_strings=[LOWER_AND_STRIP_PUNCTUATION], arg_name="standardize") # 'split' must be one of (None, SPLIT_ON_WHITESPACE, callable) _validate_string_arg(split, allowable_strings=[SPLIT_ON_WHITESPACE], arg_name="split") # 'output_mode' must be one of (None, INT, COUNT, BINARY, TFIDF) _validate_string_arg(output_mode, allowable_strings=[INT, COUNT, BINARY, TFIDF], arg_name="output_mode", allow_callables=False) # 'ngrams' must be one of (None, int, tuple(int)) if not (ngrams is None or isinstance(ngrams, int) or isinstance(ngrams, tuple) and all(isinstance(item, int) for item in ngrams)): raise ValueError( ("`ngrams` must be None, an integer, or a tuple of " "integers. Got %s") % (ngrams, )) # 'output_sequence_length' must be one of (None, int) and is only # set if output_mode is INT. if (output_mode == INT and not (isinstance(output_sequence_length, int) or (output_sequence_length is None))): raise ValueError( "`output_sequence_length` must be either None or an " "integer when `output_mode` is 'int'. " "Got %s" % output_sequence_length) if output_mode != INT and output_sequence_length is not None: raise ValueError("`output_sequence_length` must not be set if " "`output_mode` is not 'int'.") self._max_tokens = max_tokens # In INT mode, we have two reserved values (PAD and OOV). However, non-INT # modes don't have a PAD value, so we only need to reserve one value. self._reserved_values = 2 if output_mode == INT else 1 # In INT mode, the zero value is reserved for padding (per Keras standard # padding approaches). In non-INT modes, there is no padding so we can set # the OOV value to zero instead of one. self._oov_value = 1 if output_mode == INT else 0 # We always reduce the max token number by 1 to account for the OOV token # if it is set. The PAD marker isn't really a token (it's the absence of a # token) so we don't account for it here. self._max_vocab_size = max_tokens - 1 if max_tokens is not None else None self._standardize = standardize self._split = split self._ngrams_arg = ngrams if isinstance(ngrams, int): self._ngrams = tuple(range(1, ngrams + 1)) else: self._ngrams = ngrams self._output_mode = output_mode self._output_sequence_length = output_sequence_length self._pad_to_max = pad_to_max_tokens self._has_vocab = False super(TextVectorization, self).__init__(combiner=_TextVectorizationCombiner( self._max_vocab_size, compute_idf=output_mode == TFIDF), **kwargs) self._table = lookup_ops.MutableHashTable( key_dtype=dtypes.string, value_dtype=dtypes.int64, default_value=self._oov_value, name=(self._name + "_index_table")) def fail(_): raise NotImplementedError( "Saving is not yet supported for TextVectorization layers.") self._table._list_extra_dependencies_for_serialization = fail # pylint: disable=protected-access self._add_trackable(self._table, trainable=False) # We are adding this here instead of in build() since it does not depend # on the input shape at all. 
if self._output_mode == TFIDF: # Create the TFIDF weight, but use a (None,) tensorshape. This creates # a 1D variable with arbitrary shape, which we can assign any weight to # so long as it has 1 dimension. In order to properly initialize this # weight in Keras, we need to provide a custom callable initializer which # does not depend on the shape of the weight (as all other initializers # do) since the weight is not known. Hence the lambda shape, dtype: [0]. self._tf_idf_weights = self.add_weight( name="tfidf_data", shape=tensor_shape.TensorShape((None, )), dtype=K.floatx(), trainable=False, initializer=lambda shape, dtype: [0])
def call(self, inputs): return inputs * math_ops.cast( math_ops.greater(inputs, self.theta), K.floatx())
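This call implements a thresholded linear unit: values strictly above self.theta pass through unchanged and everything else is zeroed. A numpy sketch of the same elementwise computation (theta=1.0 chosen for illustration):

import numpy as np

theta = 1.0
inputs = np.array([2.0, 0.5, 1.0, 3.0])
print(inputs * (inputs > theta).astype('float32'))
# -> [2. 0. 0. 3.]; the comparison is strict, so inputs == theta are zeroed too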
def generate_placeholders_from_shape(shape): return array_ops.placeholder(shape=shape, dtype=backend.floatx())
def __init__(self,
             train_dict,
             movie_data_generator,
             batch_size=32,
             shuffle=False,
             transform=None,
             transform_kwargs={},
             balance_classes=False,
             max_class_samples=None,
             window_size=(30, 30, 5),
             seed=None,
             data_format='channels_last',
             save_to_dir=None,
             save_prefix='',
             save_format='png'):
    X, y = train_dict['X'], train_dict['y']
    if y is not None and X.shape[0] != y.shape[0]:
        raise ValueError('`X` (movie data) and `y` (labels) '
                         'should have the same size. Found '
                         'X.shape = {}, y.shape = {}'.format(X.shape, y.shape))

    self.channel_axis = 4 if data_format == 'channels_last' else 1
    self.time_axis = 1 if data_format == 'channels_last' else 2
    self.x = np.asarray(X, dtype=K.floatx())
    y = _transform_masks(y, transform, data_format=data_format, **transform_kwargs)

    if self.x.ndim != 5:
        raise ValueError('Input data in `SampleMovieArrayIterator` '
                         'should have rank 5. You passed an array '
                         'with shape', self.x.shape)

    window_size = conv_utils.normalize_tuple(window_size, 3, 'window_size')

    pixels_z, pixels_x, pixels_y, batch, y = sample_label_movie(
        y=y,
        padding='valid',
        window_size=window_size,
        max_training_examples=None,
        data_format=data_format)

    self.y = y
    self.win_x = window_size[0]
    self.win_y = window_size[1]
    self.win_z = window_size[2]
    self.pixels_x = pixels_x
    self.pixels_y = pixels_y
    self.pixels_z = pixels_z
    self.batch = batch
    self.movie_data_generator = movie_data_generator
    self.data_format = data_format
    self.save_to_dir = save_to_dir
    self.save_prefix = save_prefix
    self.save_format = save_format

    self.class_balance(max_class_samples, balance_classes, seed=seed)

    self.y = to_categorical(self.y).astype('int32')
    super(SampleMovieArrayIterator, self).__init__(
        len(self.y), batch_size, shuffle, seed)
def convert_for_inspection(t):
    # Tensors and ndarrays already expose shape and dtype; convert anything
    # else to a numpy array so it can be inspected uniformly.
    if getattr(t, "shape", None) is not None and getattr(t, "dtype", None) is not None:
        return t
    return np.array(t, dtype=backend.floatx())
def __call__(self, shape, dtype=K.floatx(), **kwargs): kernel_height, kernel_width, _, out_filters = shape fan_out = int(kernel_height * kernel_width * out_filters) return tf.random_normal( shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
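This initializer draws from a normal distribution scaled by the kernel's fan-out (He-style, but normalized by kernel_h * kernel_w * out_filters rather than fan-in). An equivalent numpy sketch of the scaling, with a hypothetical kernel shape:

import numpy as np

shape = (3, 3, 16, 32)  # (kernel_h, kernel_w, in_filters, out_filters)
fan_out = shape[0] * shape[1] * shape[3]  # 3 * 3 * 32 = 288
kernel = np.random.normal(loc=0.0, scale=np.sqrt(2.0 / fan_out), size=shape)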
def build(self, input_shape):
    self.rbm_weight = self.add_weight(name='rbm_weight',
                                      shape=(input_shape[1], self.output_dim),
                                      initializer='uniform',  # Which initializer is optimal?
                                      trainable=True)
    self.hidden_bias = self.add_weight(name='rbm_hidden_bias',
                                       shape=(self.output_dim,),
                                       initializer='uniform',
                                       trainable=True)
    self.visible_bias = K.variable(initializers.get('uniform')((input_shape[1],)),
                                   dtype=K.floatx(),
                                   name='rbm_visible_bias')

    # Make symbolic computation objects.
    if self.mode == MODE_VISIBLE_BERNOULLI:
        # Transform visible units.
        self.input_visible = K.placeholder(shape=(None, input_shape[1]),
                                           name='input_visible')
        self.transform = K.cast(
            K.less(
                K.random_uniform(shape=(self.hps['batch_size'], input_shape[1])),
                K.sigmoid(
                    K.dot(self.input_visible, self.rbm_weight) + self.hidden_bias)),
            K.floatx())
        self.transform_func = K.function([self.input_visible], [self.transform])

        # Transform hidden units.
        self.input_hidden = K.placeholder(shape=(None, self.output_dim),
                                          name='input_hidden')
        self.inv_transform = K.cast(
            K.less(
                K.random_uniform(shape=(self.hps['batch_size'], input_shape[1])),
                K.sigmoid(
                    K.dot(self.input_hidden, K.transpose(self.rbm_weight)) +
                    self.visible_bias)),
            K.floatx())
        self.inv_transform_func = K.function([self.input_hidden], [self.inv_transform])
    elif self.mode == MODE_VISIBLE_GAUSSIAN:
        # Transform visible units.
        self.input_visible = K.placeholder(shape=(None, input_shape[1]),
                                           name='input_visible')
        self.transform = K.cast(
            K.less(
                K.random_uniform(shape=(self.hps['batch_size'], input_shape[1])),
                K.relu(
                    K.dot(self.input_visible, self.rbm_weight) + self.hidden_bias)),
            K.floatx())  # ?
        self.transform_func = K.function([self.input_visible], [self.transform])

        # Transform hidden units.
        self.input_hidden = K.placeholder(shape=(None, self.output_dim),
                                          name='input_hidden')
        self.inv_transform = Ke.multivariate_normal_diag(
            loc=(K.dot(self.input_hidden, K.transpose(self.rbm_weight)) +
                 self.visible_bias),
            scale_diag=np.ones(shape=(self.hps['batch_size'], input_shape[1]))).sample()
        self.inv_transform_func = K.function([self.input_hidden], [self.inv_transform])
    else:
        # TODO
        pass

    # Calculate free energy. #?
    self.free_energy = -1 * (
        K.squeeze(K.dot(self.input_visible, K.expand_dims(self.visible_bias, axis=-1)), -1) +
        K.sum(K.log(1 + K.exp(K.dot(self.input_visible, self.rbm_weight) +
                              self.hidden_bias)), axis=-1))
    self.free_energy_func = K.function([self.input_visible], [self.free_energy])

    super(RBM, self).build(input_shape)
def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
               input_data=None, expected_output=None,
               expected_output_dtype=None, fixed_batch_size=False,
               supports_masking=False):
    # generate input data
    if kwargs is None:
        kwargs = {}
    if input_data is None:
        if not input_shape:
            raise AssertionError()
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        input_mask = []
        if all(isinstance(e, tuple) for e in input_data_shape):
            input_data = []
            for e in input_data_shape:
                input_data.append((10 * np.random.random(e)).astype(input_dtype))
                if supports_masking:
                    a = np.full(e[:2], False)
                    a[:, :e[1] // 2] = True
                    input_mask.append(a)
        else:
            input_data = 10 * np.random.random(input_data_shape)
            input_data = input_data.astype(input_dtype)
            if supports_masking:
                a = np.full(input_data_shape[:2], False)
                a[:, :input_data_shape[1] // 2] = True
                input_mask.append(a)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights, set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if isinstance(input_shape, list):
        if fixed_batch_size:
            x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [Input(batch_shape=e[0:2], dtype=bool) for e in input_shape]
        else:
            x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [Input(shape=(e[1],), dtype=bool) for e in input_shape]
    else:
        if fixed_batch_size:
            x = Input(batch_shape=input_shape, dtype=input_dtype)
            if supports_masking:
                mask = Input(batch_shape=input_shape[0:2], dtype=bool)
        else:
            x = Input(shape=input_shape[1:], dtype=input_dtype)
            if supports_masking:
                mask = Input(shape=(input_shape[1],), dtype=bool)

    if supports_masking:
        y = layer(Masking()(x), mask=mask)
    else:
        y = layer(x)
    if not (K.dtype(y) == expected_output_dtype):
        raise AssertionError()

    # check with the functional API
    if supports_masking:
        model = Model([x, mask], y)
        actual_output = model.predict([input_data, input_mask[0]])
    else:
        model = Model(x, y)
        actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        if expected_dim is not None:
            if not (expected_dim == actual_dim):
                raise AssertionError("expected_shape", expected_output_shape,
                                     "actual_shape", actual_output_shape)

    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
if has_arg(layer.call, 'training'): model.compile('rmsprop', 'mse') model.train_on_batch(input_data, actual_output) # test instantiation from layer config layer_config = layer.get_config() layer_config['batch_input_shape'] = input_shape layer = layer.__class__.from_config(layer_config) # for further checks in the caller function return actual_output
def iterator_test_loop(model, inputs, steps, verbose=0):
    """Test function for eager execution when input is given as dataset iterator.

    Arguments:
        model: Model instance that is being evaluated in Eager mode.
        inputs: Input dataset iterator.
        steps: Total number of steps (batches of samples) before declaring
            predictions finished.
        verbose: Verbosity mode.

    Returns:
        Scalar loss (if the model has a single output and no metrics) or list
        of scalars (if the model has multiple outputs and/or metrics). The
        attribute `model.metrics_names` will give you the display labels for
        the scalar outputs.

    Raises:
        ValueError: In case of mismatch between given number of inputs and
            expectations of the model.
    """
    assert isinstance(inputs, iterator_ops.EagerIterator)
    # make sure either x,y or x,y,sample_weights is provided
    if (not isinstance(inputs.output_shapes, (list, tuple)) or
            len(inputs.output_shapes) < 2 or len(inputs.output_shapes) > 3):
        raise ValueError('Please provide either inputs and targets '
                         'or inputs, targets, and sample_weights')
    outs = []

    # Create metric wrapper for the losses.
    output_loss_metrics = []
    for i in range(len(model.outputs)):
        loss_fn = model.loss_functions[i]
        mean_wrapped_loss = metrics_module.MeanMetricWrapper(
            loss_fn, name=loss_fn.__name__)
        output_loss_metrics.append(mean_wrapped_loss)

    num_samples = 0
    if verbose == 1:
        progbar = generic_utils.Progbar(target=steps)
    for step_index in range(steps):
        # Get data from the iterator.
        try:
            next_element = inputs.get_next()
        except errors.OutOfRangeError:
            logging.warning(
                'Your dataset iterator ran out of data; interrupting testing. '
                'Make sure that your dataset can generate at least `steps` batches '
                '(in this case, %d batches). You may need to use the repeat() '
                'function when building your dataset.', steps)
            break

        if len(inputs.output_shapes) == 2:
            x, y = next_element
            sample_weights = None
        else:
            x, y, sample_weights = next_element

        # Validate and standardize data.
        x, y, sample_weights = model._standardize_user_data(
            x, y, sample_weight=sample_weights)
        x = training_utils.cast_if_floating_dtype(x)
        y = training_utils.cast_if_floating_dtype(y)
        if sample_weights:
            sample_weights = [
                training_utils.cast_if_floating_dtype(
                    ops.convert_to_tensor(val, dtype=backend.floatx()))
                if val is not None else None for val in sample_weights
            ]

        if step_index == 0:
            # Get stateful metrics indices. We do not do this before the `steps`
            # loop because model will be compiled only in the first iteration
            # of this loop in the deferred build scenario.
            if hasattr(model, 'metrics'):
                for m in model.stateful_metric_functions:
                    m.reset_states()
                for m in output_loss_metrics:
                    m.reset_states()

        # Calculate model output, loss values.
        loss_outs, loss, _, aggregated_loss_metrics, masks = _model_loss(
            model,
            x,
            y,
            output_loss_metrics=output_loss_metrics,
            sample_weights=sample_weights,
            training=False)
        metrics_results = _eager_metrics_fn(
            model, loss_outs, y, sample_weights=sample_weights, masks=masks)
        batch_outs = []
        for _, v in zip(model.metrics_names,
                        [backend.mean(loss)] + aggregated_loss_metrics +
                        metrics_results):
            batch_outs.append(tensor_util.constant_value(v))

        # Get current step size.
        if isinstance(x, list):
            step_size = x[0].get_shape().as_list()[0]
        elif isinstance(x, dict):
            step_size = list(x.values())[0].get_shape().as_list()[0]
        else:
            step_size = x.get_shape().as_list()[0]

        # Accumulate results in output array.
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]
        if step_index == 0:
            for _ in enumerate(batch_outs):
                outs.append(0.)
outs[0] += batch_outs[0] * step_size # index 0 = 'loss' outs[1:] = batch_outs[1:] # Calculate sample size. num_samples += step_size if verbose == 1: progbar.update(step_index + 1) outs[0] /= num_samples # index 0 = 'loss' if len(outs) == 1: return outs[0] return outs
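The accumulation above is a size-weighted mean: each batch loss is scaled by its batch size before the final division by num_samples, so a short final batch does not skew the result. A plain-Python sketch of the arithmetic with two uneven hypothetical batches:

batch_losses, batch_sizes = [0.5, 1.0], [32, 8]
total = sum(loss * n for loss, n in zip(batch_losses, batch_sizes))
print(total / sum(batch_sizes))  # -> 0.6, not the unweighted mean of 0.75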
def __call__(self, w): return w * math_ops.cast(math_ops.greater_equal(w, 0.), K.floatx())
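This constraint projects weights onto the non-negative orthant after each update: negative entries are multiplied by 0 and the rest by 1. A numpy sketch of the same computation:

import numpy as np

w = np.array([-0.3, 0.0, 0.7])
print(w * (w >= 0.0))  # -> [-0.  0.  0.7]; the -0. is just float negative zero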
def iterator_fit_loop(model,
                      inputs,
                      class_weight,
                      steps_per_epoch,
                      epoch_logs,
                      val_inputs=None,
                      val_targets=None,
                      val_sample_weights=None,
                      epochs=1,
                      verbose=1,
                      callbacks=None,
                      validation_steps=None,
                      do_validation=False,
                      batch_size=None,
                      output_loss_metrics=None):
    """Fit function for eager execution when input is given as dataset iterator.

    Updates the given epoch logs.

    Arguments:
        model: Instance of the `Model`.
        inputs: Input dataset iterator.
        class_weight: Optional class-weight array to weight the importance of
            samples in `inputs` based on the class they belong to, as conveyed
            by the targets from the `inputs` iterator.
        steps_per_epoch: Total number of steps (batches of samples) before
            declaring one epoch finished and starting the next epoch.
        epoch_logs: Dictionary of logs from every epoch.
        val_inputs: Input data for validation.
        val_targets: Target data for validation.
        val_sample_weights: Sample weight data for validation.
        epochs: Number of times to iterate over the data.
        verbose: Verbosity mode, 0, 1 or 2.
        callbacks: CallbackList instance. Controls callbacks during training.
        validation_steps: Number of steps to run validation for (only if doing
            validation from data tensors). Ignored with the default value of
            `None`.
        do_validation: Boolean value indicating whether we should do validation.
        batch_size: int, val_inputs and val_targets will be evaluated batch by
            batch with size batch_size if they are arrays.
        output_loss_metrics: List of metrics that are used to aggregate output
            loss values.

    Raises:
        ValueError: In case of mismatch between given number of inputs and
            expectations of the model.
    """
    assert isinstance(inputs, iterator_ops.EagerIterator)

    # make sure either x,y or x,y,sample_weights is provided
    if (not isinstance(inputs.output_shapes, (list, tuple)) or
            len(inputs.output_shapes) not in (2, 3)):
        raise ValueError('Please provide either inputs and targets '
                         'or inputs, targets, and sample_weights')

    for step_index in range(steps_per_epoch):
        batch_logs = {'batch': step_index, 'size': 1}
        callbacks.on_batch_begin(step_index, batch_logs)

        # Get data from the iterator.
        try:
            next_element = inputs.get_next()
        except errors.OutOfRangeError:
            logging.warning(
                'Your dataset iterator ran out of data; interrupting training. Make '
                'sure that your dataset can generate at least '
                '`steps_per_epoch * epochs` batches (in this case, %d batches). You '
                'may need to use the repeat() function when building your '
                'dataset.' % (steps_per_epoch * epochs))
            break

        if len(inputs.output_shapes) == 2:
            x, y = next_element
            sample_weights = None
        else:
            x, y, sample_weights = next_element

        # Validate and standardize data.
        x, y, sample_weights = model._standardize_user_data(
            x, y, sample_weight=sample_weights, class_weight=class_weight)
        x = training_utils.cast_if_floating_dtype(x)
        y = training_utils.cast_if_floating_dtype(y)
        if sample_weights:
            sample_weights = [
                training_utils.cast_if_floating_dtype(
                    ops.convert_to_tensor(val, dtype=backend.floatx()))
                if val is not None else None for val in sample_weights
            ]

        # Set stateful_metrics in callbacks. We do not do this before the
        # `steps_per_epoch` loop because model will be compiled only in the
        # first iteration of this loop in the deferred build scenario.
if step_index == 0: for cbk in callbacks: if (isinstance(cbk, cbks.BaseLogger) or isinstance(cbk, cbks.ProgbarLogger)): cbk.stateful_metrics = model.metrics_names[ 1:] # Exclude `loss` if step_index == 0 and not callbacks.params['metrics']: callback_metrics = copy.copy(model.metrics_names) if do_validation: callback_metrics += ['val_' + n for n in model.metrics_names] callbacks.set_params({ 'batch_size': batch_size, 'epochs': epochs, 'steps': steps_per_epoch, 'verbose': verbose, 'do_validation': do_validation, 'metrics': callback_metrics or [], 'validation_steps': validation_steps }) # Train model. outs, loss, _, aggregated_loss_metrics, masks = _process_single_batch( model, x, y, output_loss_metrics=output_loss_metrics, sample_weights=sample_weights, training=True) outs = generic_utils.to_list(outs) # Calculate metrics. for l, o in zip(model.metrics_names, outs): batch_logs[l] = o metrics_results = _eager_metrics_fn(model, outs, y, sample_weights=sample_weights, masks=masks) batch_logs['loss'] = tensor_util.constant_value(backend.mean(loss)) for k, v in zip(model.metrics_names, [backend.mean(loss)] + aggregated_loss_metrics + metrics_results): batch_logs[k] = tensor_util.constant_value(v) callbacks.on_batch_end(step_index, batch_logs) if callbacks.model.stop_training: break if step_index == steps_per_epoch - 1: if do_validation: val_outs = test_loop(model, val_inputs, val_targets, sample_weights=val_sample_weights, steps=validation_steps, verbose=0, batch_size=batch_size) if not isinstance(val_outs, list): val_outs = [val_outs] # Same labels assumed. for l, o in zip(model.metrics_names, val_outs): epoch_logs['val_' + l] = o
def load_training_images_3d(direc_name, training_direcs,
                            raw_image_direc, channel_names,
                            image_size, num_frames,
                            montage_mode=False):
    """Load each image in the training_direcs into a numpy array.

    Args:
        direc_name (str): directory containing folders of training data
        training_direcs (str[]): list of directories of images inside direc_name
        raw_image_direc (str): directory name inside each training dir
            with raw images
        channel_names (str[]): loads all raw images with a channel_name
            in the filename
        image_size (tuple): size of each image as tuple (x, y)
        num_frames (int): number of frames to load from each training directory
        montage_mode (bool): load images from "montaged" subdirs inside
            raw_image_direc

    Returns:
        numpy.array: 5D tensor of raw image data
    """
    is_channels_first = K.image_data_format() == 'channels_first'
    image_size_x, image_size_y = image_size

    # flatten list of lists
    X_dirs = [os.path.join(direc_name, t, raw_image_direc) for t in training_direcs]
    if montage_mode:
        X_dirs = [os.path.join(t, p) for t in X_dirs for p in os.listdir(t)]
        X_dirs = sorted_nicely(X_dirs)

    # Initialize training data array
    if is_channels_first:
        X_shape = (len(X_dirs), len(channel_names), num_frames, image_size_x, image_size_y)
    else:
        X_shape = (len(X_dirs), num_frames, image_size_x, image_size_y, len(channel_names))

    X = np.zeros(X_shape, dtype=K.floatx())

    # Load 3D training images
    for b, direc in enumerate(X_dirs):
        for c, channel in enumerate(channel_names):
            imglist = nikon_getfiles(direc, channel)
            for i, img in enumerate(imglist):
                if i >= num_frames:
                    print('Skipped final {skip} frames of {dir}, as num_frames '
                          'is {num} but there are {total} total frames'.format(
                              skip=len(imglist) - num_frames,
                              dir=direc,
                              num=num_frames,
                              total=len(imglist)))
                    break
                image_data = np.asarray(get_image(os.path.join(direc, img)))
                if is_channels_first:
                    X[b, c, i, :, :] = image_data
                else:
                    X[b, i, :, :, c] = image_data

    return X
def __init__(self,
             max_tokens,
             num_oov_indices,
             mask_token,
             oov_token,
             vocabulary=None,
             invert=False,
             output_mode=INT,
             sparse=False,
             pad_to_max_tokens=False,
             **kwargs):
    # If max_tokens is set, the value must be greater than 1 - otherwise we
    # are creating a 0-element vocab, which doesn't make sense.
    if max_tokens is not None and max_tokens <= 1:
        raise ValueError("If set, `max_tokens` must be greater than 1. "
                         "You passed {}".format(max_tokens))

    if num_oov_indices < 0:
        raise ValueError(
            "`num_oov_indices` must be greater than or equal to 0. "
            "You passed {}".format(num_oov_indices))

    # 'output_mode' must be one of (INT, BINARY, COUNT, TFIDF)
    layer_utils.validate_string_arg(output_mode,
                                    allowable_strings=(INT, BINARY, COUNT, TFIDF),
                                    layer_name=self.__class__.__name__,
                                    arg_name="output_mode")

    self.invert = invert
    self.max_tokens = max_tokens
    self.num_oov_indices = num_oov_indices
    self.oov_token = oov_token
    self.mask_token = mask_token
    self.output_mode = output_mode
    self.sparse = sparse
    self.pad_to_max_tokens = pad_to_max_tokens
    self._called = False
    self._num_special_tokens = self.num_oov_indices
    if self.mask_token is not None:
        self._num_special_tokens += 1

    # If there is only one OOV bucket, we can determine the OOV value (either
    # 0 or 1 depending on whether 0 is reserved) and set that as the default
    # value of the index_lookup table. If we have multiple OOV values, we need
    # to do a further hashing step; to make this easier, we set the OOV value
    # to -1. (This lets us do a vectorized add and cast to boolean to
    # determine locations where we need to do extra hashing.)
    if self.num_oov_indices == 1:
        self._oov_value = 0 if mask_token is None else 1
    else:
        self._oov_value = -1

    if max_tokens is not None:
        available_vocab_size = max_tokens - self._num_special_tokens
    else:
        available_vocab_size = None

    super(IndexLookup, self).__init__(combiner=_IndexLookupCombiner(
        vocab_size=available_vocab_size,
        mask_value=mask_token,
        oov_value=oov_token,
        compute_idf=(output_mode == TFIDF)),
        **kwargs)

    # We need to save the key dtype so that we know if we're expecting int64
    # keys. If we are, we will cast int32 inputs to int64 as well.
    if invert:
        self._key_dtype = dtypes.int64
        self._value_dtype = self.dtype
        oov_value = self.oov_token
        oov_indices = None
    else:
        self._key_dtype = self.dtype
        self._value_dtype = dtypes.int64
        oov_value = self._oov_value
        if self.num_oov_indices <= 1:
            oov_indices = None
        else:
            oov_start = 1 if mask_token is not None else 0
            oov_end = oov_start + num_oov_indices
            oov_indices = list(range(oov_start, oov_end))

    if vocabulary is not None and isinstance(vocabulary,
                                             lookup_ops.TextFileInitializer):
        self._table = self._static_table_class()(vocabulary,
                                                 default_value=oov_value)
        self._table_handler = table_utils.TableHandler(
            table=self._table,
            mask_token=mask_token,
            oov_tokens=oov_indices,
            use_v1_apis=self._use_v1_apis())
        self.max_tokens = (self._table_handler.table_size() +
                           self.num_oov_indices +
                           (0 if mask_token is None else 1))
    else:
        self._table = lookup_ops.MutableHashTable(
            key_dtype=self._key_dtype,
            value_dtype=self._value_dtype,
            default_value=oov_value,
            name=(self._name + "_index_table"))
        self._table_handler = table_utils.TableHandler(
            table=self._table,
            oov_tokens=oov_indices,
            use_v1_apis=self._use_v1_apis())
        if vocabulary is not None:
            self.set_vocabulary(vocabulary)

    if self.output_mode == TFIDF:
        # The TF-IDF weight may have a (None,) tensorshape. This creates
        # a 1D variable with arbitrary shape, which we can assign any weight
        # to so long as it has 1 dimension. In order to properly initialize
        # this weight in Keras, we need to provide a custom callable
        # initializer which does not depend on the shape of the weight (as all
        # other initializers do) since the weight's shape is not known. Hence
        # the lambda shape, dtype: [0].
        if not self.pad_to_max_tokens or max_tokens is None:
            initializer = lambda shape, dtype: [0]
        else:
            initializer = init_ops.zeros_initializer

        # We are adding these here instead of in build() since they do not
        # depend on the input shape at all.
        idf_shape = (max_tokens,) if self.pad_to_max_tokens else (None,)
        self.tf_idf_weights = self._add_state_variable(
            name="idf",
            shape=tensor_shape.TensorShape(idf_shape),
            dtype=K.floatx(),
            initializer=initializer)

    tracked_table = self._add_trackable(self._table, trainable=False)
    # This is a workaround for summary() on this layer. Because the table is
    # not mutable during training, the effective number of parameters (and so
    # the weight shape) is 0; we add this as an attr so that the parameter
    # counting code in the Model object doesn't throw an attribute error.
    tracked_table.shape = tensor_shape.TensorShape((0,))
def __init__(self,
             max_tokens,
             num_oov_indices,
             mask_token,
             oov_token,
             vocabulary=None,
             invert=False,
             output_mode=INT,
             sparse=False,
             pad_to_max_tokens=False,
             **kwargs):
    # If max_tokens is set, the value must be greater than 1 - otherwise we
    # are creating a 0-element vocab, which doesn't make sense.
    if max_tokens is not None and max_tokens <= 1:
        raise ValueError("If set, `max_tokens` must be greater than 1. "
                         "You passed {}".format(max_tokens))

    if num_oov_indices < 0:
        raise ValueError(
            "`num_oov_indices` must be greater than or equal to 0. "
            "You passed {}".format(num_oov_indices))

    # Support deprecated names for output_modes.
    if output_mode == "binary":
        output_mode = MULTI_HOT
    if output_mode == "tf-idf":
        output_mode = TF_IDF

    # 'output_mode' must be one of (INT, MULTI_HOT, COUNT, TF_IDF)
    layer_utils.validate_string_arg(output_mode,
                                    allowable_strings=(INT, MULTI_HOT, COUNT, TF_IDF),
                                    layer_name=self.__class__.__name__,
                                    arg_name="output_mode")

    if invert and output_mode != INT:
        raise ValueError("`output_mode` must be {} when `invert` is true. You "
                         "passed {}".format(INT, output_mode))

    self.invert = invert
    self.max_tokens = max_tokens
    self.num_oov_indices = num_oov_indices
    self.output_mode = output_mode
    self.sparse = sparse
    self.pad_to_max_tokens = pad_to_max_tokens
    self._called = False

    # A note on vocab_size: we need to always keep a non-Tensor representation
    # of vocab_size around to use in graph building. Because we might be
    # in a tf.function, we can't rely on evaluating the actual tables to
    # find the value either.
    self._vocab_size = None
    # We need to keep track of our current vocab size outside of our layer
    # weights to support a static output shape when `output_mode != INT`. The
    # bincount ops do not set shape on their outputs, which means we have to
    # set it ourselves. We persist the current vocab size as a hidden part of
    # the config when serializing our model.
    if "vocabulary_size" in kwargs:
        self._vocab_size = kwargs["vocabulary_size"]
        del kwargs["vocabulary_size"]

    restore_from_static_table = kwargs.pop("has_static_table", False)

    # Make sure the mask token and oov token are truly of the dtype we want.
    # We can ignore strings here, because they have only one dtype.
    dtype = kwargs["dtype"]
    if dtype == dtypes.int32:
        mask_token = None if mask_token is None else np.int32(mask_token)
        oov_token = None if oov_token is None else np.int32(oov_token)
    elif dtype == dtypes.int64:
        mask_token = None if mask_token is None else np.int64(mask_token)
        oov_token = None if oov_token is None else np.int64(oov_token)
    self.mask_token = mask_token
    self.oov_token = oov_token

    if max_tokens is not None:
        available_vocab_size = max_tokens - self._token_start_index()
    else:
        available_vocab_size = None

    super(IndexLookup, self).__init__(combiner=_IndexLookupCombiner(
        vocab_size=available_vocab_size,
        mask_value=mask_token,
        oov_value=oov_token,
        compute_idf=(output_mode == TF_IDF)),
        **kwargs)

    # We need to save the key dtype so that we know if we're expecting int64
    # keys. If we are, we will cast int32 inputs to int64 as well.
    if invert:
        self._key_dtype = dtypes.int64
        self._value_dtype = self.dtype
        self._mask_key = 0
        self._mask_value = mask_token
        key_index = lookup_ops.TextFileIndex.LINE_NUMBER
        value_index = lookup_ops.TextFileIndex.WHOLE_LINE
        default_value = self.oov_token
        oov_indices = None
    else:
        self._key_dtype = self.dtype
        self._value_dtype = dtypes.int64
        self._mask_key = mask_token
        key_index = lookup_ops.TextFileIndex.WHOLE_LINE
        value_index = lookup_ops.TextFileIndex.LINE_NUMBER
        # Masks should map to 0 for int output and be dropped otherwise. Max
        # ints will be dropped from the bincount op.
        self._mask_value = 0 if self.output_mode == INT else dtypes.int64.max
        oov_start = self._oov_start_index()
        token_start = self._token_start_index()
        if self.num_oov_indices == 0:
            # If there are no OOV indices, we map OOV tokens to -1 and error
            # out during call if we find a negative index.
            default_value = -1
            oov_indices = None
        elif self.num_oov_indices == 1:
            # If there is only one OOV index, we can set that index as the
            # default value of the index_lookup table.
            default_value = oov_start
            oov_indices = None
        else:
            # If we have multiple OOV values, we need to do a further hashing
            # step; to make this easier, we set the OOV value to -1. (This
            # lets us do a vectorized add and cast to boolean to determine
            # locations where we need to do extra hashing.)
            default_value = -1
            oov_indices = list(range(oov_start, token_start))

    self._static_vocabulary_path = None
    has_vocab_path = (vocabulary is not None and isinstance(vocabulary, str))
    if has_vocab_path or restore_from_static_table:
        self._has_static_table = True
        if vocabulary is None:
            # If we're restoring a layer that was saved with a static table
            # initializer, we create a fake initializer object to let the code
            # progress. The savedmodel restoration code will handle restoring
            # the actual data.
            initializer = _NullInitializer(self._key_dtype, self._value_dtype)
        else:
            if not gfile.Exists(vocabulary):
                raise ValueError("Vocabulary file %s does not exist." %
                                 (vocabulary,))
            self._static_vocabulary_path = vocabulary
            num_tokens = table_utils.num_tokens_in_file(vocabulary)
            self._vocab_size = self._token_start_index() + num_tokens

            initializer = lookup_ops.TextFileInitializer(
                filename=vocabulary,
                key_dtype=self._key_dtype,
                key_index=key_index,
                value_dtype=self._value_dtype,
                value_index=value_index,
                value_index_offset=self._token_start_index())

        self._table = lookup_ops.StaticHashTable(
            initializer, default_value=default_value)
        self._table_handler = table_utils.TableHandler(
            table=self._table,
            mask_token=self._mask_key if self.mask_token is not None else None,
            mask_value=self._mask_value,
            oov_tokens=oov_indices)

        tracked_table = self._add_trackable(self._table, trainable=False)
    else:
        self._has_static_table = False
        self._table = lookup_ops.MutableHashTable(
            key_dtype=self._key_dtype,
            value_dtype=self._value_dtype,
            default_value=default_value,
            name=(self._name + "_index_table"))
        self._table_handler = table_utils.TableHandler(
            table=self._table, oov_tokens=oov_indices)
        if vocabulary is not None:
            self.set_vocabulary(vocabulary)
        tracked_table = self._add_trackable(self._table, trainable=False)

    if self.output_mode == TF_IDF:
        # The TF-IDF weight may have a (None,) tensorshape. This creates
        # a 1D variable with arbitrary shape, which we can assign any weight
        # to so long as it has 1 dimension. In order to properly initialize
        # this weight in Keras, we need to provide a custom callable
        # initializer which does not depend on the shape of the weight (as all
        # other initializers do) since the weight's shape is not known. Hence
        # the lambda shape, dtype: [0].
        if not self.pad_to_max_tokens or max_tokens is None:
            initializer = lambda shape, dtype: [0]
        else:
            initializer = init_ops.zeros_initializer

        # We are adding these here instead of in build() since they do not
        # depend on the input shape at all.
        idf_shape = (max_tokens,) if self.pad_to_max_tokens else (None,)
        self.tf_idf_weights = self._add_state_variable(
            name="idf",
            shape=tensor_shape.TensorShape(idf_shape),
            dtype=backend.floatx(),
            initializer=initializer)

    # This is a workaround for summary() on this layer. Because the table is
    # not mutable during training, the effective number of parameters (and so
    # the weight shape) is 0; we add this as an attr so that the parameter
    # counting code in the Model object doesn't throw an attribute error.
    tracked_table.shape = tensor_shape.TensorShape((0,))