Example #1
    def build(self, input_shape):
        # responsible for trainable self.kernel weights
        super(_ConvBatchNorm2D, self).build(input_shape)

        # responsible for trainable gamma and beta weights
        self.batchnorm.build(self.compute_output_shape(input_shape))

        if self.is_quantized:
            self._weight_min_var = self.add_variable(
                'weight_min',
                initializer=initializers.Constant(-6.0),
                trainable=False)
            self._weight_max_var = self.add_variable(
                'weight_max',
                initializer=initializers.Constant(6.0),
                trainable=False)

            self.optimizer_step = self.add_weight(
                'optimizer_step',
                initializer=initializers.Constant(-1),
                dtype=dtypes.int32,
                trainable=False)

            self.post_activation = quantize_aware_activation.QuantizeAwareActivation(
                self.post_activation, self.activation_quantizer,
                self.optimizer_step, self)
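
All of these snippets use `initializers.Constant` to pin a variable to a fixed starting value. As a minimal, self-contained sketch of what the initializer itself does (assuming TensorFlow 2.x):

from tensorflow.keras import initializers

# Constant fills a tensor of the requested shape with a single value.
init = initializers.Constant(-6.0)
print(init(shape=(2, 3)).numpy())
# [[-6. -6. -6.]
#  [-6. -6. -6.]]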
Example #2
    def build(self, input_shape):
        assert len(input_shape) >= 2
        self.input_dim = input_shape[-1]

        self.kernel = self.add_weight(shape=(self.input_dim, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)

        self.sigma_kernel = self.add_weight(
            shape=(self.input_dim, self.units),
            initializer=initializers.Constant(value=self.sigma_init),
            name='sigma_kernel')

        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units, ),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
            self.sigma_bias = self.add_weight(
                shape=(self.units, ),
                initializer=initializers.Constant(value=self.sigma_init),
                name='sigma_bias')
        else:
            self.bias = None
            self.sigma_bias = None
            self.epsilon_bias = None
        # self.sample_noise()
        super(NoisyDense, self).build(input_shape)
Example #3
    def _build_for_quantization(self):
        """All Keras build() logic for quantization for fused layers."""
        if not self.is_quantized:
            return

        self._weight_min_var = self.add_variable(  # pylint: disable=protected-access
            'weight_min',
            initializer=initializers.Constant(-6.0),
            trainable=False)
        self._weight_max_var = self.add_variable(  # pylint: disable=protected-access
            'weight_max',
            initializer=initializers.Constant(6.0),
            trainable=False)

        self.optimizer_step = self.add_weight(
            'optimizer_step',
            initializer=initializers.Constant(-1),
            dtype=dtypes.int32,
            trainable=False)

        # TODO(alanchiao): re-explore if we can handle this with
        # QuantizeAwareActivation.
        self._activation_min_var = self.add_variable(  # pylint: disable=protected-access
            'activation_min',
            initializer=initializers.Constant(-6.0),
            trainable=False)
        self._activation_max_var = self.add_variable(  # pylint: disable=protected-access
            'activation_max',
            initializer=initializers.Constant(6.0),
            trainable=False)
Example #4
  def build(self, input_shape):
    super(QuantizeEmulateWrapper, self).build(input_shape)

    min_weights, max_weights = [], []
    # For each of the quantizable_weights, construct the necessary variables.
    # TODO(alanchiao): when validated, add per-channel as parameter, which
    # affects shape and other factors.
    for weight in self.layer.get_quantizable_weights():
      min_var = self.add_variable(
          'weight_min',
          initializer=initializers.Constant(-6.0),
          trainable=False)
      max_var = self.add_variable(
          'weight_max', initializer=initializers.Constant(6.0), trainable=False)
      self._unquantized_kernels.append(weight)
      min_weights.append(min_var)
      max_weights.append(max_var)

      # set_quantizable_weights on the wrapped layer removes unquantized_kernel
      # from _trainable_weights. We add it to the wrapper's _trainable_weights
      # to ensure it gets gradient updates.
      self._trainable_weights.append(weight)

    self._weight_vars = list(
        zip(self._unquantized_kernels, min_weights, max_weights))
    self._min_activation = self.add_variable(
        'activation_min',
        initializer=initializers.Constant(-6.0),
        trainable=False)
    self._max_activation = self.add_variable(
        'activation_max',
        initializer=initializers.Constant(6.0),
        trainable=False)
Example #5
    def _add_range_weights(self, layer, name):
        min_weight = layer.add_weight(name + '_min',
                                      initializer=initializers.Constant(-6.0),
                                      trainable=False)
        max_weight = layer.add_weight(name + '_max',
                                      initializer=initializers.Constant(6.0),
                                      trainable=False)

        return [min_weight, max_weight]
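
Min/max pairs like the ones created by this helper are typically fed to TensorFlow's fake-quantization ops during quantization-aware training. A rough, self-contained sketch of that downstream use (an assumption about the surrounding training code, not part of this snippet):

import tensorflow as tf

w = tf.constant([[-8.0, -1.0], [0.5, 7.0]])
w_min = tf.Variable(-6.0, trainable=False)
w_max = tf.Variable(6.0, trainable=False)
# Values are clamped to [w_min, w_max] and snapped to an 8-bit grid.
w_fq = tf.quantization.fake_quant_with_min_max_vars(w, w_min, w_max, num_bits=8)
print(w_fq.numpy())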
Example #6
    def _add_range_weights(self, name):
        min_var = self.quantize_wrapper.add_weight(
            name + '_min',
            initializer=initializers.Constant(-6.0),
            trainable=False)
        max_var = self.quantize_wrapper.add_weight(
            name + '_max',
            initializer=initializers.Constant(6.0),
            trainable=False)

        return min_var, max_var
Example #7
  def build(self, tensor_shape, name, layer):
    min_weight = layer.add_weight(
        name + '_min',
        shape=(tensor_shape[-1],),
        initializer=initializers.Constant(-6.0),
        trainable=False)
    max_weight = layer.add_weight(
        name + '_max',
        shape=(tensor_shape[-1],),
        initializer=initializers.Constant(6.0),
        trainable=False)

    return [min_weight, max_weight]
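
This variant creates one (min, max) pair per output channel, shaped (tensor_shape[-1],), which matches TensorFlow's per-channel fake-quant op. A small sketch of that pairing (again an assumption about downstream use):

import tensorflow as tf

w = tf.constant([[-8.0, 1.0], [0.5, 7.0]])          # last axis = 2 channels
w_min = tf.Variable([-6.0, -6.0], trainable=False)  # shape (channels,)
w_max = tf.Variable([6.0, 6.0], trainable=False)
w_fq = tf.quantization.fake_quant_with_min_max_vars_per_channel(
    w, w_min, w_max, num_bits=8)
print(w_fq.numpy())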
Example #8
def test_forward_works_with_mask(numpy_crf):
    logits = np.array([
        [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
        [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
    ])
    transitions = np.array([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0]
    ])

    boundary_transitions = np.array([0.1, 0.2, 0.3, 0.4, 0.6])

    tags = np.array([
            [2, 3, 4],
            [3, 2, 2]
    ])

    # Use the CRF Module with fixed transitions to compute the log_likelihood
    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(transitions),
        use_boundary=True,
        boundary_initializer=initializers.Constant(boundary_transitions),
        name="crf_layer"
    )
    # Use a non-trivial mask
    mask = np.array([
            [1, 1, 1],
            [1, 1, 0]
    ])

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    result = model.train_on_batch(logits, tags)


    numpy_crf_instance = numpy_crf(logits, mask, transitions, boundary_transitions, boundary_transitions)
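    # train_on_batch returns the mean loss over the batch of two sequences,
    # hence the division by -2 below.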
    expected = numpy_crf_instance.compute_log_likehood(tags) / -2

    assert result == approx(expected)
Example #9
    def _strip_clustering_wrapper(layer):
        if isinstance(layer, cluster_wrapper.ClusterWeights):
            if not hasattr(layer.layer, '_batch_input_shape') and\
                hasattr(layer, '_batch_input_shape'):
                layer.layer._batch_input_shape = layer._batch_input_shape

            # We reset both arrays of weights, so that we can guarantee the correct
            # order of newly created weights
            layer.layer._trainable_weights = []
            layer.layer._non_trainable_weights = []
            for i in range(len(layer.restore)):
                # This is why we used integers as keys
                name, weight = layer.restore[i]
                # In both cases we use k.batch_get_value since we need physical copies
                # of the arrays to initialize a new tensor
                if i in layer.gone_variables:
                    # If the variable was removed because it was clustered, we restore it
                    # by using the updater we created earlier
                    new_weight_value = k.batch_get_value([weight()])[0]
                else:
                    # If the value was not clustered (e.g. bias), we still store a valid
                    # reference to the tensor. We use this reference to get the value
                    new_weight_value = k.batch_get_value([weight])[0]
                layer.layer.add_weight(
                    name=name,
                    shape=new_weight_value.shape,
                    initializer=initializers.Constant(new_weight_value),
                    trainable=True)
            # When all weights are filled with the values, just return the underlying
            # layer since it is now fully autonomous from its wrapper
            return layer.layer
        return layer
Example #10
    def build(self, input_shape):
        super(QuantizeWrapper, self).build(input_shape)

        self.optimizer_step = self.add_weight(
            'optimizer_step',
            initializer=initializers.Constant(-1),
            dtype=dtypes.int32,
            trainable=False)

        self._weight_vars = []
        for weight, quantizer in \
            self.quantize_provider.get_weights_and_quantizers(self.layer):
            min_var, max_var = quantizer.build(weight.shape,
                                               self._weight_name(weight.name),
                                               self)

            self._weight_vars.append((weight, quantizer, min_var, max_var))
            # Needed to ensure unquantized weights get trained as part of the wrapper.
            self._trainable_weights.append(weight)

        self._quantize_activations = []
        for activation, quantizer in \
            self.quantize_provider.get_activations_and_quantizers(self.layer):
            quantize_activation = quantize_aware_activation.QuantizeAwareActivation(
                activation, quantizer, self.optimizer_step, self)

            self._quantize_activations.append(quantize_activation)

        self._output_quantizers = self.quantize_provider.get_output_quantizers(
            self.layer)
        if self._output_quantizers:
            self._output_min_max = self._output_quantizers[0].build(
                self.layer.compute_output_shape(input_shape), 'output', self)
Example #11
    def __init__(self, bias=-3, **kwargs):
        super(HighwayNetStep, self).__init__(**kwargs)
        self.bias = initializers.Constant(value=bias)

        self.multiply1 = Multiply()
        self.multiply2 = Multiply()
        self.add = Add()
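
The default bias of -3 follows the usual highway-network convention of biasing the transform gate toward carry behaviour at initialization. A quick check of what that value does to a sigmoid gate:

import numpy as np

# sigmoid(-3) ~= 0.047: the transform gate starts almost closed, so the
# layer initially passes its input through nearly unchanged.
print(1.0 / (1.0 + np.exp(3.0)))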
Example #12
    def _train_CNN_Glove(self,
                         X_train,
                         y_train,
                         epochs=5,
                         batch_size=64,
                         learning_rate=0.001,
                         regularization=0.01):
        """
        Trains CNN
        - X_train: Input sequence
        - y_train: Target sequence
        - epochs
        - batch_size
        - learning_rate = Adam optimizer's learning rate
        - reg: Regularization
        Returns :
        - history: Scalar loss
        """
        flatten_y = [category for sublist in y_train for category in sublist]
        class_weights = class_weight.compute_class_weight(
            'balanced', np.unique(flatten_y), flatten_y)
        optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)

        embedding_matrix = self.create_embedding_matrix()

        model = models.Sequential()
        model.add(
            Embedding(
                input_dim=self.max_word_count,
                output_dim=100,
                embeddings_initializer=initializers.Constant(embedding_matrix),
                input_length=self.max_sequence_len,
                trainable=False))
        model.add(
            Conv1D(filters=300,
                   kernel_size=3,
                   padding='valid',
                   activation='relu',
                   strides=1))
        model.add(GlobalMaxPool1D())
        model.add(Dense(8, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer=optim,
                      metrics=[BinaryAccuracy()])
        history = model.fit(X_train,
                            y_train,
                            # Keras expects a {class_index: weight} dict; the
                            # computed class_weights array is converted here
                            # (the sklearn module itself was passed by mistake).
                            class_weight=dict(enumerate(class_weights)),
                            epochs=epochs,
                            batch_size=batch_size,
                            validation_split=0.25,
                            verbose=self.verbose,
                            callbacks=[
                                EarlyStopping(monitor='val_loss',
                                              patience=3,
                                              min_delta=0.0001)
                            ])

        self.model = model
        self.history = history.history
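
Note that compute_class_weight returns a plain array ordered by np.unique(labels), while Keras' fit expects a {class_index: weight} dict; a small sketch of the conversion used above (valid when the classes are 0..K-1):

import numpy as np
from sklearn.utils import class_weight

labels = np.array([0, 0, 1, 2, 2, 2])
weights = class_weight.compute_class_weight('balanced',
                                            classes=np.unique(labels),
                                            y=labels)
# 'balanced' gives n_samples / (n_classes * bincount):
# here {0: 1.0, 1: 2.0, 2: 0.67}
print(dict(enumerate(weights)))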
Example #13
def test_viterbi_tags(numpy_crf):
    logits = np.array([
        [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
        [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
    ])
    transitions = np.array([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0]
    ])

    boundary_transitions = np.array([0.1, 0.2, 0.3, 0.4, 0.6])

    # Use the CRF Module with fixed transitions to compute the log_likelihood
    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(transitions),
        use_boundary=True,
        boundary_initializer=initializers.Constant(boundary_transitions),
        name="crf_layer"
    )
    mask = np.array([
            [1, 1, 1],
            [1, 1, 0]
    ])

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    # Separate the tags and scores.
    result = model.predict(logits)

    numpy_crf_instance = numpy_crf(logits, mask, transitions, boundary_transitions, boundary_transitions)
    expected, _ = numpy_crf_instance.decode()

    np.testing.assert_equal(result, expected)
Example #14
    def setUp(self):
        super().setUp()

        self.logits = np.array([
                [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
                [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ])
        self.tags = np.array([
                [2, 3, 4],
                [3, 2, 2]
        ])

        self.transitions = np.array([
                [0.1, 0.2, 0.3, 0.4, 0.5],
                [0.8, 0.3, 0.1, 0.7, 0.9],
                [-0.3, 2.1, -5.6, 3.4, 4.0],
                [0.2, 0.4, 0.6, -0.3, -0.4],
                [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = np.array([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = np.array([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = CRF(
            units=5,
            use_kernel=False,  # disable kernel transform
            chain_initializer=initializers.Constant(self.transitions),
            use_boundary=True,
            # left_boundary_initializer=initializers.Constant(self.transitions_from_start),
            # right_boundary_initializer=initializers.Constant(self.transitions_to_end),
            name="crf_layer"
        )
        self.crf.left_boundary = self.crf.add_weight(
            shape=(self.crf.units,),
            name="left_boundary",
            initializer=initializers.Constant(self.transitions_from_start),
        )
        self.crf.right_boundary = self.crf.add_weight(
            shape=(self.crf.units,),
            name="right_boundary",
            initializer=initializers.Constant(self.transitions_to_end),
        )
Example #15
    def build(self, input_shape):
        batch_size, input_dim, input_atoms = input_shape

        self.kernel = self.add_weight(name="kernel", initializer=initializers.VarianceScaling(scale=0.1),
                                      shape=[*self.kernel_size, input_atoms, self.output_dim * self.output_atoms])
        self.bias = self.add_weight(name="bias", initializer=initializers.Constant(value=0.1),
                                    shape=[self.output_dim, self.output_atoms])

        self.input_dim = input_dim
        self.input_atoms = input_atoms
Example #16
    def build(self, input_shape):
        if self._requires_pre_quant():
            self._min_pre_activation = self.add_variable(
                'min_pre_activation',
                initializer=initializers.Constant(-6.0),
                trainable=False)
            self._max_pre_activation = self.add_variable(
                'max_pre_activation',
                initializer=initializers.Constant(6.0),
                trainable=False)

        self._min_post_activation = self.add_variable(
            'min_post_activation',
            initializer=initializers.Constant(-6.0),
            trainable=False)
        self._max_post_activation = self.add_variable(
            'max_post_activation',
            initializer=initializers.Constant(6.0),
            trainable=False)
Example #17
    def build(self, input_shape):
        input_shape = tuple(tf.TensorShape(input_shape).as_list())
        self.input_spec = [tf.keras.layers.InputSpec(shape=input_shape)]
        self.input_dim = input_shape[-1]

        self.mask = self.add_weight(shape=self.mask_shape,
                                    name='transition_constraint_mask',
                                    initializer=initializers.Constant(
                                        self.mask_value),
                                    trainable=False)

        # or directly call self.built = True
        super(MockMasking, self).build(input_shape)
Example #18
 def __init__(self,
              rank,
              filters,
              kernel_size,
              strides=1,
              padding='valid',
              data_format=None,
              dilation_rate=1,
              activation=None,
              is_mc=True,
              use_bias=True,
              kernel_initializer='glorot_uniform',
              bias_initializer='zeros',
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              bias_constraint=None,
              trainable=True,
              name=None,
              **kwargs):
     super(VWNConv, self).__init__(
         trainable=trainable,
         name=name,
         activity_regularizer=regularizers.get(activity_regularizer),
         **kwargs)
     self.rank = rank
     self.filters = filters
     self.kernel_size = conv_utils.normalize_tuple(kernel_size, rank,
                                                   'kernel_size')
     self.strides = conv_utils.normalize_tuple(strides, rank, 'strides')
     self.padding = conv_utils.normalize_padding(padding)
     if (self.padding == 'causal'
             and not isinstance(self, (Conv1D, SeparableConv1D))):
          raise ValueError('Causal padding is only supported for `Conv1D` '
                           'and `SeparableConv1D`.')
     self.data_format = conv_utils.normalize_data_format(data_format)
     self.dilation_rate = conv_utils.normalize_tuple(
         dilation_rate, rank, 'dilation_rate')
     self.is_mc = tf.cast(is_mc, dtype=tf.bool)
     self.activation = activations.get(activation)
     self.use_bias = use_bias
     self.kernel_initializer = initializers.get(kernel_initializer)
     self.bias_initializer = initializers.get(bias_initializer)
     self.kernel_regularizer = regularizers.get(kernel_regularizer)
     self.bias_regularizer = regularizers.get(bias_regularizer)
     self.kernel_constraint = constraints.get(kernel_constraint)
     self.bias_constraint = constraints.get(bias_constraint)
     self.input_spec = InputSpec(ndim=self.rank + 2)
     self.a_initializer = initializers.Constant(
         1e-04)  # ADDED  (what is a)  (use keras initializers??)
Example #19
 def build(self, input_shape):
     assert len(input_shape) == 3
     d_model = int(input_shape[-1])
     self.act_weights['halting_kernel'] = self.add_weight(
         name='halting_kernel',
         shape=(d_model, 1),
         initializer='glorot_uniform',
         trainable=True)
     self.act_weights['halting_biases'] = self.add_weight(
         name='halting_biases',
         shape=(1, ),
         initializer=initializers.Constant(0.1),
         trainable=True)
     self.act_weights['time_penalty_t'] = K.constant(self.time_penalty,
                                                     dtype=K.floatx())
     return super().build(input_shape)
Example #20
    def build(self, input_shape):
        super(PruneLowMagnitude, self).build(input_shape)

        weight_vars, mask_vars, threshold_vars = [], [], []

        self.prunable_weights = self.layer.get_prunable_weights()

        # For each of the prunable weights, add mask and threshold variables
        for weight in self.prunable_weights:
            mask = self.add_variable(
                'mask',
                shape=weight.shape,
                initializer=initializers.get('ones'),
                dtype=weight.dtype,
                trainable=False,
                aggregation=tf_variables.VariableAggregation.MEAN)
            threshold = self.add_variable(
                'threshold',
                shape=[],
                initializer=initializers.get('zeros'),
                dtype=weight.dtype,
                trainable=False,
                aggregation=tf_variables.VariableAggregation.MEAN)

            weight_vars.append(weight)
            mask_vars.append(mask)
            threshold_vars.append(threshold)
        self.pruning_vars = list(zip(weight_vars, mask_vars, threshold_vars))

        # Add a scalar tracking the number of updates to the wrapped layer.
        self.pruning_step = self.add_variable(
            'pruning_step',
            shape=[],
            initializer=initializers.Constant(-1),
            dtype=dtypes.int64,
            trainable=False)

        def training_step_fn():
            return self.pruning_step

        # Create a pruning object
        self.pruning_obj = pruning_impl.Pruning(
            training_step_fn=training_step_fn,
            pruning_vars=self.pruning_vars,
            pruning_schedule=self.pruning_schedule,
            block_size=self.block_size,
            block_pooling_type=self.block_pooling_type)
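
For intuition, the (weight, mask, threshold) triples collected above are what magnitude pruning ultimately acts on: weights whose magnitude falls below the threshold get a 0 in the mask, and the pruned weight is the elementwise product. A schematic illustration (not this wrapper's exact ops):

import tensorflow as tf

weight = tf.constant([[0.3, -1.2], [0.05, 2.0]])
threshold = 0.1
# Keep only weights whose magnitude exceeds the threshold.
mask = tf.cast(tf.abs(weight) > threshold, weight.dtype)
print((weight * mask).numpy())
# [[ 0.3 -1.2]
#  [ 0.   2. ]]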
Example #21
 def build(self, input_shape):
     assert len(input_shape) == 3
     _, sequence_length, d_model = input_shape
     if not isinstance(d_model, int):
         d_model = d_model.value
     self.halting_kernel = self.add_weight(
         name='halting_kernel',
         shape=(d_model, 1),
         initializer='glorot_uniform',
         trainable=True)
     self.halting_biases = self.add_weight(
         name='halting_biases',
         shape=(1,),
         initializer=initializers.Constant(0.1),
         trainable=True)
     self.time_penalty_t = K.constant(self.time_penalty, dtype=K.floatx())
     return super().build(input_shape)
Example #22
    def test_unmasked_constrained_viterbi_tags(self):
        # TODO: use the BILUO tag scheme instead of BIO,
        #       so that transitions from tags to the end can be tested.

        raw_constraints = np.array([
            #     O     B-X    I-X    B-Y    I-Y  start   end
            [     1,     1,     0,     1,     0,    0,     1],  # O
            [     1,     1,     1,     1,     0,    0,     1],  # B-X
            [     1,     1,     1,     1,     0,    0,     1],  # I-X
            [     1,     1,     0,     1,     1,    0,     1],  # B-Y
            [     1,     1,     0,     1,     1,    0,     1],  # I-Y
            [     1,     1,     0,     1,     0,    0,     0],  # start
            [     0,     0,     0,     0,     0,    0,     0],  # end
        ])

        constraints = np.argwhere(raw_constraints > 0).tolist()

        # transitions = np.array([
        #     #     O     B-X    I-X    B-Y    I-Y
        #     [    0.1,   0.2,   0.3,   0.4,   0.5],  # O
        #     [    0.8,   0.3,   0.1,   0.7,   0.9],  # B-X
        #     [   -0.3,   2.1,  -5.6,   3.4,   4.0],  # I-X
        #     [    0.2,   0.4,   0.6,  -0.3,  -0.4],  # B-Y
        #     [    1.0,   1.0,   1.0,   1.0,   1.0]   # I-Y
        # ])

        transitions = np.ones([5, 5])

        # transitions_from_start = np.array(
        #     #     O     B-X    I-X    B-Y    I-Y
        #     [    0.1,   0.2,   0.3,   0.4,   0.6]  # start
        # )

        transitions_from_start = np.ones(5)

        # transitions_to_end = np.array(
        #     [
        #     #    end
        #         -0.1,  # O
        #         -0.2,  # B-X
        #          0.3,  # I-X
        #         -0.4,  # B-Y
        #         -0.4   # I-Y
        #     ]
        # )

        transitions_to_end = np.ones(5)

        logits = np.array([
            [
            # constraint transition from start to tags
            #     O     B-X    I-X    B-Y    I-Y
                [ 0.,    .1,   1.,     0.,   0.],
                [ 0.,    0.,   1.,     0.,   0.],
                [ 0.,    0.,   1.,     0.,   0.]
            ],
            [
            # constraint transition from tags to tags
            #     O     B-X    I-X    B-Y    I-Y
                [ 0.,    1.,   0.,     0.,   0.],
                [ 0.,    0.,   .1,     1.,   0.],
                [ 0.,    0.,   1.,     0.,   0.]
            ]
        ])

        crf = CRF(
            units=5,
            use_kernel=False,  # disable kernel transform
            chain_initializer=initializers.Constant(transitions),
            use_boundary=True,
            # left_boundary_initializer=initializers.Constant(transitions_from_start),
            # right_boundary_initializer=initializers.Constant(transitions_to_end),
            transition_constraint=constraints,
            name="crf_layer"
        )
        crf.left_boundary = crf.add_weight(
            shape=(5,),
            name="left_boundary",
            initializer=initializers.Constant(transitions_from_start),
        )
        crf.right_boundary = crf.add_weight(
            shape=(5,),
            name="right_boundary",
            initializer=initializers.Constant(transitions_to_end),
        )

        crf_loss_instance = ConditionalRandomFieldLoss()

        model = Sequential()
        model.add(layers.Input(shape=(3, 5)))
        model.add(crf)
        model.compile('adam', loss={"crf_layer": crf_loss_instance})

        for layer in model.layers:
            print(layer.get_config())
            print(dict(zip(layer.weights, layer.get_weights())))

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = model.predict(logits)

        # Now the tags should respect the constraints
        expected_tags = [
            [1, 2, 2],  # B-X  I-X  I-X
            [1, 2, 2]   # B-X  I-X  I-X
        ]

        # If the constraints were not applied, the result would be:
        # [
        #     [2, 4, 3],
        #     [2, 3, 0]
        # ]

        np.testing.assert_equal(viterbi_tags, expected_tags)
Example #23
def test_masked_viterbi_decode():
    transitions = np.ones([5, 5])
    transitions_from_start = np.ones(5)
    transitions_to_end = np.ones(5)

    logits = np.array([
        [
        #     O     B-X    I-X    B-Y    I-Y
            [ 0.,    1.,   0.,     0.,   0.],
            [ 0.,    0.,   1.,     0.,   0.],
            [ 0.,    0.,   1.,     0.,   0.]
        ],
        [
        #     O     B-X    I-X    B-Y    I-Y
            [ 0.,    1.,   0.,     0.,   0.],
            [ 0.,    1.,   0.,     0.,   0.],
            [ 0.,    1.,   0.,     0.,   0.]
        ]
    ])

    # TODO: this test case covers right-padding masks only, because the
    #       underlying CRF implementation only supports sequence lengths.
    mask = np.array([
            [1, 1, 0],
            [1, 1, 0]
    ])

    crf = CRF(
        units=5,
        use_kernel=False,  # disable kernel transform
        chain_initializer=initializers.Constant(transitions),
        use_boundary=True,
        # left_boundary_initializer=initializers.Constant(transitions_from_start),
        # right_boundary_initializer=initializers.Constant(transitions_to_end),
        name="crf_layer"
    )

    crf_loss_instance = ConditionalRandomFieldLoss()

    model = Sequential()
    model.add(layers.Input(shape=(3, 5)))
    model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
    model.add(crf)
    model.compile('adam', loss={"crf_layer": crf_loss_instance})

    # for layer in model.layers:
    #     print(layer.get_config())
    #     print(dict(zip(layer.weights, layer.get_weights())))

    # Get just the tags from each tuple of (tags, score).
    result = model.predict(logits)

    # The tags should respect the mask: masked positions decode to 0.
    expected = [
        [1, 2, 0],  # B-X  I-X  NA
        [1, 1, 0]   # B-X  B-X  NA
    ]

    # Without the mask, the result would be:
    # [
    #     [2, 4, 3],
    #     [2, 3, 0]
    # ]

    np.testing.assert_equal(result, expected)
Example #24
    def test_constrained_viterbi_tags(self):
        constraints = {(0, 0), (0, 1),
                       (1, 1), (1, 2),
                       (2, 2), (2, 3),
                       (3, 3), (3, 4),
                       (4, 4), (4, 0)}

        # Add the transitions to the end tag
        # and from the start tag.
        for i in range(5):
            constraints.add((5, i))
            constraints.add((i, 6))

        mask = np.array([
                [1, 1, 1],
                [1, 1, 0]
        ])

        crf = CRF(
            units=5,
            use_kernel=False,  # disable kernel transform
            chain_initializer=initializers.Constant(self.transitions),
            use_boundary=True,
            # left_boundary_initializer=initializers.Constant(self.transitions_from_start),
            # right_boundary_initializer=initializers.Constant(self.transitions_to_end),
            transition_constraint=constraints,
            name="crf_layer"
        )
        crf.left_boundary = crf.add_weight(
            shape=(5,),
            name="left_boundary",
            initializer=initializers.Constant(self.transitions_from_start),
        )
        crf.right_boundary = crf.add_weight(
            shape=(5,),
            name="right_boundary",
            initializer=initializers.Constant(self.transitions_to_end),
        )


        crf_loss_instance = ConditionalRandomFieldLoss()

        model = Sequential()
        model.add(layers.Input(shape=(3, 5)))
        model.add(MockMasking(mask_shape=(2, 3), mask_value=mask))
        model.add(crf)
        model.compile('adam', loss={"crf_layer": crf_loss_instance})

        for layer in model.layers:
            print(layer.get_config())
            print(dict(zip(layer.weights, layer.get_weights())))

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = model.predict(self.logits)

        # Now the tags should respect the constraints
        expected_tags = [
            [2, 3, 3],
            [2, 3, 0]
        ]

        # If the constraints were not applied, the result would be:
        # [
        #     [2, 4, 3],
        #     [2, 3, 0]
        # ]

        np.testing.assert_equal(viterbi_tags, expected_tags)
Example #25
    def build(self, input_shape):
        super(ClusterWeights, self).build(input_shape)

        clusterable_weights = self.layer.get_clusterable_weights()

        # Map automatically assigned TF variable name (e.g. 'dense/kernel:0') to provided human readable name
        # (e.g. as in Dense(10).kernel)
        clusterable_weights_to_variables = {}

        for weight_name, weight in clusterable_weights:
            # If a variable appears in this loop, it is going to be removed from
            # self._trainable_weights. We need to memorise which variables are going
            # away so that we can restore them later; this preserves the original
            # order of the weights in the underlying layer, since an incorrect order
            # results in incorrect op weight configurations.

            # We can be sure that weight will be found in this array since the variable is either in the
            # self._trainable_weights
            # or in self._non_trainable_weights and self.weights is the result of concatenation of those arrays
            original_index = self.layer.weights.index(weight)
            self.gone_variables.append(original_index)

            # Again, not sure if this is needed. Leaving for now.
            clusterable_weights_to_variables[self._weight_name(
                weight.name)] = weight_name

            # Build initial cluster centroids for a given tensor. Factory returns a class and we init an object immediately
            centroid_initializer = clustering_centroids.CentroidsInitializerFactory.get_centroid_initializer(
                self.cluster_centroids_init)(weight, self.number_of_clusters)

            cluster_centroids = centroid_initializer.get_cluster_centroids()

            # Use k.batch_get_value since we need to initialize the variables with an initial value taken from a Tensor object
            # For each weight there is a different set of cluster centroids
            self.cluster_centroids_tf[weight_name] = self.add_weight(
                'cluster_centroids_tf',
                shape=(self.number_of_clusters, ),
                dtype=weight.dtype,
                trainable=True,
                initializer=initializers.Constant(
                    value=k.batch_get_value([cluster_centroids])[0]))

            # There are vectorised implementations of the look-ups; a different one
            # is used depending on the number of dimensions.
            clustering_impl_cls = clustering_registry.ClusteringLookupRegistry(
            ).get_clustering_impl(self.layer, weight_name)
            self.clustering_impl[weight_name] = clustering_impl_cls(
                self.cluster_centroids_tf[weight_name])

            # We find the nearest cluster centroids and store them so that ops can
            # build their weights upon them. These indices are calculated once and
            # stored forever; we use them for look-ups into self.cluster_centroids_tf.
            pulling_indices = self.clustering_impl[
                weight_name].get_pulling_indices(weight)
            self.pulling_indices_tf[weight_name] = self.add_weight(
                'pulling_indices_tf',
                shape=pulling_indices.shape,
                dtype=tf.int32,
                trainable=False,
                initializer=initializers.Constant(
                    value=k.batch_get_value([pulling_indices])[0]))

            # We store these pairs so we can easily update these variables later on
            self.clustered_vars.append((weight_name, weight))

        # We use currying here to get an updater that can be triggered at any time
        # in the future and will return the latest version of the clustered weights.
        def get_updater(for_weight_name):
            def fn():
                return self.clustering_impl[
                    for_weight_name].get_clustered_weight(
                        self.pulling_indices_tf[for_weight_name])

            return fn

        # This will allow us to restore the order of weights later
        # This loop stores pairs of weight names and how to restore them

        for ct, weight in enumerate(self.layer.weights):
            name = self._weight_name(weight.name)
            if ct in self.gone_variables:
                # Again, not sure if this is needed
                weight_name = clusterable_weights_to_variables[name]
                self.restore.append((name, get_updater(weight_name)))
            else:
                self.restore.append((name, weight))