def defineFullModel(self):
        self.TRAIN_FLAG = 0
        outputs = self.defineModel()

        if len(outputs) > 1:
            self.raw_output = Concatenate()(outputs)
        else:  #if only a single chunk
            self.raw_output = outputs[0]

        #pass output logits through activation
        for idx, o in enumerate(outputs):
            outputs[idx] = Lambda(self.params_dict['output_activation'])(o)

        if len(outputs) > 1:
            x = Concatenate()(outputs)
        else:  #if only a single chunk
            x = outputs[0]
        x = Lambda(self.outputDecoder)(x)  #logits
        x = Activation('softmax')(x)  #return probs

        if self.params_dict['base_model'] is None:
            self.model_full = KerasModel(inputs=self.input, outputs=x)
        else:
            self.model_full = KerasModel(
                inputs=self.params_dict['base_model'].input, outputs=x)
Example 2
def line_lstm(input_shape,
              output_shape,
              window_width=20,
              window_stride=14,
              decoder_dim=None,
              encoder_dim=None):
    # Here is another way to pass arguments to the Keras Lambda function
    def slide_window_bound(image,
                           window_width=window_width,
                           window_stride=window_stride):
        return slide_window(image, window_width, window_stride)

    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    if encoder_dim is None:
        encoder_dim = 128
    if decoder_dim is None:
        decoder_dim = 128

    image_input = Input(shape=input_shape)
    # (image_height, image_width)

    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    image_patches = Lambda(slide_window_bound)(image_reshaped)
    # (num_windows, image_height, window_width, 1)

    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs,
                         outputs=convnet.layers[-2].output)
    # (image_height, window_width, 1) -> (128,)

    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 128)

    gpu_present = len(device_lib.list_local_devices()) > 1
    lstm = CuDNNLSTM if gpu_present else LSTM

    ##### Your code below (Lab 3)
    encoder_output = lstm(encoder_dim,
                          return_sequences=False,
                          go_backwards=True)(convnet_outputs)
    # (encoder_dim,)
    repeated_encoding = RepeatVector(output_length)(encoder_output)
    # (output_length, encoder_dim)
    decoder_output = lstm(decoder_dim,
                          return_sequences=True)(repeated_encoding)
    # decoder_output_dropout = Dropout(0.2)(decoder_output)
    # (output_length, decoder_dim)
    ##### Your code above (Lab 3)

    softmax_output = TimeDistributed(Dense(
        num_classes, activation='softmax'))(decoder_output)
    # (output_length, num_classes)

    model = KerasModel(inputs=image_input, outputs=softmax_output)
    return model
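The `slide_window` helper used above is not shown in these snippets. A minimal sketch consistent with the shape comments, assuming TF2's `tf.image.extract_patches` (the exact implementation in the original codebase may differ):

import tensorflow as tf

def slide_window(image, window_width, window_stride):
    """Turn (batch, image_height, image_width, 1) into
    (batch, num_windows, image_height, window_width, 1)."""
    kernel = [1, 1, window_width, 1]
    strides = [1, 1, window_stride, 1]
    # (batch, image_height, num_windows, window_width)
    patches = tf.image.extract_patches(image, kernel, strides, [1, 1, 1, 1], 'VALID')
    patches = tf.transpose(patches, (0, 2, 1, 3))
    return tf.expand_dims(patches, -1)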
Example 3
def line_cnn_sliding_window(input_shape: Tuple[int, ...],
                            output_shape: Tuple[int, ...],
                            window_width: int = 16,
                            window_stride: int = 10) -> KerasModel:
    """
    Input is an image with shape (image_height, image_width)
    Output is of shape (output_length, num_classes)
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    image_input = Input(shape=input_shape)
    # (image_height, image_width)

    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    image_patches = Lambda(slide_window,
                           arguments={
                               'window_width': window_width,
                               'window_stride': window_stride
                           })(image_reshaped)
    # (num_windows, image_height, window_width, 1)

    # Make a LeNet and get rid of the last two layers (softmax and dropout)
    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs,
                         outputs=convnet.layers[-2].output)

    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 128)

    # Now we have to get to (output_length, num_classes) shape. One way to do it is to do another sliding window with
    # width = floor(num_windows / output_length)
    # Note that this will likely produce too many items in the output sequence, so take only output_length,
    # and watch out that width is at least 2 (else we will only be able to predict on the first half of the line)

    ##### Your code below (Lab 2)
    convnet_outputs_extra_dim = Lambda(lambda x: tf.expand_dims(x, -1))(
        convnet_outputs)

    num_windows = int((image_width - window_width) / window_stride) + 1
    width = int(num_windows / output_length)

    conved_convnet_outputs = Conv2D(
        num_classes, (width, 128), (width, 1),
        activation='softmax')(convnet_outputs_extra_dim)

    squeezed_conved_convnet_outputs = Lambda(lambda x: tf.squeeze(x, 2))(
        conved_convnet_outputs)

    softmax_output = Lambda(lambda x: x[:, :output_length, :])(
        squeezed_conved_convnet_outputs)
    ##### Your code above (Lab 2)

    model = KerasModel(inputs=image_input, outputs=softmax_output)
    model.summary()
    return model
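A hypothetical invocation (the image dimensions and class count are made up) to sanity-check the sliding-window arithmetic in the comments above:

# Hypothetical shapes: 28px-high, 952px-wide line images,
# up to 32 output characters over 64 classes
model = line_cnn_sliding_window(input_shape=(28, 952), output_shape=(32, 64))
# num_windows = (952 - 16) // 10 + 1 = 94, so width = 94 // 32 = 2 (>= 2, as required)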
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14):  # pylint: disable=too-many-locals
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(f"Window width/stride need to generate >= {output_length} windows (currently {num_windows})")

    image_input = Input(shape=input_shape, name="image")
    y_true = Input(shape=(output_length,), name="y_true")
    input_length = Input(shape=(1,), name="input_length")
    label_length = Input(shape=(1,), name="label_length")

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the lstm outputs to softmax outputs.
    # Note that LSTMs expect an input of shape (batch_size, num_timesteps, feature_length).

    # Your code below (Lab 3)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    image_patches = Lambda(slide_window, arguments={"window_width": window_width, "window_stride": window_stride})(
        image_reshaped
    )
    # (num_windows, image_height, window_width, 1)

    # Make a LeNet and get rid of the last two layers (softmax and dropout)
    convnet = lenet((image_height, window_width, 1), (num_classes,))
    convnet = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output)
    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 128)

    lstm_output = LSTM(128, return_sequences=True)(convnet_outputs)
    # (num_windows, 128)

    softmax_output = Dense(num_classes, activation="softmax", name="softmax_output")(lstm_output)
    # (num_windows, num_classes)
    # Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows, arguments={"num_windows": num_windows}
    )(input_length)

    ctc_loss_output = Lambda(lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name="ctc_loss")(
        [y_true, softmax_output, input_length_processed, label_length]
    )

    ctc_decoded_output = Lambda(lambda x: ctc_decode(x[0], x[1], output_length), name="ctc_decoded")(
        [softmax_output, input_length_processed]
    )

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length], outputs=[ctc_loss_output, ctc_decoded_output],
    )
    return model
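The `ctc_decode` helper wrapped in the `ctc_decoded` Lambda is not shown here. A plausible sketch, assuming it greedily decodes with `K.ctc_decode` and pads the result to a fixed `max_output_length` (the original helper may differ):

import tensorflow as tf
from tensorflow.keras import backend as K

def ctc_decode(y_pred, input_length, max_output_length):
    """Greedy CTC decode, padded with -1 to a fixed max_output_length."""
    input_length = K.cast(K.squeeze(input_length, axis=-1), 'int32')
    (decoded,), _log_prob = K.ctc_decode(y_pred, input_length, greedy=True)
    # decoded: (batch, longest_decoding), already padded with -1
    pad = max_output_length - tf.shape(decoded)[1]
    decoded = tf.pad(decoded, [[0, 0], [0, tf.maximum(pad, 0)]], constant_values=-1)
    return decoded[:, :max_output_length]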
Example 5
    def __init__(self, image_width, learning_rate=0.001):
        # initialize model structure
        x = Input(shape=(image_width, image_width, 3))
        x1 = self.inception_layer(1, 4, 4, 2)(x)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(2, 2), padding='same')(x1)

        x2 = self.inception_layer(2, 4, 4, 2)(x1)
        x2 = BatchNormalization()(x2)
        x2 = MaxPooling2D(pool_size=(2, 2), padding='same')(x2)

        x3 = Conv2D(16, (5, 5), padding='same', activation='relu')(x2)
        x3 = BatchNormalization()(x3)
        x3 = MaxPooling2D(pool_size=(2, 2), padding='same')(x3)

        x4 = Conv2D(16, (5, 5), padding='same', activation='relu')(x3)
        x4 = BatchNormalization()(x4)
        x4 = MaxPooling2D(pool_size=(4, 4), padding='same')(x4)

        y = Flatten()(x4)
        y = Dropout(0.5)(y)
        y = Dense(16)(y)
        y = LeakyReLU(alpha=0.1)(y)
        y = Dropout(0.5)(y)
        y = Dense(1, activation='sigmoid')(y)

        self.model = KerasModel(inputs=x, outputs=y)
        self.model.compile(optimizer=Adam(lr=learning_rate),
                           loss=tensorflow.keras.losses.BinaryCrossentropy(),
                           metrics=[tensorflow.keras.metrics.AUC()])
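`inception_layer` is defined elsewhere in this class. A sketch consistent with its four-argument calls above, following the MesoInception-style pattern of parallel branches with growing dilation rates (the exact branch design is an assumption):

    def inception_layer(self, a, b, c, d):
        """Return a function applying four parallel conv branches and concatenating them.
        Assumes Conv2D and Concatenate are imported from tensorflow.keras.layers."""
        def func(x):
            x1 = Conv2D(a, (1, 1), padding='same', activation='relu')(x)
            x2 = Conv2D(b, (1, 1), padding='same', activation='relu')(x)
            x2 = Conv2D(b, (3, 3), padding='same', activation='relu')(x2)
            x3 = Conv2D(c, (1, 1), padding='same', activation='relu')(x)
            x3 = Conv2D(c, (3, 3), dilation_rate=2, padding='same', activation='relu')(x3)
            x4 = Conv2D(d, (1, 1), padding='same', activation='relu')(x)
            x4 = Conv2D(d, (3, 3), dilation_rate=3, padding='same', activation='relu')(x4)
            return Concatenate(axis=-1)([x1, x2, x3, x4])
        return func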
Example 6
    def _gradient_backprop_eager(self,
                                 grad_fn,
                                 layer_name,
                                 images,
                                 mode='max',
                                 output_index=0,
                                 loss_fn=None):
        # save current weight
        weights = self.model.get_weights()

        new_model = clone_model(self.model)
        # Apply weights
        new_model.set_weights(weights)

        for layer in new_model.layers:
            if 'activation' in layer.get_config():
                if 'relu' in layer.activation.__name__:
                    layer.activation = grad_fn

        guided_model = KerasModel(new_model.inputs,
                                  new_model.get_layer(layer_name).output)

        img_tensor = tf.Variable(tf.cast(images, K.floatx()))
        with tf.GradientTape() as tape:
            tape.watch(img_tensor)
            output = guided_model(img_tensor)

            loss = self._get_backprop_loss(output, mode, output_index, loss_fn)

        grads = tape.gradient(loss, img_tensor)

        del guided_model
        del new_model

        return grads.numpy()
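For illustration, a typical `grad_fn` to pass into the method above for guided backpropagation: a ReLU whose gradient is zeroed wherever either the forward input or the incoming gradient is negative (not part of the original snippet):

import tensorflow as tf

@tf.custom_gradient
def guided_relu(x):
    def grad(dy):
        # zero the gradient where the forward input or the incoming gradient is negative
        return tf.cast(x > 0, dy.dtype) * tf.cast(dy > 0, dy.dtype) * dy
    return tf.nn.relu(x), grad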
    def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]:
        """Predict on a single input."""
        softmax_output_fn = KerasModel(
            inputs=[self.network.get_layer("image").input],
            outputs=[self.network.get_layer("softmax_output").output],
        )
        if image.dtype == np.uint8:
            image = (image / 255).astype(np.float32)

        # Get the prediction and confidence using softmax_output_fn, passing the right input into it.
        input_image = np.expand_dims(image, 0)
        softmax_output = softmax_output_fn.predict(input_image)

        input_length = [softmax_output.shape[1]]
        decoded, log_prob = K.ctc_decode(softmax_output,
                                         input_length,
                                         greedy=True)

        pred_raw = K.eval(decoded[0])[0]
        pred = "".join(self.data.mapping[label] for label in pred_raw).strip()

        neg_sum_logit = K.eval(log_prob)[0][0]
        conf = np.exp(-neg_sum_logit)
        # Your code above (Lab 3)

        return pred, conf
Example 8
    def init_model(self):
        x = Input(shape=(IMGWIDTH, IMGWIDTH, 3))

        x1 = Conv2D(8, (3, 3), padding='same', activation='relu')(x)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(2, 2), padding='same')(x1)

        x2 = Conv2D(8, (5, 5), padding='same', activation='relu')(x1)
        x2 = BatchNormalization()(x2)
        x2 = MaxPooling2D(pool_size=(2, 2), padding='same')(x2)

        x3 = Conv2D(16, (5, 5), padding='same', activation='relu')(x2)
        x3 = BatchNormalization()(x3)
        x3 = MaxPooling2D(pool_size=(2, 2), padding='same')(x3)

        x4 = Conv2D(16, (5, 5), padding='same', activation='relu')(x3)
        x4 = BatchNormalization()(x4)
        x4 = MaxPooling2D(pool_size=(4, 4), padding='same')(x4)

        y = Flatten()(x4)
        y = Dropout(0.5)(y)
        y = Dense(16)(y)
        y = LeakyReLU(alpha=0.1)(y)
        y = Dropout(0.5)(y)
        y = Dense(1, activation='sigmoid')(y)

        return KerasModel(inputs=x, outputs=y)
Example 9
    def decoder(self):
        """ DFL H128 Decoder """
        input_ = Input(shape=(16, 16, self.encoder_dim))
        # Face
        var_x = input_
        var_x = self.blocks.upscale(var_x, self.encoder_dim)
        var_x = self.blocks.upscale(var_x, self.encoder_dim // 2)
        var_x = self.blocks.upscale(var_x, self.encoder_dim // 4)
        var_x = self.blocks.conv2d(var_x, 3,
                                   kernel_size=5,
                                   padding="same",
                                   activation="sigmoid",
                                   name="face_out")
        outputs = [var_x]

        if self.config.get("learn_mask", False):
            var_y = input_
            var_y = self.blocks.upscale(var_y, self.encoder_dim)
            var_y = self.blocks.upscale(var_y, self.encoder_dim // 2)
            var_y = self.blocks.upscale(var_y, self.encoder_dim // 4)
            var_y = self.blocks.conv2d(var_y, 1,
                                       kernel_size=5,
                                       padding="same",
                                       activation="sigmoid",
                                       name="mask_out")
            outputs.append(var_y)
        return KerasModel(input_, outputs=outputs)
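`self.blocks.upscale` is a faceswap-style helper. A minimal functional sketch, assuming the usual conv + pixel-shuffle pattern (the kernel size and LeakyReLU slope are assumptions):

def upscale(x, filters):
    """Double spatial resolution: conv to 4*filters channels, then depth-to-space.
    Assumes Conv2D, LeakyReLU, Lambda from tensorflow.keras.layers and tf imported."""
    x = Conv2D(filters * 4, kernel_size=3, padding='same')(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Lambda(lambda t: tf.nn.depth_to_space(t, 2))(x)  # PixelShuffler
    return x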
    def evaluate(self, x, y, batch_size: int=16, verbose=True) -> float:
        test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn)

        # We can use the `ctc_decoded` layer that is part of our model here.
        decoding_model = KerasModel(inputs=self.network.input, outputs=self.network.get_layer('ctc_decoded').output)
        preds = decoding_model.predict_generator(test_sequence)

        trues = np.argmax(y, -1)
        pred_strings = [''.join(self.data.mapping.get(label, '') for label in pred).strip(' |_') for pred in preds]
        true_strings = [''.join(self.data.mapping.get(label, '') for label in true).strip(' |_') for true in trues]

        char_accuracies = [
            1 - editdistance.eval(true_string, pred_string) / len(true_string)
            for pred_string, true_string in zip(pred_strings, true_strings)
        ]
        if verbose:
            sorted_ind = np.argsort(char_accuracies)
            print("\nLeast accurate predictions:")
            for ind in sorted_ind[:5]:
                print(f'True: {true_strings[ind]}')
                print(f'Pred: {pred_strings[ind]}')
            print("\nMost accurate predictions:")
            for ind in sorted_ind[-5:]:
                print(f'True: {true_strings[ind]}')
                print(f'Pred: {pred_strings[ind]}')
            print("\nRandom predictions:")
            for ind in np.random.randint(0, len(char_accuracies), 5):
                print(f'True: {true_strings[ind]}')
                print(f'Pred: {pred_strings[ind]}')
        mean_accuracy = np.mean(char_accuracies)
        return mean_accuracy
Example 11
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14, conv_dim=128, lstm_dim=256):
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})')

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length,), name='y_true')
    input_length = Input(shape=(1,), name='input_length')
    label_length = Input(shape=(1,), name='label_length')

    gpu_present = len(device_lib.list_local_devices()) > 1
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the lstm outputs to softmax outputs.
    # Note that LSTMs expect an input of shape (batch_size, num_timesteps, feature_length).

    ##### Your code below (Lab 3)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    conv = Conv2D(conv_dim, (image_height, window_width), (1, window_stride), activation='relu')(image_reshaped)

    conv_squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv)

    lstm_output1 = lstm_fn(lstm_dim, return_sequences=True)(conv_squeezed)
    # (num_windows, lstm_dim)
    lstm_output2 = lstm_fn(lstm_dim, return_sequences=True)(lstm_output1)
    lstm_output3 = lstm_fn(lstm_dim, return_sequences=True)(lstm_output2 + lstm_output1)
    lstm_output4 = lstm_fn(lstm_dim, return_sequences=True)(lstm_output3 + lstm_output2)

    softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output4)
    # (num_windows, num_classes)
    ##### Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows}
    )(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name='ctc_loss'
    )([y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded'
    )([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output]
    )
    return model
Example 12
    def fprop(self, x):
        """
        Exposes all the layers of the model returned by get_layer_names.

        :param x: A symbolic representation of the network input
        :return: A dictionary mapping layer names to the symbolic
                 representation of their output.
        """
        from tensorflow.keras.models import Model as KerasModel

        if self.keras_model is None:
            # Get the input layer
            new_input = self.model.get_input_at(0)

            # Make a new model that returns each of the layers as output
            out_layers = [x_layer.output for x_layer in self.model.layers]
            self.keras_model = KerasModel(new_input, out_layers)

        # and get the outputs for that model on the input x
        outputs = self.keras_model(x)

        # Keras only returns a list for outputs of length > 1; if the model
        # has only one layer, wrap the output in a list
        if len(self.model.layers) == 1:
            outputs = [outputs]

        # compute the dict to return
        fprop_dict = dict(zip(self.get_layer_names(), outputs))

        return fprop_dict
Example 13
    def decoder(self):
        """ Decoder Network """
        input_ = Input(shape=(8, 8, 512))
        var_x = input_
        var_x = self.blocks.upscale(var_x, 256)
        var_x = self.blocks.upscale(var_x, 128)
        var_x = self.blocks.upscale(var_x, 64)
        var_x = self.blocks.conv2d(var_x,
                                   3,
                                   kernel_size=5,
                                   padding="same",
                                   activation="sigmoid",
                                   name="face_out")
        outputs = [var_x]

        if self.config.get("learn_mask", False):
            var_y = input_
            var_y = self.blocks.upscale(var_y, 256)
            var_y = self.blocks.upscale(var_y, 128)
            var_y = self.blocks.upscale(var_y, 64)
            var_y = self.blocks.conv2d(var_y,
                                       1,
                                       kernel_size=5,
                                       padding="same",
                                       activation="sigmoid",
                                       name="mask_out")
            outputs.append(var_y)
        return KerasModel(input_, outputs=outputs)
    def build(self):
        # build the Inception V3 network, use pretrained weights from ImageNet
        # remove top fully connected layers by include_top=False
        base_model = applications.InceptionV3(weights='imagenet',
                                              include_top=False,
                                              input_shape=(self.img_width,
                                                           self.img_height, 3))

        # Add new layers on top of the model
        # build a classifier model to put on top of the convolutional model
        # This consists of a global average pooling layer and a fully connected layer with 256 nodes
        # Then apply dropout and sigmoid activation
        model_top = Sequential()
        model_top.add(
            GlobalAveragePooling2D(input_shape=base_model.output_shape[1:]))
        model_top.add(Dense(256, activation='relu'))
        model_top.add(Dropout(0.5))
        model_top.add(Dense(1, activation='sigmoid'))
        model = KerasModel(inputs=base_model.input,
                           outputs=model_top(base_model.output))

        # Compile model using Adam optimizer with common values and binary cross entropy loss
        # Use low learning rate (lr) for transfer learning
        model.compile(optimizer=Adam(lr=self.learning_rate,
                                     beta_1=self.beta_1,
                                     beta_2=self.beta_2,
                                     epsilon=self.epsilon,
                                     decay=self.decay),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        self._model = model
Example 15
 def Decoder(self):
     input_ = Input(shape=(8, 8, 512))
     x = input_
     x = self.upscale(256)(x)
     x = self.upscale(128)(x)
     x = self.upscale(64)(x)
     x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x)
     return KerasModel(input_, x)
Example 16
 def build_autoencoders(self, inputs):
     """ Initialize original model """
     logger.debug("Initializing model")
     for side in ("a", "b"):
         logger.debug("Adding Autoencoder. Side: %s", side)
         decoder = self.networks["decoder_{}".format(side)].network
         output = decoder(self.networks["encoder"].network(inputs[0]))
         autoencoder = KerasModel(inputs, output)
         self.add_predictor(side, autoencoder)
     logger.debug("Initialized model")
Example 17
    def init_model(self, dl_rate):
        x = Input(shape=(IMGWIDTH, IMGWIDTH, 3))

        x1 = Conv2D(16, (3, 3), dilation_rate=dl_rate, strides=1,
                    padding='same', activation='relu')(x)
        x1 = Conv2D(4, (1, 1), padding='same', activation='relu')(x1)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(8, 8), padding='same')(x1)

        y = Flatten()(x1)
        y = Dropout(0.5)(y)
        y = Dense(1, activation='sigmoid')(y)
        return KerasModel(inputs=x, outputs=y)
Example 18
 def Encoder(self):
     input_ = Input(shape=IMAGE_SHAPE)
     x = input_
     x = self.conv(128)(x)
     x = self.conv(256)(x)
     x = self.conv(512)(x)
     x = self.conv(1024)(x)
     x = Dense(ENCODER_DIM)(Flatten()(x))
     x = Dense(4 * 4 * 1024)(x)
     x = Reshape((4, 4, 1024))(x)
     x = self.upscale(512)(x)
     return KerasModel(input_, x)
Example 19
def SP_ResNet(num_classes,
              input_shape,
              depths=[2, 2, 2, 2],
              filters=[64, 128, 256, 512],
              pool_at=[0, 1, 2, 3],
              squeeze_ratio=16,
              use_residuals=True,
              dense_layers=[],
              dropout_rate=None):
    # ...

    input_img = Input(shape=input_shape, name='input')

    # entry conv + pool
    x = Conv2D(filters[0], (7, 7),
               strides=(2, 2),
               padding='same',
               activation=None,
               name='entry_conv')(input_img)
    x = BatchNormalization(name='entry_bn')(x)
    x = Activation('relu', name='entry_relu')(x)
    x = MaxPool2D((3, 3), strides=(2, 2), padding='same')(x)

    pooling_outputs = []
    for i, (f, d) in enumerate(zip(filters, depths)):
        # n_blocks = depth
        for n in range(d):
            downsample = (n == 0)
            x, z = SP_block(x,
                            f,
                            str(i) + '_' + str(n),
                            ratio=squeeze_ratio,
                            residual=use_residuals,
                            downsample=downsample)

        # only pool at last block in depth
        if i in pool_at and z is not None:
            z = Lambda(bilinear_pooling,
                       name='bilinear_pooling' + str(i))([z, z])
            pooling_outputs.append(z)
            print(z.get_shape().as_list())

    x = GlobalAveragePooling2D(name='global_pooling_top')(x)
    pooling_outputs.append(x)
    x = Concatenate(name='feature_concat')(pooling_outputs)

    x = make_dense_layers(dense_layers, dropout=dropout_rate)(x)

    pred = Dense(num_classes, activation='softmax')(x)

    model = KerasModel(inputs=input_img, outputs=pred)

    return model
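`bilinear_pooling` is the outer-product pooling from bilinear CNNs. A sketch under that assumption; the signed square-root and L2 normalization are the standard B-CNN recipe, not confirmed by the snippet:

import tensorflow as tf

def bilinear_pooling(inputs):
    """Outer-product pooling of two (batch, H, W, C) feature maps."""
    xa, xb = inputs
    batch = tf.shape(xa)[0]
    n = tf.cast(tf.shape(xa)[1] * tf.shape(xa)[2], xa.dtype)
    xa = tf.reshape(xa, (batch, -1, xa.shape[-1]))  # (batch, H*W, Ca)
    xb = tf.reshape(xb, (batch, -1, xb.shape[-1]))  # (batch, H*W, Cb)
    phi = tf.matmul(xa, xb, transpose_a=True) / n   # (batch, Ca, Cb)
    phi = tf.reshape(phi, (batch, -1))
    phi = tf.sign(phi) * tf.sqrt(tf.abs(phi) + 1e-12)  # signed square-root
    return tf.nn.l2_normalize(phi, axis=-1)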
Example 20
 def encoder(self):
     """ DFL H128 Encoder """
     input_ = Input(shape=self.input_shape)
     var_x = input_
     var_x = self.blocks.conv(var_x, 128)
     var_x = self.blocks.conv(var_x, 256)
     var_x = self.blocks.conv(var_x, 512)
     var_x = self.blocks.conv(var_x, 1024)
     var_x = Dense(self.encoder_dim)(Flatten()(var_x))
     var_x = Dense(8 * 8 * self.encoder_dim)(var_x)
     var_x = Reshape((8, 8, self.encoder_dim))(var_x)
     var_x = self.blocks.upscale(var_x, self.encoder_dim)
     return KerasModel(input_, var_x)
Example 21
 def encoder(self):
     """ Encoder Network """
     input_ = Input(shape=self.input_shape)
     var_x = input_
     var_x = self.blocks.conv(var_x, 128)
     var_x = self.blocks.conv(var_x, 256)
     var_x = self.blocks.conv(var_x, 512)
     if not self.config.get("lowmem", False):
         var_x = self.blocks.conv(var_x, 1024)
     var_x = Dense(self.encoder_dim)(Flatten()(var_x))
     var_x = Dense(4 * 4 * 1024)(var_x)
     var_x = Reshape((4, 4, 1024))(var_x)
     var_x = self.blocks.upscale(var_x, 512)
     return KerasModel(input_, var_x)
Example 22
    def initModel(self):
        optimizer = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
        x = Input(shape=IMAGE_SHAPE)

        self.autoencoder_A = KerasModel(x, self.decoder_A(self.encoder(x)))
        self.autoencoder_B = KerasModel(x, self.decoder_B(self.encoder(x)))

        try:
            self.autoencoder_A_multi = multi_gpu_model(self.autoencoder_A,
                                                       gpus=2)
            self.autoencoder_B_multi = multi_gpu_model(self.autoencoder_B,
                                                       gpus=2)
            self.autoencoder_A_multi.compile(optimizer=optimizer,
                                             loss='mean_absolute_error')
            self.autoencoder_B_multi.compile(optimizer=optimizer,
                                             loss='mean_absolute_error')
        except Exception:
            self.autoencoder_A_multi = self.autoencoder_A
            self.autoencoder_B_multi = self.autoencoder_B

        self.autoencoder_A.compile(optimizer=optimizer,
                                   loss='mean_absolute_error')
        self.autoencoder_B.compile(optimizer=optimizer,
                                   loss='mean_absolute_error')
Example 23
    def load(self):
        """

        Returns
        -------
        tensorflow.keras.models.Model
            A neural network of sequential layers
            from the configured layer list.
        """
        layers = [Input(**self._input_params)]

        for i, layer in enumerate(self._layers):
            next_tensor = layer_from_config(layer)
            layers.append(next_tensor(layers[i]))
        return KerasModel(inputs=layers[0], outputs=layers[-1])
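A hypothetical `self._layers` configuration that `load()` could consume, assuming `layer_from_config` deserializes Keras-style layer dicts (the schema is an assumption, for illustration only):

# Hypothetical configuration
_input_params = {'shape': (128, 128, 3)}
_layers = [
    {'class_name': 'Conv2D', 'config': {'filters': 32, 'kernel_size': 3, 'activation': 'relu'}},
    {'class_name': 'GlobalAveragePooling2D', 'config': {}},
    {'class_name': 'Dense', 'config': {'units': 10, 'activation': 'softmax'}},
]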
    def evaluate(self,
                 x,
                 y,
                 batch_size: int = 16,
                 verbose: bool = True) -> float:
        """Evaluate model."""
        test_sequence = DatasetSequence(x,
                                        y,
                                        batch_size,
                                        format_fn=self.batch_format_fn)

        # We can use the `ctc_decoded` layer that is part of our model here.
        decoding_model = KerasModel(
            inputs=self.network.input,
            outputs=self.network.get_layer("ctc_decoded").output)
        preds = decoding_model.predict(test_sequence)

        trues = np.argmax(y, -1)
        pred_strings = [
            "".join(self.data.mapping.get(label, "")
                    for label in pred).strip(" |_") for pred in preds
        ]
        true_strings = [
            "".join(self.data.mapping.get(label, "")
                    for label in true).strip(" |_") for true in trues
        ]

        char_accuracies = [
            1 - editdistance.eval(true_string, pred_string) / len(true_string)
            for pred_string, true_string in zip(pred_strings, true_strings)
        ]
        if verbose:
            sorted_ind = np.argsort(char_accuracies)
            print("\nLeast accurate predictions:")
            for ind in sorted_ind[:5]:
                print(f"True: {true_strings[ind]}")
                print(f"Pred: {pred_strings[ind]}")
            print("\nMost accurate predictions:")
            for ind in sorted_ind[-5:]:
                print(f"True: {true_strings[ind]}")
                print(f"Pred: {pred_strings[ind]}")
            print("\nRandom predictions:")
            random_ind = np.random.randint(0, len(char_accuracies), 5)
            for ind in random_ind:  # pylint: disable=not-an-iterable
                print(f"True: {true_strings[ind]}")
                print(f"Pred: {pred_strings[ind]}")
        mean_accuracy = np.mean(char_accuracies)
        return mean_accuracy
Example 25
def line_lstm_ctc(input_shape,
                  output_shape,
                  window_width=28,
                  window_stride=14):  # pylint: disable=too-many-locals
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(
            f'Window width/stride need to generate >= {output_length} windows (currently {num_windows})'
        )

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length, ), name='y_true')
    input_length = Input(shape=(1, ), name='input_length')
    label_length = Input(shape=(1, ), name='label_length')

    gpu_present = len(device_lib.list_local_devices()) > 1
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the lstm outputs to softmax outputs.
    # Note that LSTMs expect an input of shape (batch_size, num_timesteps, feature_length).

    # Your code below (Lab 3)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    image_patches = Lambda(
        slide_window,
        arguments={'window_width': window_width,
                   'window_stride': window_stride})(image_reshaped)
    # (num_windows, image_height, window_width, 1)

    # Make a LeNet and get rid of the last two layers (softmax and dropout)
    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs,
                         outputs=convnet.layers[-2].output)
    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 128)

    lstm_output = lstm_fn(128, return_sequences=True)(convnet_outputs)
    # (num_windows, 128)

    softmax_output = Dense(num_classes, activation='softmax',
                           name='softmax_output')(lstm_output)
    # (num_windows, num_classes)
    # Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows})(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name='ctc_loss')(
            [y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded')([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output])
    return model
Example 26
    def sub_model(self, layer_name):
        """
        Create a sub-model with the same inputs, and the outputs of a specific
        layer in the deoxys model.

        Parameters
        ----------
        layer_name : str
            name of layer

        Returns
        -------
        tensorflow.keras.models.Model
            Model whose outputs are those of the layer named layer_name
        """
        return KerasModel(inputs=self.model.inputs,
                          outputs=self.layers[layer_name].output)
Example 27
def deepten(num_classes,
            input_shape,
            backbone_cnn=None,
            encode_K=32,
            conv1x1=128,
            dense_layers=[],
            dropout_rate=None):
    '''Combine a backbone CNN + Encoding layer + Dense layers into a DeepTEN.

    Parameters
    ----------
    backbone_cnn : KerasModel or str
        Feature extraction network. If KerasModel, should output features (N, H, W, C).
        If str, loads the corresponding ImageNet model from `keras.applications`.
    num_classes : int
        Number of classes for softmax output layer
    input_shape : tuple of int, optional
        Shape of input image. Can be None, since Encoding layer allows variable input sizes.
    encode_K : int, optional
        Number of codewords to learn, default=32.
    conv1x1 : int, optional
        Add a 1x1 conv to reduce number of filters in backbone_cnn.output before Encoding layer, default=128.
    dense_layers : iterable of int, optional
        Sizes for additional Dense layers between Encoding.output and softmax, default=[].
    dropout_rate: float, optional
        Specify a dropout rate for Dense layers

    Returns
    -------
    DeepTEN : KerasModel
        Deep Texture Encoding Network
    '''
    assert backbone_cnn is not None
    backbone_model = make_backbone(backbone_cnn, input_shape)
    conv_output = backbone_model.output
    if conv1x1 is not None:
        conv_output = Conv2D(conv1x1, (1, 1), activation='relu')(conv_output)
        conv_output = BatchNormalization()(conv_output)

    x = Encoding(encode_K, dropout=dropout_rate)(conv_output)
    x = make_dense_layers(dense_layers, dropout=dropout_rate)(x)
    pred = Dense(num_classes, activation='softmax')(x)

    model = KerasModel(inputs=backbone_model.input, outputs=pred)

    return model
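A hypothetical invocation (the class count and input size are made up):

model = deepten(num_classes=47, input_shape=(224, 224, 3), backbone_cnn='resnet50')
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])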
Example 28
def make_backbone(backbone_cnn, input_shape):
    '''Check an existing backbone Model or grab ImageNet pretrained from keras_apps.'''
    if backbone_cnn is None:
        return None
    elif isinstance(backbone_cnn, KerasModel):
        assert len(backbone_cnn.output_shape) == 4, \
            'backbone_cnn must output a 4D Tensor'
        return backbone_cnn
    elif isinstance(backbone_cnn, str):
        assert backbone_cnn in keras_apps, \
            'Invalid keras.applications string'
        model = keras_apps[backbone_cnn](include_top=False,
                                         input_shape=input_shape)
        # resnet50 ends with a 7x7 pooling, which collapses conv to 1x1 for 224x224 input
        if backbone_cnn == 'resnet50':
            model = KerasModel(inputs=model.input,
                               outputs=model.layers[-2].output)
        return model
    else:
        raise ValueError('input to make_backbone() has invalid type')
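`keras_apps` is presumably a name-to-constructor mapping over `keras.applications`; a plausible sketch (the exact keys and coverage are assumptions):

from tensorflow.keras import applications

keras_apps = {
    'vgg16': applications.VGG16,
    'resnet50': applications.ResNet50,
    'inception_v3': applications.InceptionV3,
    'mobilenet': applications.MobileNet,
}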
Example 29
 def defineModel(self,
                 maxlen,
                 max_features=20000,
                 embed_size=128,
                 number_of_classes=1):
     # Input layer
     inp = Input(shape=(maxlen, ))
     x = Embedding(max_features, embed_size)(inp)
     # LSTM layer
     x = LSTM(60, return_sequences=True, name='lstm_layer')(x)
     x = GlobalMaxPool1D()(x)
     x = Dropout(0.1)(x)
     x = Dense(50, activation="relu")(x)
     x = Dropout(0.1)(x)
     # sigmoid, not softmax: a single-unit softmax is constant 1, and the loss is binary_crossentropy
     x = Dense(number_of_classes, activation="sigmoid")(x)
     model = KerasModel(inputs=inp, outputs=x)
     model.compile(loss='binary_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
     return model
Example 30
        # lstm_output = Bidirectional(lstm_fn(256, return_sequences=True))(lstm_output)
        # lstm_output = Dropout(0.5)(lstm_output)
        lstm_output = BatchNormalization()(lstm_output)
        lstm_output = Conv1D(256, 3, activation='relu', padding='SAME')(lstm_output)
        lstm_output = Dropout(0.5)(lstm_output)

    softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output)
    # (num_windows, num_classes)
    ##### Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows}
    )(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name='ctc_loss'
    )([y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded'
    )([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output]
    )
    return model