Example #1
def get_optimizer(config):
    if config.use_lr_scheduler:
        if config.lr_scheduler == "InverseTimeDecay":
            learning_rate_schedule = InverseTimeDecay(
                config.initial_lr, config.decay_steps, config.decay_rate
            )
        elif config.lr_scheduler == "ExponentialDecay":
            learning_rate_schedule = ExponentialDecay(
                config.initial_lr, config.decay_steps, config.decay_rate
            )
        elif config.lr_scheduler.lower() == "custom":
            if config.optimizer.lower() == "adam":
                optimizer = Adam(learning_rate=config.initial_lr)
            elif config.optimizer.lower() == "rmsprop":
                optimizer = RMSprop(learning_rate=config.initial_lr)
            else:
                raise Exception(
                    """Please enter a supported optimizer: Adam or RMSprop."""
                )
            return optimizer
        else:
            raise Exception(
                """Please enter a supported learning rate scheduler:
                            InverseTimeDecay or ExponentialDecay."""
            )
        if config.optimizer.lower() == "adam":
            optimizer = Adam(learning_rate_schedule)
        elif config.optimizer.lower() == "rmsprop":
            optimizer = RMSprop(learning_rate_schedule)
        else:
            raise Exception("""Please enter a supported optimizer: Adam or RMSprop.""")
    else:
        if config.optimizer.lower() == "adam":
            optimizer = Adam(learning_rate=config.lr)
        elif config.optimizer.lower() == "rmsprop":
            optimizer = RMSprop(learning_rate=config.lr)
        else:
            raise Exception("""Please enter a supported optimizer: Adam or RMSprop.""")
    return optimizer
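A minimal usage sketch of get_optimizer, assuming TensorFlow 2.x. The config object below is a hypothetical stand-in (a real project might pass an argparse namespace or similar), and the imports also cover the names the function itself references:

from types import SimpleNamespace

from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.optimizers.schedules import ExponentialDecay, InverseTimeDecay

# Hypothetical config; attribute names mirror the lookups in get_optimizer above.
config = SimpleNamespace(
    use_lr_scheduler=True,
    lr_scheduler="ExponentialDecay",
    optimizer="adam",
    initial_lr=1e-3,
    decay_steps=1000,
    decay_rate=0.9,
    lr=1e-3,  # only read when use_lr_scheduler is False
)
optimizer = get_optimizer(config)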
Example #2
def tabular_cnn(n_of_input_columns, n_of_output_classes):
    inputs = Input(shape=(n_of_input_columns, ))
    inputs_normalization = BatchNormalization()(inputs)
    inputs_feature_selection = Dropout(0.3)(inputs_normalization)
    x = Reshape((n_of_input_columns, 1))(inputs_feature_selection)
    x = Conv1D(filters=64, kernel_size=5, activation='relu', padding='same')(x)
    x = Dropout(0.5)(x)
    x = Conv1D(filters=64, kernel_size=5, activation='relu', padding='same')(x)
    x = MaxPooling1D(pool_size=5, padding='same')(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    output = Dense(n_of_output_classes, activation='softmax')(x)
    model = Model(inputs, output)
    lr_schedule = ExponentialDecay(initial_learning_rate=1e-5,
                                   decay_steps=1000,
                                   decay_rate=0.8)
    optimizer = SGD(learning_rate=lr_schedule)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['categorical_accuracy'])
    return model
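A usage sketch, assuming the Keras layers used above (Input, BatchNormalization, Conv1D, Dense, ...) and ExponentialDecay/SGD are imported at module level; the column and class counts are illustrative only:

# Hypothetical shapes: 30 tabular input columns, 4 output classes.
model = tabular_cnn(n_of_input_columns=30, n_of_output_classes=4)
model.summary()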
Example #3
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
    if decay_type:
        decay_type = decay_type.lower()

    if decay_type is None:
        lr_scheduler = learning_rate
    elif decay_type == 'cosine':
        lr_scheduler = CosineDecay(
            initial_learning_rate=learning_rate,
            decay_steps=decay_steps,
            alpha=0.2)  # use 0.2*learning_rate as final minimum learning rate
    elif decay_type == 'exponential':
        lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=0.9)
    elif decay_type == 'polynomial':
        lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate,
                                       decay_steps=decay_steps,
                                       end_learning_rate=learning_rate / 100)
    else:
        raise ValueError('Unsupported lr decay type')

    return lr_scheduler
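For reference, ExponentialDecay follows lr(step) = initial_learning_rate * decay_rate ** (step / decay_steps). A quick sketch of inspecting the schedule returned above (assumes the schedule classes are imported as in the other examples):

# Sketch: query the exponential schedule (decay_rate=0.9) at a few global steps.
sched = get_lr_scheduler(learning_rate=1e-3, decay_type='exponential',
                         decay_steps=1000)
for step in (0, 1000, 2000):
    print(step, float(sched(step)))  # 0.001, 0.0009, 0.00081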
Example #4
def siamese_model_VGG(exp_values):

    embedding_length, fc1, fc2, feat1, feat2, feat3, feat4, feat5 = exp_values

    imgA = Input(shape=(64, 64, 3))
    imgB = Input(shape=(64, 64, 3))

    base_model = VGG_16(embedding_length,
                        fc1,
                        fc2,
                        feat1,
                        feat2,
                        feat3,
                        feat4,
                        feat5,
                        input_shape=(64, 64, 3))

    featsA = base_model(imgA)
    featsB = base_model(imgB)

    distance = Lambda(euclidean_distance, name='compute_ED')([featsA, featsB])
    model = Model(inputs=[imgA, imgB], outputs=distance, name='Contrastive')

    # model.load_weights('saved_models/checkpoints/' + model_name)

    initial_learning_rate = 0.001
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=781,
                                   decay_rate=0.90,
                                   staircase=True)

    model.compile(loss=contrastive_loss,
                  optimizer=keras.optimizers.Adam(lr_schedule),
                  metrics=[accuracy1])

    model.summary()
    return model
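A usage sketch; the eight entries of exp_values (embedding length, two fully connected widths, five convolutional feature counts) are illustrative placeholders, and VGG_16, contrastive_loss and accuracy1 are assumed to be defined elsewhere in the project:

# Hypothetical experiment values: (embedding_length, fc1, fc2, feat1..feat5).
exp_values = (128, 512, 256, 32, 64, 128, 256, 512)
model = siamese_model_VGG(exp_values)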
Example #5
    len(scan_validate)).map(validation_prepare).batch(batch_size).prefetch(2))

# data = train_data.take(1)
# images, labels = list(data)[0]
# images = images.numpy()
# image = images[0]
# print(images.shape)  # (2, 128, 128, 64, 1)
# print(image.shape)  # (128, 128, 64, 1)
# print(image[:, :, 30].shape)  # (128, 128, 1)
''' Build model '''
model = get_model(width=128, height=128, depth=64)
# model.summary()
''' Compile model '''
initial_lr = 0.0001
lr_schedule = ExponentialDecay(initial_learning_rate=initial_lr,
                               decay_steps=100000,
                               decay_rate=0.96,
                               staircase=True)

model.compile(optimizer=Adam(learning_rate=lr_schedule),
              loss="binary_crossentropy",
              metrics=['accuracy'])
''' Define callbacks '''
checkpoint_callback = ModelCheckpoint(filepath='3d_image_classification.h5',
                                      monitor='val_accuracy',
                                      save_best_only=True)
early_stop_callback = EarlyStopping(monitor='val_accuracy', patience=15)
''' Train the model, doing validation at the end of each epoch '''
epoch = 50

model.fit(train_data,
          epochs=epoch,
Example #6
denseLength = 8
# Layer growth factor
denseGrowth = 0.85

# Dropout for additional regularization.
dropout_val = 0.10

# Activation Functions
# Non output activation
activation_function = 'relu'
# Output activation.
output_activation = 'linear'

# Learning rate decay to prevent wild oscillations when closer to minima.
learningRateScheduler = ExponentialDecay(learning_rate,
                                         decay_steps=300,
                                         decay_rate=0.9,
                                         staircase=True)
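# With staircase=True the decay is applied in discrete jumps rather than
# continuously: lr(step) = learning_rate * 0.9 ** floor(step / 300), so after
# 600 optimizer steps the rate has dropped to 0.81 * learning_rate.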

###### !!!!!!!!!!!!!!! DO NOT EDIT UNLESS YOU KNOW WHAT TO DO !!!!!!!!!!!!!!! ######

model = build_model_FCNN(X_train.shape, outputSize, denseWidth, denseLength,
                         denseGrowth, dropout_val, activation_function,
                         output_activation)
model = compile_model(model, learningRateScheduler, momentum, loss_function,
                      metrics)
model.summary()

print("Complete. Training Commencing.")

model_history = model.fit(X_train,
                          y_train,
Example #7
def get_lr_sched(lr):
    # `config` is assumed to be defined in the enclosing scope.
    return ExponentialDecay(lr, config.decay_steps, config.decay_rate)
def get_Lemaire_MTL_model(
        TR_STEPS,
        N_MELS=120,
        n_classes=3,
        patch_size=68,
        loss_weights=None,  #{'S': 1.0, 'M': 1.0, 'R': 1.0, '3C': 1.0},
):
    '''
    MTL modification of the TCN based model architecture proposed by 
    Lemaire et al. [3]
    Code source: https://github.com/qlemaire22/speech-music-detection
    The model parameters are tuned on the MUSAN dataset.

        [3] Lemaire, Q., & Holzapfel, A. (2019). Temporal convolutional networks 
    for speech and music detection in radio broadcast. In 20th International 
    Society for Music Information Retrieval Conference, ISMIR 2019, 4-8 
    November 2019. International Society for Music Information Retrieval.

    Parameters
    ----------
    TR_STEPS : int
        Number of training batches per epoch.
    N_MELS : int, optional
        The default is 120.
    n_classes : int, optional
        The default is 3.
    patch_size : int, optional
        The default is 68.
    loss_weights : dict, optional
        The default is {'S': 1.0, 'M': 1.0, 'R': 1.0, '3C': 1.0}.

    Returns
    -------
    model : tensorflow.keras.models.Model
        CNN model.
    lr : float
        Learning rate.

    '''
    from tcn import TCN
    from tcn.tcn import process_dilations

    kernel_size = 3  # Temporal Conv, LogMelSpec
    Nd = 8  # Temporal Conv, LogMelSpec
    nb_stacks = 3  # Temporal Conv, LogMelSpec
    n_layers = 1  # Temporal Conv, LogMelSpec
    n_filters = 32  # Temporal Conv, LogMelSpec
    use_skip_connections = False  # Temporal Conv, LogMelSpec
    activation = 'norm_relu'
    dilations = [2**nd for nd in range(Nd)]
    list_n_filters = [n_filters] * n_layers
    dropout_rate = np.random.uniform(0.05, 0.5)
    padding = 'same'
    dilations = process_dilations(dilations)

    input_layer = Input(shape=(patch_size, N_MELS))

    for i in range(n_layers):
        if i == 0:
            x = TCN(list_n_filters[i],
                    kernel_size,
                    nb_stacks,
                    dilations,
                    activation,
                    padding,
                    use_skip_connections,
                    dropout_rate,
                    return_sequences=True)(input_layer)
        else:
            x = TCN(list_n_filters[i],
                    kernel_size,
                    nb_stacks,
                    dilations,
                    activation,
                    padding,
                    use_skip_connections,
                    dropout_rate,
                    return_sequences=True,
                    name="tcn" + str(i))(x)

    x = Flatten()(x)

    classification_output = Dense(n_classes, activation='softmax',
                                  name='3C')(x)

    sp_output, x_sp, mu_output, x_mu, smr_output, x_smr = MTL_modifications(x)

    model = Model(input_layer,
                  [sp_output, mu_output, smr_output, classification_output])

    initial_learning_rate = 0.002
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=3 * TR_STEPS,
                                   decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule,
                               clipnorm=1,
                               momentum=0.9)

    model.compile(loss={
        'S': 'binary_crossentropy',
        'M': 'binary_crossentropy',
        'R': 'mean_squared_error',
        '3C': 'categorical_crossentropy'
    },
                  loss_weights=loss_weights,
                  optimizer=optimizer,
                  metrics={'3C': 'accuracy'})

    model.summary()
    print(
        'MTL modification of the architecture of Lemaire et. al. Proc. of the 20th ISMIR Conference, Delft, Netherlands, November 4-8, 2019\n'
    )

    return model, initial_learning_rate
cnn = Sequential()

cnn.add(Conv2D(filters=32, kernel_size=4, activation="relu", input_shape=[256, 256, 3]))
cnn.add(MaxPool2D(pool_size=2, strides=2, padding='valid'))
cnn.add(Dropout(0.2))
cnn.add(Conv2D(filters=32, kernel_size=4, activation="relu"))
cnn.add(MaxPool2D(pool_size=2, strides=2, padding='valid'))
cnn.add(Flatten())
cnn.add(Dense(units=128, activation='relu'))
cnn.add(Dense(units=1, activation='sigmoid'))

### Optimizers tested
opt = Adam(learning_rate=0.1) 
lr_schedule = ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=1000,
    decay_rate=0.9)
opt2 = SGD(learning_rate=lr_schedule)
 
cnn.compile(optimizer=opt, loss='binary_crossentropy',
            metrics=['accuracy', 'Precision', 'Recall'])

cnn.summary()

batch_size = 64 
cnn.fit(training_set,
        steps_per_epoch=880 // batch_size,
        epochs=4,
        validation_data=test_set,
        validation_steps=220 // batch_size)

### Save the model
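# A minimal sketch of the save step (the filename is hypothetical):
cnn.save('cnn_classifier.h5')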
def get_Lemaire_model(
    TR_STEPS,
    # kernel_size=3, # Temporal Conv, MelSpec
    # Nd=8, # Temporal Conv, MelSpec
    # nb_stacks=3, # Temporal Conv, MelSpec
    # n_layers=1, # Temporal Conv, MelSpec
    # n_filters=32, # Temporal Conv, MelSpec
    # use_skip_connections=True, # Temporal Conv, MelSpec
    # ----
    kernel_size=3,  # Temporal Conv, LogMelSpec
    Nd=8,  # Temporal Conv, LogMelSpec
    nb_stacks=3,  # Temporal Conv, LogMelSpec
    n_layers=1,  # Temporal Conv, LogMelSpec
    n_filters=32,  # Temporal Conv, LogMelSpec
    use_skip_connections=False,  # Temporal Conv, LogMelSpec
    # ----
    activation='norm_relu',
    bidirectional=True,
    N_MELS=80,
    n_classes=2,
    patch_size=68,
):
    '''
    TCN based model architecture proposed by Lemaire et al. [3]
    Code source: https://github.com/qlemaire22/speech-music-detection    

    Parameters
    ----------
    TR_STEPS : int
        Number of training batches per epoch.
    n_filters : int, optional
        The default is 32.
    Nd : int, optional
        The default is 8.
    kernel_size : int, optional
        The default is 3.
    nb_stacks : int, optional
        The default is 3.
    activation : string, optional
        The default is 'norm_relu'.
    n_layers : int, optional
        The default is 1.
    use_skip_connections : boolean, optional
        The default is False.
    bidirectional : boolean, optional
        The default is True.
    N_MELS : int, optional
        The default is 80.
    n_classes : int, optional
        The default is 2.
    patch_size : int, optional
        The default is 68.

    Returns
    -------
    model : tensorflow.keras.models.Model
        CNN model.
    lr : float
        Learning rate.

    '''
    from tcn import TCN
    from tcn.tcn import process_dilations

    dilations = [2**nd for nd in range(Nd)]
    list_n_filters = [n_filters] * n_layers
    dropout_rate = np.random.uniform(0.05, 0.5)

    if bidirectional:
        padding = 'same'
    else:
        padding = 'causal'

    dilations = process_dilations(dilations)

    input_layer = Input(shape=(patch_size, N_MELS))

    for i in range(n_layers):
        if i == 0:
            x = TCN(list_n_filters[i],
                    kernel_size,
                    nb_stacks,
                    dilations,
                    'norm_relu',
                    padding,
                    use_skip_connections,
                    dropout_rate,
                    return_sequences=True)(input_layer)
        else:
            x = TCN(list_n_filters[i],
                    kernel_size,
                    nb_stacks,
                    dilations,
                    'norm_relu',
                    padding,
                    use_skip_connections,
                    dropout_rate,
                    return_sequences=True,
                    name="tcn" + str(i))(x)

    x = Flatten()(x)

    x = Dense(n_classes)(x)
    x = Activation('softmax')(x)
    output_layer = x

    model = Model(input_layer, output_layer)

    initial_learning_rate = 0.002
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=3 * TR_STEPS,
                                   decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule,
                               clipnorm=1,
                               momentum=0.9)

    if n_classes == 2:
        model.compile(loss='binary_crossentropy',
                      metrics='accuracy',
                      optimizer=optimizer)
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy',
                      metrics='accuracy',
                      optimizer=optimizer)

    model.summary()
    print(
        'Architecture of Lemaire et. al. Proc. of the 20th ISMIR Conference, Delft, Netherlands, November 4-8, 2019\n'
    )

    return model, initial_learning_rate
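Note that with decay_steps=3 * TR_STEPS and decay_rate=0.1 the (non-staircase) schedule multiplies the learning rate by 0.1 every three epochs; a quick check, assuming TensorFlow 2.x and an illustrative 500 batches per epoch:

from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Sketch: the learning rate falls from 2e-3 to 2e-4 after exactly three epochs.
TR_STEPS = 500
sched = ExponentialDecay(0.002, decay_steps=3 * TR_STEPS, decay_rate=0.1)
print(float(sched(3 * TR_STEPS)))  # ~0.0002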
Example #11
def transfer_train(
    image_size: int = 128,
    batch_size: int = 32,
    epochs: int = 30,
    l2_regularization: float = 1e-5,
    architecture: str = "BiT-M R50x1",
    learning_rate: float = 1e-3,
    lr_decay_rate: float = 0.99,
    lr_decay_steps: int = 500,
    project_name: str = "Seedlings Image Classification (Transfer Learning)",
) -> RunResult:

    # Load data
    train_data = ImageDataset("train",
                              TRAIN_DIR,
                              target_size=(image_size, image_size))
    # Add data augmentation to training set.
    train_data_gen = ImageDataGenerator(
        horizontal_flip=True, zoom_range=0.1).flow(train_data.X,
                                                   train_data.y,
                                                   batch_size=batch_size)
    dev_data = ImageDataset("dev",
                            DEV_DIR,
                            target_size=(image_size, image_size))

    # Define and compile the model.
    model = tf.keras.Sequential([
        hub.KerasLayer(model_map[architecture], trainable=False),
        L.Flatten(),
        L.Dense(
            train_data.n_classes,
            activation="softmax",
            kernel_regularizer=tf.keras.regularizers.l2(l2_regularization),
        ),
    ])

    model.compile(
        optimizer=Adam(
            ExponentialDecay(learning_rate,
                             lr_decay_steps,
                             lr_decay_rate,
                             staircase=True)),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    # Make and init the wandb run.
    wandb.init(project=project_name, reinit=True)
    wandb.config.update({
        "batch_size": batch_size,
        "epochs": epochs,
        "image_size": image_size,
        "l2_regularization": l2_regularization,
        "architecture": architecture,
        "learning_rate": learning_rate,
        "lr_decay_rate": lr_decay_rate,
        "lr_decay_steps": lr_decay_steps,
    })

    # Train the model
    model.fit(
        train_data_gen,
        steps_per_epoch=int(len(train_data.X) / batch_size),
        epochs=epochs,
        validation_data=(dev_data.X, dev_data.y),
        callbacks=[WandbCallback(save_model=False)],
    )

    # Evaluate the model
    train_loss, train_acc = model.evaluate(train_data.X, train_data.y)
    dev_loss, dev_acc = model.evaluate(dev_data.X, dev_data.y)

    # Log the scores
    wandb.run.summary.update({
        "final_val_loss": dev_loss,
        "final_val_acc": dev_acc,
        "final_train_loss": train_loss,
        "final_train_acc": train_acc,
    })

    wandb.run.save()
    run_name = wandb.run.name
    wandb.join()  # end this run

    return RunResult(
        dev_acc,
        train_acc,
        run_name,
        make_submission(model, image_size, train_data.index2class),
    )
Example #12
def main(args):
    # load train/test data
    datadir = os.path.join(args.volumedir, args.datadir)
    # train = imdb_data_load(datadir)
    train, test = load_datasets(datadir)
    # train, test = load_context_target_pairs(datadir, context_len = args.conlength)
    # train = sorted(train, key=lambda a: len(a), reverse=True)
    # train = train[:min(len(train), args.datacap)]
    # for msg in train:
    #     if "roster" in msg:
    #         print(msg)
    # return

    # Dynamically load modelBuilder class
    moduleName, klassName = args.modelbuilder.split(".")
    mod = __import__('models.%s' % moduleName, fromlist=[klassName])
    klass = getattr(mod, klassName)
    modelBuilder = klass(args)

    timestamp = int(time.time())
    logdir = os.path.join(args.volumedir,
                          datetime.datetime.today().strftime('%Y%m%d'),
                          args.logdir)
    if not os.path.isdir(logdir):
        os.makedirs(logdir)
    hdlr = logging.FileHandler(
        os.path.join(logdir, "training_output_%d.log" % timestamp))
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.INFO)

    checkpointdir = os.path.join(args.volumedir,
                                 datetime.datetime.today().strftime('%Y%m%d'),
                                 args.checkpointdir)
    if not os.path.isdir(checkpointdir):
        os.makedirs(checkpointdir)
    checkpointpath = configure_checkpointing(args, timestamp)
    checkpoint_callback = ModelCheckpoint(filepath=checkpointpath,
                                          save_weights_only=False)

    # Create or load existing model
    init_epoch = 0
    if args.textlineds:
        X, Y, vocab, tokens = SlackTextLineDataset(args, train).get_dataset()
        reverse_token_map = {t: i for i, t in enumerate(vocab)}
    else:
        tokens, vocab, reverse_token_map = modelBuilder.tokenize(
            train, freq_threshold=args.freqthreshold)
    # text_ds = text_ds.shuffle(buffer_size=1024).batch(args.minibatchsize)
    # print(text_ds.cardinality().numpy())
    if args.loadmodel and os.path.exists(args.loadmodel):
        modelpath = args.loadmodel
        timestamp = int(modelpath.split(".")[1])
        init_epoch = int(modelpath.split(".")[2])
        loaddir = "/".join(modelpath.split("/")[:-1])
        model = load_model(modelpath, custom_objects={"EinsumOp": EinsumOp})
        vocab = load_vocab(loaddir, timestamp)
        # tokens = load_tokens(loaddir, timestamp)
        reverse_token_map = {t: i for i, t in enumerate(vocab)}
    else:
        model = modelBuilder.create_model(vocab)
        save_vocab(vocab, checkpointdir, timestamp)
        if args.savetokens:
            save_tokens(tokens, checkpointdir, timestamp)

    plot_model(model,
               to_file='model_plot_2.png',
               show_shapes=True,
               show_layer_names=True)
    optimizer_map = {"adam": Adam, "rmsprop": RMSprop, "sgd": SGD}
    optimizer = optimizer_map[
        args.optimizer] if args.optimizer in optimizer_map.keys() else RMSprop
    lr_decay = ExponentialDecay(initial_learning_rate=args.learningrate,
                                decay_rate=args.decayrate,
                                decay_steps=args.decaysteps)
    custom_lr = CustomSchedule(args.hiddensize)
    opt = optimizer(learning_rate=lr_decay, clipvalue=3)
    # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])
    # attn_4_output = model.get_layer("attention_values_4").output
    # dense_v_out = model.get_layer("dense_v_4").output
    # einsum_com_output = model.get_layer("einsum_com_4").output
    # inpt = model.get_layer("input")
    # attn_factor_model = keras.Model(inputs=inpt.input, outputs=attn_4_output)
    # einsum_com_model = keras.Model(inputs=inpt.input, outputs=einsum_com_output)
    # dense_v_model = keras.Model(inputs=inpt.input, outputs=dense_v_out)
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(name="loss"),
        run_eagerly=True,
        optimizer=opt,
        metrics=[
            tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
            tf.keras.metrics.SparseTopKCategoricalAccuracy(
                k=3, name="top_3_accuracy"),
            tf.keras.metrics.SparseTopKCategoricalAccuracy(
                k=5, name="top_5_accuracy"),
            last_word_prediction_accuracy(args.minibatchsize, args.seqlength)
        ])
    # last_word_prediction_topk_accuracy(args.minibatchsize, args.seqlength, 5)])

    model.summary(print_fn=logger.info)

    checkpointnames = args.checkpointnames % timestamp
    sample_func = lambda: modelBuilder.sample(model, tokens, vocab,
                                              reverse_token_map)
    callbacks = get_callbacks(args.volumedir, checkpointdir, checkpointnames,
                              timestamp, sample_func)
    sample_callback = LambdaCallback(
        on_epoch_end=lambda epoch, logs: sample_func())
    logger_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: logger.
                                     info("Epoch %d: %s" % (epoch, str(logs))))

    if not args.textlineds:
        trainseqs = modelBuilder.get_input_sequences(tokens, reverse_token_map)
        # trainseqs, valseqs = validation_split(seqs, val_split=args.valsplit)

        if args.modelbuilder == "keras_word_lm.WordLanguageModelBuilder":
            trainvectors = SequenceVectors(args, trainseqs, vocab)
            history = model.fit(trainvectors,
                                epochs=args.numepochs,
                                initial_epoch=init_epoch,
                                callbacks=[
                                    sample_callback, logger_callback,
                                    checkpoint_callback
                                ])
            logger.info(history.history)
            plot_history(history.history, args.learningrate, logdir, timestamp)
            return
        X, Y, sample_weights = modelBuilder.build_input_vectors(
            trainseqs, vocab, reverse_token_map)

    # ds = modelBuilder.build_input_vectors(trainseqs, vocab, reverse_token_map)
    # model.fit(X, Y,
    # print(ds)
    # start_prompt = "this movie is"
    # start_tokens = [reverse_token_map[t] for t in start_prompt.split()]
    # num_tokens_generated = 40
    # text_gen_callback = TextGenerator(num_tokens_generated, args.seqlength, start_tokens, vocab)
    history = model.fit(
        X,
        Y,
        epochs=args.numepochs,
        initial_epoch=init_epoch,
        batch_size=args.minibatchsize,
        validation_split=0.1,
        shuffle=True,
        callbacks=[sample_callback, logger_callback, checkpoint_callback])
    logger.info(history.history)
    plot_history(history.history, args.learningrate, logdir, timestamp)
    return
    # NOTE: the manual mini-batch training loop below is unreachable because of
    # the unconditional return above; it is kept here for reference only.
    allmetrics = {}
    for epoch in range(init_epoch, args.numepochs):
        batches = rand_mini_batches(trainseqs, args.minibatchsize)
        for i, batch in enumerate(batches):
            X, Y, sample_weights = modelBuilder.build_input_vectors(
                batch, vocab, reverse_token_map)
            metrics = model.train_on_batch(X,
                                           Y,
                                           sample_weight=sample_weights,
                                           reset_metrics=i == 0,
                                           return_dict=True)
            if i % 100 == 0:
                valmetrics = evaluate_mini_batches(model, modelBuilder, vocab,
                                                   reverse_token_map, valseqs,
                                                   args.minibatchsize)
                metrics.update(valmetrics)
                for key in metrics.keys():
                    if key in allmetrics.keys():
                        allmetrics[key] += [metrics[key]]
                    else:
                        allmetrics[key] = [metrics[key]]
                print("Batch %d of %d in epoch %d: %s" %
                      (i, len(batches), epoch, str(metrics)))
        logger.info("Epoch %d: %s" % (epoch, str(metrics)))
        # logger.info("Validation metrics %s" % str(valmetrics))
        if args.runsamples:
            sample_output = sample_func()
            logger.info("\n" + sample_output)
        model.save(
            os.path.join(checkpointdir, checkpointnames).format(epoch=epoch))
        plot_history(allmetrics, args.learningrate, logdir, timestamp)
Example #13
    def __init__(self, **kwargs):
        """
            Input:
                translation_spec - dict with keys 'f_X', 'f_Y'.
                                   Values are passed as kwargs to the
                                   respective ImageTranslationNetwork's
                cycle_lambda=2 - float, loss weight
                cross_lambda=1 - float, loss weight
                l2_lambda=1e-3 - float, loss weight
                learning_rate=1e-5 - float, initial learning rate for
                                     ExponentialDecay
                clipnorm=None - gradient norm clip value, passed to
                                tf.clip_by_global_norm if not None
                logdir=None - path to log directory. If provided, tensorboard
                              logging of training and evaluation is set up at
                              'logdir/'
        """
        learning_rate = kwargs.get("learning_rate", 1e-5)
        lr_all = ExponentialDecay(learning_rate,
                                  decay_steps=10000,
                                  decay_rate=0.96,
                                  staircase=True)
        self._optimizer_all = tf.keras.optimizers.Adam(lr_all)
        lr_k = ExponentialDecay(learning_rate,
                                decay_steps=10000,
                                decay_rate=0.9,
                                staircase=True)
        self._optimizer_k = tf.keras.optimizers.Adam(lr_k)
        self.clipnorm = kwargs.get("clipnorm", None)

        # To keep a history for a specific training_metrics,
        # add `self.metrics_history[name] = []` in subclass __init__
        self.train_metrics = {}
        self.difference_img_metrics = {
            "ACC_di": tf.keras.metrics.Accuracy()
        }  #{"AUC": tf.keras.metrics.AUC()}
        self.change_map_metrics = {
            "ACC": tf.keras.metrics.Accuracy(),
            "cohens kappa": CohenKappa(num_classes=2),
            # 'F1': tfa.metrics.F1Score(num_classes=2, average=None)
        }
        assert not set(self.difference_img_metrics) & set(
            self.change_map_metrics)
        # If the metric dictionaries shares keys, the history will not work
        self.metrics_history = {
            **{key: []
               for key in self.change_map_metrics.keys()},
            **{key: []
               for key in self.difference_img_metrics.keys()},
        }

        self.timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        self.channels = {
            "x": kwargs.get("channel_x"),
            "y": kwargs.get("channel_y")
        }

        # Flag used in image_to_tensorboard decorator
        self._save_images = tf.Variable(False, trainable=False)

        logdir = kwargs.get("logdir", None)
        if logdir is not None:
            self.log_path = logdir
            self.tb_writer = tf.summary.create_file_writer(self.log_path)
            self._image_dir = tf.constant(os.path.join(self.log_path,
                                                       "images"))
        else:
            self.tb_writer = tf.summary.create_noop_writer()

        self.evaluation_frequency = tf.constant(kwargs.get(
            "evaluation_frequency", 1),
                                                dtype=tf.int64)
        self.epoch = tf.Variable(0, dtype=tf.int64)
def get_Lemaire_model(hp):
    '''
    TCN based model architecture proposed by Lemaire et al. [3]
    Code source: https://github.com/qlemaire22/speech-music-detection    

    Parameters
    ----------
    hp : object
        Hyperparameters.

    Returns
    -------
    model : tensorflow.keras.models.Model
        CNN model.

    '''
    dilations = [2**nd for nd in range(hp.get('Nd'))]
    list_n_filters = [hp.get('n_filters')] * hp.get('n_layers')
    dropout_rate = np.random.uniform(0.05, 0.5)
    bidirectional = True
    N_MELS = 80
    n_classes = 2
    patch_size = 68

    if bidirectional:
        padding = 'same'
    else:
        padding = 'causal'

    dilations = process_dilations(dilations)

    input_layer = Input(shape=(patch_size, N_MELS))

    for i in range(hp.get('n_layers')):
        if i == 0:
            x = TCN(list_n_filters[i],
                    hp.get('kernel_size'),
                    hp.get('nb_stacks'),
                    dilations,
                    'norm_relu',
                    padding,
                    hp.get('skip_some_connections'),
                    dropout_rate,
                    return_sequences=True)(input_layer)
        else:
            x = TCN(list_n_filters[i],
                    hp.get('kernel_size'),
                    hp.get('nb_stacks'),
                    dilations,
                    'norm_relu',
                    padding,
                    hp.get('skip_some_connections'),
                    dropout_rate,
                    return_sequences=True,
                    name="tcn" + str(i))(x)

    x = Flatten()(x)

    x = Dense(n_classes)(x)
    x = Activation('softmax')(x)
    output_layer = x

    model = Model(input_layer, output_layer)

    initial_learning_rate = 0.002
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=3 * hp.get('TR_STEPS'),
                                   decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule,
                               clipnorm=1,
                               momentum=0.9)

    if n_classes == 2:
        model.compile(loss='binary_crossentropy',
                      metrics='accuracy',
                      optimizer=optimizer)
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy',
                      metrics='accuracy',
                      optimizer=optimizer)

    # print(model.summary())
    # print('Architecture of Lemaire et. al. Proc. of the 20th ISMIR Conference, Delft, Netherlands, November 4-8, 2019\n')
    return model
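This variant reads its settings from an object with a get() method; a minimal sketch of driving it with a Keras Tuner HyperParameters container (the package and the fixed search space are assumptions):

import keras_tuner as kt  # assumption: hp is a Keras Tuner HyperParameters object

hp = kt.HyperParameters()
for name, value in [('Nd', 8), ('n_filters', 32), ('n_layers', 1),
                    ('kernel_size', 3), ('nb_stacks', 3),
                    ('skip_some_connections', False), ('TR_STEPS', 500)]:
    hp.Fixed(name, value)

model = get_Lemaire_model(hp)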
Example #15
# Model initialisation
tf.random.set_seed(seed)
callback = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(N - 1, 4)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(2000, activation=tf.nn.relu))
model.add(tf.keras.layers.LayerNormalization())
model.add(tf.keras.layers.Dense(2000, activation=tf.nn.relu))
# model.add(tf.keras.layers.Dense(50, activation=tf.nn.relu))

model.add(tf.keras.layers.Dense(4 * N))

# learning_rate = CustomSchedule(4, 500)
learning_rate = ExponentialDecay(1e-3, 3000, 0.96)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
model.compile(optimizer=optimizer, loss='mse')

# %%
model.summary()
history = model.fit(X_train, y_train, epochs=10000, verbose=1, validation_split=0.1, callbacks=[callback])
# %%
# Model prediction output
trans_pred = rev_angle_embedding(model.predict(X_test), N, reshape=True) % (2*np.pi)
E_pred = np.zeros(len(X_test))

for i in range(len(E_pred)):
    E_pred[i] = wrapper(trans_pred[i], data_test[i, 0][:N], data_test[i, 0][N:], dt, data_test[i, 3], N)

np.mean(E_pred / data_test[:, 2])
Example #16
LSTM_1 = LSTM(32, return_sequences=True, name='lstm_1')(Reshape_2)
LSTM_2 = LSTM(32, return_sequences=False, name='lstm_2')(LSTM_1)

CN = tf.concat([GAP_1, LSTM_2], axis=-1)
dropout_1 = Dropout(0.2)(CN)
Dense1 = Dense(1, name='Dense_1')(dropout_1)

model = Model(inputs=[Input_1, Input_2], outputs=Dense1)
# save model
checkpoint = ModelCheckpoint(filepath='Model.h5',
                             monitor='val_loss',
                             verbose=1,
                             save_best_only=True)
lr = ExponentialDecay(initial_learning_rate=0.003,
                      decay_steps=16,
                      decay_rate=0.9)
adam = Adam(learning_rate=lr)
model.compile(loss='mse', optimizer=adam)
model.fit(x=[train_X_1, train_X_2],
          y=train_Y,
          batch_size=8,
          epochs=600,
          shuffle=True,
          validation_data=([test_X_1, test_X_2], test_Y),
          callbacks=[checkpoint])
pred = model.predict([test_X_1, test_X_2])
rmse = np.mean(
    pow(
        np.square(
            scaler_Y.inverse_transform(test_Y) -
Example #17
def create_model(
    model_name, log_dir, args
):  # optimizer, learning rate, activation, neurons, batch size, epochs...

    input_shape = input_size(model_name, args)

    if args.head == 'max' or (args.base_trainable
                              and args.head != 't_complex'):
        pool = 'max'
    else:
        pool = 'none'

    if model_name == 'VGG16':
        conv_base = VGG16(weights='imagenet',
                          include_top=False,
                          pooling=pool,
                          input_shape=input_shape)
    elif model_name == 'VGG19':
        conv_base = VGG19(weights='imagenet',
                          include_top=False,
                          pooling=pool,
                          input_shape=input_shape)
    elif model_name == 'ResNet50':
        conv_base = ResNet50(weights='imagenet',
                             include_top=False,
                             pooling=pool,
                             input_shape=input_shape)
    elif model_name == 'InceptionV3':
        conv_base = InceptionV3(weights='imagenet',
                                include_top=False,
                                pooling=pool,
                                input_shape=input_shape)
    elif model_name == 'Xception':
        conv_base = Xception(weights='imagenet',
                             include_top=False,
                             pooling=pool,
                             input_shape=input_shape)
    elif model_name == 'InceptionResNetV2':
        conv_base = InceptionResNetV2(weights='imagenet',
                                      include_top=False,
                                      pooling=pool,
                                      input_shape=input_shape)
    elif model_name == 'NASNetMobile':
        conv_base = NASNetMobile(weights='imagenet',
                                 include_top=False,
                                 pooling=pool,
                                 input_shape=input_shape)
    elif model_name == 'NASNetLarge':
        conv_base = NASNetLarge(weights='imagenet',
                                include_top=False,
                                pooling=pool,
                                input_shape=input_shape)
    elif model_name == 'DenseNet201':
        conv_base = DenseNet201(weights='imagenet',
                                include_top=False,
                                pooling=pool,
                                input_shape=input_shape)
    elif model_name == 'MobileNetV2':
        conv_base = MobileNetV2(weights='imagenet',
                                include_top=False,
                                pooling=pool,
                                input_shape=input_shape)
    else:
        conv_base = None
        print("Model name not known!")
        exit()

    conv_base.trainable = args.base_trainable

    model = models.Sequential()
    if args.base_trainable:
        if args.head == 't_complex':
            model = models.Sequential()
            model.add(conv_base)
            model.add(
                layers.Conv2D(filters=1024,
                              kernel_size=(3, 3),
                              padding='same',
                              strides=1))
            model.add(layers.Flatten())  # ??
            model.add(layers.Dense(1024, activation='sigmoid'))
            model.add(layers.Dense(256, activation='sigmoid'))
            model.add(layers.Dense(args.CLASSES_NO, activation='softmax')
                      )  # (samples, new_rows, new_cols, filters)
        else:
            model.add(conv_base)
            model.add(layers.Dense(args.CLASSES_NO, activation='softmax'))
    elif args.head == 'dense':
        # outside only?
        model.add(conv_base)
        model.add(layers.Flatten())
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(256, activation='relu'))
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(128, activation='relu'))
        model.add(layers.Dense(args.CLASSES_NO, activation='softmax'))
    elif args.head == 'max':
        model.add(conv_base)
        model.add(layers.Dense(512, activation='relu'))
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(256, activation='relu'))
        model.add(layers.Dense(args.CLASSES_NO, activation='softmax'))
    elif args.head == 'mod':
        model = models.Sequential()
        model.add(conv_base)
        model.add(
            layers.Conv2D(filters=2048, kernel_size=(3, 3), padding='valid'))
        model.add(layers.Flatten())  # ??
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(1024, activation='sigmoid'))
        model.add(layers.Dense(256, activation='relu'))
        model.add(layers.Dense(
            args.CLASSES_NO,
            activation='softmax'))  # (samples, new_rows, new_cols, filters)

    if args.lr_decay:
        lr_schedule = ExponentialDecay(args.INIT_LEARN_RATE,
                                       decay_steps=args.DECAY_STEPS,
                                       decay_rate=args.DECAY_RATE,
                                       staircase=True)
        model.compile(loss='categorical_crossentropy',
                      optimizer=SGD(lr_schedule),
                      metrics=['acc'])  # To different optimisers?
    else:
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(learning_rate=args.LEARNING_RATE),
                      metrics=['acc'])

    with open(os.path.join(log_dir, 'modelsummary.txt'), 'w') as f:
        with redirect_stdout(f):
            model.summary()
    model.summary()
    return model
Example #18
def train_model(hypa, force_retrain):
    """MAKEDOC: What is train_model doing?"""
    logg = logging.getLogger(f"c.{__name__}.train_model")
    # logg.debug("Starting train_model")

    # get the words
    words = words_types[hypa["words"]]

    # name the model
    model_name = build_cnn_name(hypa)
    logg.debug(f"model_name: {model_name}")

    # save the trained model here
    model_folder = Path("trained_models") / "cnn"
    if not model_folder.exists():
        model_folder.mkdir(parents=True, exist_ok=True)
    model_path = model_folder / f"{model_name}.h5"
    # logg.debug(f"model_path: {model_path}")

    placeholder_path = model_folder / f"{model_name}.txt"
    # check if this model has already been trained
    if placeholder_path.exists():
        if force_retrain:
            logg.warning("\nRETRAINING MODEL!!\n")
        else:
            logg.debug("Already trained")
            return

    # save info regarding the model training in this folder
    info_folder = Path("info") / "cnn" / model_name
    if not info_folder.exists():
        info_folder.mkdir(parents=True, exist_ok=True)

    # magic to fix the GPUs
    setup_gpus()

    # input data
    processed_path = Path("data_proc") / f"{hypa['dataset']}"
    data, labels = load_processed(processed_path, words)

    # from hypa extract model param
    model_param = {}
    model_param["num_labels"] = len(words)
    model_param["input_shape"] = data["training"][0].shape
    model_param["base_filters"] = hypa["base_filters"]
    model_param["base_dense_width"] = hypa["base_dense_width"]

    # translate types to actual values

    kernel_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(5, 1), (3, 3), (3, 3)],
        "03": [(1, 5), (3, 3), (3, 3)],
    }
    model_param["kernel_sizes"] = kernel_size_types[hypa["kernel_size_type"]]

    pool_size_types = {
        "01": [(2, 2), (2, 2), (2, 2)],
        "02": [(2, 1), (2, 2), (2, 2)],
        "03": [(1, 2), (2, 2), (2, 2)],
    }
    model_param["pool_sizes"] = pool_size_types[hypa["pool_size_type"]]

    dropout_types = {"01": [0.03, 0.01], "02": [0.3, 0.1]}
    model_param["dropouts"] = dropout_types[hypa["dropout_type"]]

    # a dict to recreate this training
    recap = {}
    recap["words"] = words
    recap["hypa"] = hypa
    recap["model_param"] = model_param
    recap["model_name"] = model_name
    recap["version"] = "002"
    # logg.debug(f"recap: {recap}")
    recap_path = info_folder / "recap.json"
    recap_path.write_text(json.dumps(recap, indent=4))

    learning_rate_types = {
        "01": "fixed01",
        "02": "fixed02",
        "03": "fixed03",
        "e1": "exp_decay_keras_01",
        "04": "exp_decay_step_01",
        "05": "exp_decay_smooth_01",
        "06": "exp_decay_smooth_02",
    }
    learning_rate_type = hypa["learning_rate_type"]
    lr_value = learning_rate_types[learning_rate_type]

    # setup opt fixed lr values
    if lr_value.startswith("fixed"):
        if lr_value == "fixed01":
            lr = 1e-2
        elif lr_value == "fixed02":
            lr = 1e-3
        elif lr_value == "fixed03":
            lr = 1e-4
    else:
        lr = 1e-3

    if lr_value == "exp_decay_keras_01":
        lr = ExponentialDecay(0.1, decay_steps=100000, decay_rate=0.96, staircase=True)

    optimizer_types = {
        "a1": Adam(learning_rate=lr),
        "r1": RMSprop(learning_rate=lr),
    }
    opt = optimizer_types[hypa["optimizer_type"]]

    # create the model
    model = CNNmodel(**model_param)
    # model.summary()

    metrics = [
        tf.keras.metrics.CategoricalAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=metrics,
    )

    # setup callbacks
    callbacks = []

    # setup exp decay step / smooth
    if lr_value.startswith("exp_decay"):
        if lr_value == "exp_decay_step_01":
            exp_decay_part = partial(exp_decay_step, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_01":
            exp_decay_part = partial(exp_decay_smooth, epochs_drop=5)
        elif lr_value == "exp_decay_smooth_02":
            exp_decay_part = partial(
                exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2
            )
        lrate = LearningRateScheduler(exp_decay_part)
        callbacks.append(lrate)

    # # setup early stopping
    # early_stop = EarlyStopping(
    #     # monitor="val_categorical_accuracy",
    #     monitor="val_loss",
    #     patience=4,
    #     verbose=1,
    #     restore_best_weights=True,
    # )
    # callbacks.append(early_stop)

    # get training parameters
    BATCH_SIZE = hypa["batch_size"]
    SHUFFLE_BUFFER_SIZE = BATCH_SIZE
    EPOCH_NUM = hypa["epoch_num"]

    # load the datasets
    datasets = {}
    for which in ["training", "validation", "testing"]:
        # logg.debug(f"data[{which}].shape: {data[which].shape}")
        datasets[which] = Dataset.from_tensor_slices((data[which], labels[which]))
        # logg.debug(f"datasets[{which}]: {datasets[which]}")
        datasets[which] = datasets[which].shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
        # logg.debug(f"datasets[{which}]: {datasets[which]}")

    # train the model
    results = model.fit(
        data["training"],
        labels["training"],
        # validation_data=datasets["validation"],
        validation_data=(data["validation"], labels["validation"]),
        batch_size=BATCH_SIZE,
        epochs=EPOCH_NUM,
        verbose=1,
        callbacks=callbacks,
    )

    # save the trained model
    model.save(model_path)

    results_recap = {}
    results_recap["model_name"] = model_name

    # version of the results saved
    results_recap["results_recap_version"] = "002"

    # quickly evaluate the results
    # logg.debug(f"\nmodel.metrics_names: {model.metrics_names}")
    # for which in ["training", "validation", "testing"]:
    #     model_eval = model.evaluate(datasets[which])
    #     logg.debug(f"{which}: model_eval: {model_eval}")

    # save the evaluation results
    logg.debug("Evaluate on test data:")
    # eval_testing = model.evaluate(datasets["testing"])
    # results_recap[model.metrics_names[0]] = eval_testing[0]
    # results_recap[model.metrics_names[1]] = eval_testing[1]
    eval_testing = model.evaluate(data["testing"], labels["testing"])
    for metrics_name, value in zip(model.metrics_names, eval_testing):
        logg.debug(f"{metrics_name}: {value}")
        results_recap[metrics_name] = value

    # compute the confusion matrix
    # y_pred = model.predict(datasets["testing"])
    y_pred = model.predict(data["testing"])
    cm = pred_hot_2_cm(labels["testing"], y_pred, words)
    # logg.debug(f"cm: {cm}")
    results_recap["cm"] = cm.tolist()

    # compute the fscore
    fscore = analyze_confusion(cm, words)
    logg.debug(f"fscore: {fscore}")

    # plot the cm
    fig, ax = plt.subplots(figsize=(12, 12))
    plot_confusion_matrix(cm, ax, model_name, words, fscore)
    plot_cm_path = info_folder / "test_confusion_matrix.png"
    fig.savefig(plot_cm_path)
    plt.close(fig)

    # save the histories
    results_recap["history"] = {
        "loss": results.history["loss"],
        "val_loss": results.history["val_loss"],
        "categorical_accuracy": results.history["categorical_accuracy"],
        "val_categorical_accuracy": results.history["val_categorical_accuracy"],
    }

    # save the results
    res_recap_path = info_folder / "results_recap.json"
    res_recap_path.write_text(json.dumps(results_recap, indent=4))

    y_pred_dataset = model.predict(datasets["testing"])
    cm_dataset = pred_hot_2_cm(labels["testing"], y_pred_dataset, words)
    fscore_dataset = analyze_confusion(cm_dataset, words)
    logg.debug(f"fscore_dataset: {fscore_dataset} fscore {fscore}")
    # for i, (ys, yd) in enumerate(zip(y_pred, y_pred_dataset)):
    #     pred_split = np.argmax(ys)
    #     pred_dataset = np.argmax(yd)
    #     logg.debug(f"i: {i} pred_split: {pred_split} pred_dataset: {pred_dataset}")

    # plt.show()

    placeholder_path.write_text(f"Trained. F-score: {fscore}")

    return "done_training"
Example #19
    def fit(self,
            X_train,
            y_train,
            X_valid=None,
            y_valid=None,
            epochs=200,
            lr=0.0001,
            batch_size=16):
        # Check that X and y have correct shape
        # X, y = check_X_y(X, y)
        # Store the classes seen during fit
        # self.classes_ = unique_labels(y_train)

        self.X_ = X_train
        self.y_ = y_train

        self.model = Sequential()

        # Recurrent layer
        self.model.add(
            Bidirectional(
                LSTM(128,
                     return_sequences=False,
                     dropout=0.1,
                     recurrent_dropout=0.1)))

        # Fully connected layer
        self.model.add(Dense(128, activation='relu'))

        # Dropout for regularization
        self.model.add(Dropout(0.5))

        # Output layer
        self.model.add(Dense(3, activation='softmax'))

        # scheduler
        lr_schedule = ExponentialDecay(initial_learning_rate=lr,
                                       decay_steps=600,
                                       decay_rate=0.9,
                                       staircase=True)

        # optimizer
        opt = Adam(learning_rate=lr_schedule)

        # Compile the model
        self.model.compile(optimizer=opt,
                           loss='categorical_crossentropy',
                           metrics=['accuracy', AUC(name='auc')])

        # encode class values as integers
        label_encoder = LabelEncoder()
        label_encoder.fit(y_train)
        self.classes_ = label_encoder.classes_
        encoded_y_train = label_encoder.transform(y_train)

        # convert integers to dummy variables (i.e. one hot encoded)
        dummy_y_train = np_utils.to_categorical(encoded_y_train)

        X_train_ = self.get_embeddings(X_train)

        if X_valid is not None:
            encoded_y_valid = label_encoder.transform(y_valid)
            dummy_y_valid = np_utils.to_categorical(encoded_y_valid)
            X_valid_ = self.get_embeddings(X_valid)

            self.history_ = self.model.fit(X_train_,
                                           dummy_y_train,
                                           batch_size=batch_size,
                                           epochs=epochs,
                                           callbacks=self.callbacks,
                                           validation_data=(X_valid_,
                                                            dummy_y_valid))
        else:
            self.history_ = self.model.fit(X_train_,
                                           dummy_y_train,
                                           batch_size=batch_size,
                                           epochs=epochs,
                                           callbacks=self.callbacks)
        return self
Example #20
def choose_scheduler(model_config):
    """
    Define the optimizer used for training the RelevanceModel
    Users have the option to define an ExponentialDecay learning rate schedule

    Parameters
    ----------
    model_config : dict
        model configuration dictionary

    Returns
    -------
    tensorflow learning rate scheduler

    Notes
    -----
    References:
        https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer
        https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules/ExponentialDecay
        https://arxiv.org/pdf/1506.01186.pdf
    """

    if 'lr_schedule' not in model_config:
        # use constant lr schedule
        learning_rate_schedule = ExponentialDecay(
            initial_learning_rate=OptimizerDefaultValues.CONSTANT_LR,
            decay_steps=10000000,
            decay_rate=1.0,
        )

    else:
        lr_schedule = model_config['lr_schedule']
        lr_schedule_key = lr_schedule['key']

        if lr_schedule_key == LearningRateScheduleKey.EXPONENTIAL:
            learning_rate_schedule = ExponentialDecay(
                initial_learning_rate=lr_schedule.get(
                    'learning_rate', OptimizerDefaultValues.CONSTANT_LR),
                decay_steps=lr_schedule.get(
                    'learning_rate_decay_steps',
                    OptimizerDefaultValues.EXP_DECAY_STEPS),
                decay_rate=lr_schedule.get(
                    'learning_rate_decay',
                    OptimizerDefaultValues.EXP_DECAY_RATE),
                staircase=True,
            )

        elif lr_schedule_key == LearningRateScheduleKey.CONSTANT:
            learning_rate_schedule = ExponentialDecay(
                initial_learning_rate=lr_schedule.get(
                    'learning_rate', OptimizerDefaultValues.CONSTANT_LR),
                decay_steps=10000000,
                decay_rate=1.0,
            )
        elif lr_schedule_key == LearningRateScheduleKey.REDUCE_LR_ON_PLATEAU:
            learning_rate_schedule = lr_schedule.get(
                'learning_rate', OptimizerDefaultValues.CONSTANT_LR)

        elif lr_schedule_key == LearningRateScheduleKey.CYCLIC:
            lr_schedule_type = lr_schedule['type']
            if lr_schedule_type == CyclicLearningRateType.TRIANGULAR:
                learning_rate_schedule = cyclic_learning_rate.TriangularCyclicalLearningRate(
                    initial_learning_rate=lr_schedule.get(
                        'initial_learning_rate',
                        OptimizerDefaultValues.CYCLIC_INITIAL_LEARNING_RATE),
                    maximal_learning_rate=lr_schedule.get(
                        'maximal_learning_rate',
                        OptimizerDefaultValues.CYCLIC_MAXIMAL_LEARNING_RATE),
                    step_size=lr_schedule.get(
                        'step_size', OptimizerDefaultValues.CYCLIC_STEP_SIZE),
                )
            elif lr_schedule_type == CyclicLearningRateType.TRIANGULAR2:
                learning_rate_schedule = cyclic_learning_rate.Triangular2CyclicalLearningRate(
                    initial_learning_rate=lr_schedule.get(
                        'initial_learning_rate',
                        OptimizerDefaultValues.CYCLIC_INITIAL_LEARNING_RATE),
                    maximal_learning_rate=lr_schedule.get(
                        'maximal_learning_rate',
                        OptimizerDefaultValues.CYCLIC_MAXIMAL_LEARNING_RATE),
                    step_size=lr_schedule.get(
                        'step_size', OptimizerDefaultValues.CYCLIC_STEP_SIZE),
                )
            elif lr_schedule_type == CyclicLearningRateType.EXPONENTIAL:
                learning_rate_schedule = cyclic_learning_rate.ExponentialCyclicalLearningRate(
                    initial_learning_rate=lr_schedule.get(
                        'initial_learning_rate',
                        OptimizerDefaultValues.CYCLIC_INITIAL_LEARNING_RATE),
                    maximal_learning_rate=lr_schedule.get(
                        'maximal_learning_rate',
                        OptimizerDefaultValues.CYCLIC_MAXIMAL_LEARNING_RATE),
                    step_size=lr_schedule.get(
                        'step_size', OptimizerDefaultValues.CYCLIC_STEP_SIZE),
                    gamma=lr_schedule.get('gamma',
                                          OptimizerDefaultValues.CYCLIC_GAMMA),
                )
            else:
                raise ValueError(
                    "Unsupported cyclic learning rate schedule type key: " +
                    lr_schedule_type)
        else:
            raise ValueError("Unsupported learning rate schedule key: " +
                             lr_schedule_key)

    return learning_rate_schedule
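# Usage sketch (not part of the original snippet): a hypothetical model_config
# entry that the function above would turn into a staircase ExponentialDecay.
# The key names mirror the dictionary lookups in the function body; the numeric
# values are illustrative assumptions only.
example_model_config = {
    'lr_schedule': {
        'key': LearningRateScheduleKey.EXPONENTIAL,
        'learning_rate': 0.01,
        'learning_rate_decay_steps': 100000,
        'learning_rate_decay': 0.96,
    }
}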
def get_Papakostas_model(PARAMS, n_classes=2):
    '''
    CNN architecture proposed by Papakostas et al. [2]

        [2] Papakostas, M., & Giannakopoulos, T. (2018). Speech-music
    discrimination using deep visual feature extractors. Expert Systems with
    Applications, 114, 334-344.

    Parameters
    ----------
    PARAMS : dict
        Contains various parameters.
    n_classes : int, optional
        Number of classes. Default is 2.

    Returns
    -------
    model : tensorflow.keras.models.Model
        Baseline (single-task) CNN model.
    learning_rate : float
        Initial learning rate.

    '''
    input_img = Input(PARAMS['input_shape'][PARAMS['Model']])

    x = Conv2D(96,
               input_shape=PARAMS['input_shape'][PARAMS['Model']],
               kernel_size=(5, 5),
               strides=(2, 2),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(input_img)
    x = Lambda(lambda norm_lyr: LRN(
        norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Conv2D(384,
               kernel_size=(3, 3),
               strides=(2, 2),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(x)
    x = Lambda(lambda norm_lyr: LRN(
        norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Conv2D(512,
               kernel_size=(3, 3),
               strides=(1, 1),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1),
               padding='same')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Flatten()(x)

    x = Dense(4096,
              kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    x = Dense(4096,
              kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    output = Dense(n_classes,
                   activation='softmax',
                   kernel_initializer=RandomNormal(stddev=0.01),
                   bias_initializer=Constant(value=0.1))(x)

    model = Model(input_img, output)

    initial_learning_rate = 0.001
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=700,
                                   decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule)

    if n_classes == 2:
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])

    print(model.summary())
    print(
        'Architecture proposed by Papakostas et al. Expert Systems with Applications 2018\n'
    )

    return model, initial_learning_rate
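# Usage sketch (illustrative, not from the original source): the function only
# reads PARAMS['input_shape'][PARAMS['Model']], so a minimal PARAMS dictionary
# is enough. The model name and input shape below are assumed placeholders.
PARAMS_example = {'Model': 'Papakostas', 'input_shape': {'Papakostas': (68, 68, 1)}}
model, lr0 = get_Papakostas_model(PARAMS_example, n_classes=2)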
Exemplo n.º 22
0
    def __init__(self, config):
        self.ds_train, self.config = get_dataset_and_info(config)
        # ["/gpu:{}".format(i) for i in range(self.config['num_gpu'])]
        self.strategy = tf.distribute.MirroredStrategy() \
                        if len(self.config['gpu']) > 1 \
                        else tf.distribute.OneDeviceStrategy(device="/gpu:0")

        self.steps_per_epoch = self.config['num_records'] // self.config[
            'global_batch_size']
        print("total steps: ", self.steps_per_epoch * self.config['epoch'])

        self.ds_train = self.strategy.experimental_distribute_dataset(
            self.ds_train)

        with self.strategy.scope():
            if self.config['model'] == 'vanilla':
                self.generator = get_generator(self.config)
                self.discriminator = get_discriminator(self.config)
            #TODO: fix resnet model
            #elif config['model'] == 'resnet':
            #    self.generator = get_res_generator(config)
            #    self.discriminator = get_res_discriminator(config)
            else:
                raise ValueError('Unsupported model type')

            lr_fn_G = ExponentialDecay(self.config['lr_g'],
                                       self.steps_per_epoch,
                                       decay_rate=self.config['decay_rate'],
                                       staircase=True)
            lr_fn_D = ExponentialDecay(self.config['lr_d'],
                                       self.steps_per_epoch *
                                       self.config['update_ratio'],
                                       decay_rate=self.config['decay_rate'],
                                       staircase=True)
            self.optimizer_G = optimizers.Adam(learning_rate=lr_fn_G,
                                               beta_1=0.)
            self.optimizer_D = optimizers.Adam(learning_rate=lr_fn_D,
                                               beta_1=0.)
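            # Note (sketch, not in the original code): with decay_steps equal
            # to steps_per_epoch and staircase=True, the generator learning
            # rate drops by a factor of decay_rate once per epoch,
            #   lr_G(step) = lr_g * decay_rate ** (step // steps_per_epoch)
            # while the discriminator rate drops once every
            # steps_per_epoch * update_ratio discriminator steps.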

        if self.config['loss'] == "cross_entropy":
            print("use ce loss")
            self.gloss_fn = cross_entropy_g
            self.dloss_fn = cross_entropy_d
        elif self.config['loss'] == "hinge_loss":
            print("use hinge loss")
            self.gloss_fn = hinge_loss_g
            self.dloss_fn = hinge_loss_d
        else:
            raise ValueError('Unsupported loss type')

        # build model & get trainable variables.
        self.generator.build(
            input_shape=[(self.config['batch_size'],
                          self.config['z_dim']), (self.config['batch_size'],)])
        self.discriminator.build(
            input_shape=[(self.config['batch_size'], config['img_size'],
                          config['img_size'], 3), (self.config['batch_size'],)])
        self.generator.summary()
        self.discriminator.summary()

        self.var_G = [var.name for var in self.generator.variables]
        self.Train_var_G = [
            var.name for var in self.generator.trainable_variables
        ]
        self.Train_var_D = [
            var.name for var in self.discriminator.trainable_variables
        ]

        print("-" * 20, "generator weights", "-" * 20)
        pprint(self.Train_var_G)
        print("-" * 20, "discrimiator weights", "-" * 20)
        pprint(self.Train_var_D)

        # checkpoints
        self.ckpt_G = tf.train.Checkpoint(step=tf.Variable(1),
                                          optimizer=self.optimizer_G,
                                          net=self.generator)
        self.ckpt_D = tf.train.Checkpoint(step=tf.Variable(1),
                                          optimizer=self.optimizer_D,
                                          net=self.discriminator)
        self.CkptManager_G = tf.train.CheckpointManager(
            self.ckpt_G,
            '{}/G'.format(self.config['ckpt_dir']),
            max_to_keep=10,
            checkpoint_name='epoch')
        self.CkptManager_D = tf.train.CheckpointManager(
            self.ckpt_D,
            '{}/D'.format(self.config['ckpt_dir']),
            max_to_keep=10,
            checkpoint_name='epoch')

        # metrics
        self.metrics = {}
        self.metrics['G_loss'] = tf.keras.metrics.Mean('generator_loss',
                                                       dtype=tf.float32)
        self.metrics['D_loss'] = tf.keras.metrics.Mean('discriminator_loss',
                                                       dtype=tf.float32)
        self.metrics.update({
            name: tf.keras.metrics.Mean(name, dtype=tf.float32)
            for name in self.var_G
        })
        self.metrics.update({
            name + '/norm': tf.keras.metrics.Mean(name + '/norm',
                                                  dtype=tf.float32)
            for name in self.Train_var_G
        })
        #for name in self.Train_var_G:
        #    self.metrics[name] =
        #var_name = [var.name for var in self.generator.variables]
        #for name in var_name:
        #    self.metrics[name] = tf.keras.metrics.Mean(
        #    name, dtype=tf.float32)

        self.fixed_vector = tf.random.normal(
            [config['batch_size'], config['z_dim']])
        self.fixed_label = tf.random.uniform((self.config['batch_size'], ),
                                             0,
                                             self.config['num_classes'],
                                             dtype=tf.int32)
    def __init__(
        self,
        seed: int,
        DQN_type: str,
        gamma: float,
        epsilon: float,
        min_eps_pct: float,
        min_eps: float,
        max_exp_pct: float,
        update_target: str,
        copy_step: int,
        tau: float,
        input_shape: int,
        hidden_units: list,
        hidden_memory_units: list,
        batch_size: int,
        selected_loss: str,
        lr: float,
        start_train: int,
        optimizer_name: str,
        batch_norm_input: bool,
        batch_norm_hidden: bool,
        activation: str,
        kernel_initializer: str,
        action_space,
        use_PER: bool = False,
        PER_e: Optional[float] = None,
        PER_a: Optional[float] = None,
        PER_b: Optional[float] = None,
        final_PER_b: Optional[float] = None,
        PER_b_growth: Optional[float] = None,
        final_PER_a: Optional[float] = None,
        PER_a_growth: Optional[float] = None,
        sample_type: str = "TDerror",
        beta_1: float = 0.9,
        beta_2: float = 0.999,
        eps_opt: float = 1e-07,
        lr_schedule: Optional[str] = None,
        exp_decay_pct: Optional[float] = None,
        exp_decay_rate: Optional[float] = None,
        rng=None,
        N_train: int = 100000,
        modelname: str = "Deep Network",
    ):
        """
        Instantiate DQN Class

        Parameters
        ----------
        seed: int
            Seed for experiment reproducibility

        DQN_type: str
            DQN variant choice. It can be 'DQN' or 'DDQN'

        gamma: float
            Discount parameter for the target update

        epsilon: float
            Initial value of epsilon for the epsilon-greedy policy

        min_eps_pct: float
            Pct of the total iterations over which epsilon is annealed to min_eps

        min_eps: float
            Minimum value of epsilon after annealing

        max_exp_pct: float
            Max size of the experience replay buffer as a pct of the total iterations

        update_target: str
            Choice for target update. It can be 'hard' or 'soft'

        copy_step: int
            Number of steps between 'hard' updates of the target network

        tau: float
            When the update is 'soft', tau regulates the amount of the update
            towards the current parameters

        input_shape: int
            Shape of input of the neural network

        hidden_units: list
            List of sizes of hidden layers. The length of the list determines
            the depth of the Q network

        hidden_memory_units: list
            List of sizes of recurrent hidden layers. The length of the list determines
            the depth of the Q network

        batch_size: int
            Size of the batch to perform an update

        selected_loss: str
            Choice for the loss function. It can be 'mse' or 'huber'

        lr: float
            Initial learning rate

        start_train: int
            Number of iterations after which the training starts

        optimizer_name: str
            Choice for the optimizer. It can be 'sgd', 'sgdmom', 'sgdnest',
            'adagrad', 'adadelta', 'adamax', 'adam', 'amsgrad', 'nadam', or 'rmsprop'

        batch_norm_input: bool
            Boolean to regulate the presence of a Batch Norm layer after the input

        batch_norm_hidden: bool
            Boolean to regulate the presence of a Batch Norm layer after each hidden layer

        activation: str
            Choice of activation function. It can be 'leaky_relu',
            'relu6' or 'elu'

        kernel_initializer: str
            Choice of weight initialization as aliased in TF2.0 documentation

        action_space: class
            Space of possible actions as a class that inherits from gym

        use_PER: bool = False
            Boolean regulating whether to use Prioritized Experience Replay (PER)

        PER_e: Optional[float]
            Correction for priorities

        PER_a: Optional[float]
            Amount of prioritization

        PER_b: Optional[float]
            Amount of correction for introduced bias when using PER

        final_PER_b: Optional[float] = None
            Final value for b after the anneal

        PER_b_growth: Optional[float]
            Per-step rate of increase of b towards final_PER_b

        final_PER_a: Optional[float] = None
            Final value for a after the anneal

        PER_a_growth: Optional[float]
            Per-step rate of increase of a towards final_PER_a

        sample_type : str
            Type of sampling in PER. It can be 'TDerror', 'diffTDerror' or 'reward'

        beta_1: float = 0.9
            Parameter for adaptive optimizer

        beta_2: float = 0.999
            Parameter for adaptive optimizer

        eps_opt: float = 1e-07
            Corrective parameter for adaptive optimizer

        lr_schedule: Optional[str]
            Choice for the learning rate schedule. It can be 'exponential',
            'piecewise', 'inverse_time' or 'polynomial'

        exp_decay_pct: Optional[float]
            Number of steps over which the learning rate is decayed, expressed
            as a pct of the total iterations

        exp_decay_rate: Optional[float]
            Rate of decay to reach the desired level of decayed learning rate

        rng = None
            Random number generator for reproducibility

        N_train: int
            Total number of training iterations; used to size the learning rate
            decay schedule and the experience replay buffer

        modelname: str
            Name for the model

        """

        if rng is not None:
            self.rng = rng

        self.batch_size = batch_size

        if lr_schedule == "exponential":
            # compute the decay horizon only when an exponential schedule is
            # requested, so exp_decay_pct may be left as None otherwise
            exp_decay_steps = int(N_train * exp_decay_pct)
            lr = ExponentialDecay(
                initial_learning_rate=lr,
                decay_steps=exp_decay_steps,
                decay_rate=exp_decay_rate,
            )
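        # Worked example (assumed values, not from the original): with the
        # "exponential" schedule above, the learning rate at optimizer step k is
        #   lr(k) = lr * exp_decay_rate ** (k / (N_train * exp_decay_pct))
        # e.g. with lr=1e-3, N_train=100_000, exp_decay_pct=0.5 and
        # exp_decay_rate=0.1 it reaches 1e-4 after 50_000 steps and 1e-5
        # after 100_000 steps.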

        if optimizer_name == "adam":
            self.optimizer = tf.keras.optimizers.Adam(
                learning_rate=lr,
                beta_1=beta_1,
                beta_2=beta_2,
                epsilon=eps_opt,
                amsgrad=False,
            )
        elif optimizer_name == "rmsprop":
            self.optimizer = tf.keras.optimizers.RMSprop(
                learning_rate=lr,
                rho=beta_1,
                momentum=0.0,
                epsilon=eps_opt,
                centered=False,
            )
        else:
            raise ValueError(
                "Unsupported optimizer_name: choose 'adam' or 'rmsprop'")

        self.beta_1 = beta_1
        self.eps_opt = eps_opt
        self.gamma = gamma

        self.max_experiences = int(N_train * max_exp_pct)

        self.use_PER = use_PER
        if self.use_PER:

            if PER_b_growth:
                PER_b_steps = N_train
                PER_b_growth = (final_PER_b - PER_b) / PER_b_steps
            else:
                PER_b_growth = 0.0
                PER_b_steps = None

            if PER_a_growth:
                PER_a_steps = N_train
                PER_a_growth = (final_PER_a - PER_a) / PER_a_steps
            else:
                PER_a_growth = 0.0
                PER_a_steps = None

            self.PERmemory = PER_buffer(
                PER_e,
                PER_a,
                PER_b,
                final_PER_b,
                PER_b_steps,
                PER_b_growth,
                final_PER_a,
                PER_a_steps,
                PER_a_growth,
                self.max_experiences,
                rng,
                sample_type,
            )  # experience is stored as object of this class
        else:
            self.experience = {
                "s": [],
                "a": [],
                "r": [],
                "s2": [],
                "a_unsc": []
            }

        self.start_train = start_train
        self.action_space = action_space
        self.num_actions = len(self.action_space.values)
        self.batch_norm_input = batch_norm_input
        self.batch_norm_hidden = batch_norm_hidden

        self.model = DeepNetworkModel(
            seed,
            input_shape,
            hidden_units,
            self.num_actions,
            batch_norm_input,
            batch_norm_hidden,
            activation,
            kernel_initializer,
            modelname,
        )

        self.target_model = DeepNetworkModel(
            seed,
            input_shape,
            hidden_units,
            self.num_actions,
            batch_norm_input,
            batch_norm_hidden,
            activation,
            kernel_initializer,
            "Target " + modelname,
        )

        self.selected_loss = selected_loss
        self.DQN_type = DQN_type
        self.update_target = update_target
        self.copy_step = copy_step
        self.tau = tau
        self.optimizer_name = optimizer_name

        if self.selected_loss == "mse":
            self.loss = tf.keras.losses.MeanSquaredError()
        elif self.selected_loss == "huber":
            self.loss = tf.keras.losses.Huber()
        else:
            raise ValueError("Unsupported loss: choose 'mse' or 'huber'")

        self.epsilon = epsilon
        self.min_eps = min_eps
        self.min_eps_pct = min_eps_pct
Exemplo n.º 24
0
    return TensorBoard(log_dir=_log_dir + exp_name,
                       profile_batch=0,
                       histogram_freq=1)


def build_baseline_vgg():
    # placeholder: the baseline VGG architecture is not defined in this snippet
    pass


_n_train, _n_valid, _n_test = report_data_size()
_shuffle = True
_log_dir = './logs/baseline_model/'
_seed = 27
_learning_rate = 0.0001
_schedule = ExponentialDecay(_learning_rate,
                             decay_steps=100_000,
                             decay_rate=0.96)
_opt = Adam(learning_rate=_schedule)
_es = EarlyStopping(monitor='val_accuracy', patience=20)
_tb = tb_callback('Baseline_model_1')
_callbacks = [_es]
_metrics = ['accuracy']
_loss = 'categorical_crossentropy'
_steps_per_epoch = _n_train // _batch_size

baseline_model1 = build_baseline_vgg()
baseline_model1_hist = baseline_model1.fit(train_data,
                                           epochs=_epochs,
                                           validation_data=valid_data,
                                           steps_per_epoch=_steps_per_epoch,
                                           callbacks=_callbacks)
def get_Papakostas_MTL_model(PARAMS, n_classes=3):
    '''
    MTL modification of the CNN architecture proposed by Papakostas et al. [2]    

        [2] Papakostas, M., & Giannakopoulos, T. (2018). Speech-music 
    discrimination using deep visual feature extractors. Expert Systems with 
    Applications, 114, 334-344.

    Parameters
    ----------
    PARAMS : dict
        Contains various parameters.
    n_classes : int, optional
        Number of classes. Default is 3.

    Returns
    -------
    model : tensorflow.keras.models.Model
        MTL CNN model.
    learning_rate : float
        Initial learning rate.

    '''
    input_img = Input(PARAMS['input_shape'][PARAMS['Model']])

    x = Conv2D(96,
               input_shape=PARAMS['input_shape'][PARAMS['Model']],
               kernel_size=(5, 5),
               strides=(2, 2),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(input_img)
    x = Lambda(lambda norm_lyr: LRN(
        norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x)
    # x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Conv2D(384,
               kernel_size=(3, 3),
               strides=(2, 2),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(x)
    x = Lambda(lambda norm_lyr: LRN(
        norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x)
    # x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Conv2D(512,
               kernel_size=(3, 3),
               strides=(1, 1),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1),
               padding='same')(x)
    # x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Flatten()(x)

    x = Dense(4096,
              kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    x = Dense(4096,
              kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)

    classification_output = Dense(n_classes,
                                  activation='softmax',
                                  kernel_initializer=RandomNormal(stddev=0.01),
                                  bias_initializer=Constant(value=0.1),
                                  name='3C')(x)

    sp_output, x_sp, mu_output, x_mu, smr_output, x_smr = MTL_modifications(x)

    model = Model(input_img,
                  [sp_output, mu_output, smr_output, classification_output])

    initial_learning_rate = 0.001
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=700,
                                   decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule)
    model.compile(loss={
        'S': 'binary_crossentropy',
        'M': 'binary_crossentropy',
        'R': 'mean_squared_error',
        '3C': 'categorical_crossentropy'
    },
                  optimizer=optimizer,
                  metrics={'3C': 'accuracy'})

    print(model.summary())
    print(
        'MTL modifications of architecture proposed by Papakostas et al. Expert Systems with Applications 2018\n'
    )

    return model, initial_learning_rate
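# Usage sketch (the output names are taken from the compile() call above; the
# target arrays are hypothetical placeholders, not from the original source):
#   model, lr0 = get_Papakostas_MTL_model(PARAMS, n_classes=3)
#   model.fit(X_spec, {'S': y_speech, 'M': y_music, 'R': y_smr, '3C': y_3class})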
Exemplo n.º 26
0
def choose_scheduler(model_config):
    """
        Define the learning rate schedule used for training the RelevanceModel.
        Users have the option to define an exponential, constant or cyclic
        learning rate schedule.

        Parameters
        ----------
            model_config : dict
                model configuration dictionary

        Returns
        -------
            tensorflow learning rate scheduler

        Notes
        -----
        References:
            https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer
            https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules/ExponentialDecay
            https://arxiv.org/pdf/1506.01186.pdf
        """

    if 'lr_schedule' not in model_config:
        # use constant lr schedule
        learning_rate_schedule = ExponentialDecay(
            initial_learning_rate=0.01,
            decay_steps=10000000,
            decay_rate=1.0,
        )

    else:
        lr_schedule = model_config['lr_schedule']
        lr_schedule_key = lr_schedule['key']

        if lr_schedule_key == LearningRateScheduleKey.EXPONENTIAL:
            learning_rate_schedule = ExponentialDecay(
                initial_learning_rate=lr_schedule['learning_rate']
                if 'learning_rate' in lr_schedule else 0.01,
                decay_steps=lr_schedule['learning_rate_decay_steps']
                if 'learning_rate_decay_steps' in lr_schedule else 100000,
                decay_rate=lr_schedule['learning_rate_decay']
                if 'learning_rate_decay' in lr_schedule else 0.96,
                staircase=True,
            )

        elif lr_schedule_key == LearningRateScheduleKey.CONSTANT:
            learning_rate_schedule = ExponentialDecay(
                initial_learning_rate=lr_schedule['learning_rate']
                if 'learning_rate' in lr_schedule else 0.01,
                decay_steps=10000000,
                decay_rate=1.0,
            )

        elif lr_schedule_key == LearningRateScheduleKey.CYCLIC:
            lr_schedule_type = lr_schedule['type']
            if lr_schedule_type == CyclicLearningRateType.TRIANGULAR:
                learning_rate_schedule = cyclic_learning_rate.TriangularCyclicalLearningRate(
                    initial_learning_rate=lr_schedule['initial_learning_rate']
                    if 'initial_learning_rate' in lr_schedule else 0.001,
                    maximal_learning_rate=lr_schedule['maximal_learning_rate']
                    if 'maximal_learning_rate' in lr_schedule else 0.01,
                    step_size=lr_schedule['step_size']
                    if 'step_size' in lr_schedule else 10,
                )
            elif lr_schedule_type == CyclicLearningRateType.TRIANGULAR2:
                learning_rate_schedule = cyclic_learning_rate.Triangular2CyclicalLearningRate(
                    initial_learning_rate=lr_schedule['initial_learning_rate']
                    if 'initial_learning_rate' in lr_schedule else 0.001,
                    maximal_learning_rate=lr_schedule['maximal_learning_rate']
                    if 'maximal_learning_rate' in lr_schedule else 0.01,
                    step_size=lr_schedule['step_size']
                    if 'step_size' in lr_schedule else 10,
                )
            elif lr_schedule_type == CyclicLearningRateType.EXPONENTIAL:
                learning_rate_schedule = cyclic_learning_rate.ExponentialCyclicalLearningRate(
                    initial_learning_rate=lr_schedule['initial_learning_rate']
                    if 'initial_learning_rate' in lr_schedule else 0.001,
                    maximal_learning_rate=lr_schedule['maximal_learning_rate']
                    if 'maximal_learning_rate' in lr_schedule else 0.01,
                    step_size=lr_schedule['step_size']
                    if 'step_size' in lr_schedule else 10,
                    gamma=lr_schedule['gamma']
                    if 'gamma' in lr_schedule else 1.0,
                )
            else:
                raise ValueError(
                    "Unsupported cyclic learning rate schedule type key: " +
                    lr_schedule_type)
        else:
            raise ValueError("Unsupported learning rate schedule key: " +
                             lr_schedule_key)

    return learning_rate_schedule
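# Usage sketch (not part of the original snippet): a hypothetical model_config
# requesting the triangular cyclic schedule handled above. The key names follow
# the dictionary lookups in choose_scheduler; the values are illustrative only.
example_cyclic_config = {
    'lr_schedule': {
        'key': LearningRateScheduleKey.CYCLIC,
        'type': CyclicLearningRateType.TRIANGULAR,
        'initial_learning_rate': 0.001,
        'maximal_learning_rate': 0.01,
        'step_size': 10,
    }
}
cyclic_schedule = choose_scheduler(example_cyclic_config)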
Exemplo n.º 27
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('output', help='Model output name')
    args = parser.parse_args()

    x_gyro = []
    x_acc = []

    y = []

    imu_data_filenames = []
    gt_data = []

    for i in range(9):
        data_imu_path = f'/home/huydung/devel/intern/data/1ere/{i}/data_deep/imu/'
        for j in range(
                len([
                    name for name in os.listdir(data_imu_path)
                    if os.path.isfile(os.path.join(data_imu_path, name))
                ])):
            imu_data_filenames.append(data_imu_path + f'{j}.csv')
            gt_data.append(np.array([0., 1.]))
    for i in range(9):
        data_imu_path = f'/home/huydung/devel/intern/data/2eme/{i}/data_deep/imu/'
        for j in range(
                len([
                    name for name in os.listdir(data_imu_path)
                    if os.path.isfile(os.path.join(data_imu_path, name))
                ])):
            imu_data_filenames.append(data_imu_path + f'{j}.csv')
            gt_data.append(np.array([1., 0.]))

    for i, (cur_imu_data_filename,
            cur_gt_data) in enumerate(zip(imu_data_filenames, gt_data)):
        cur_x_gyro, cur_x_acc, cur_gt = load_cea_dataset(
            cur_imu_data_filename, cur_gt_data)
        x_gyro.append(cur_x_gyro)
        x_acc.append(cur_x_acc)
        y.append(cur_gt)

    x_gyro = np.reshape(x_gyro,
                        (len(x_gyro), x_gyro[0].shape[0], x_gyro[0].shape[1]))
    x_acc = np.reshape(x_acc,
                       (len(x_acc), x_acc[0].shape[0], x_acc[0].shape[1]))

    y = np.vstack(y)

    x_gyro, x_acc, y = shuffle(x_gyro, x_acc, y)

    initial_learning_rate = 3e-4
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=100000,
                                   decay_rate=0.97,
                                   staircase=True)
    pred_model = create_pred_model_6d_quat()
    # train_model = create_train_model_6d_quat(pred_model)
    # use the decaying learning rate schedule defined above
    pred_model.compile(optimizer=Adam(learning_rate=lr_schedule),
                       loss='categorical_crossentropy')

    filepath = "model_checkpoint.hdf5"
    model_checkpoint = ModelCheckpoint(filepath,
                                       monitor='val_loss',
                                       save_best_only=True,
                                       verbose=1)
    tensorboard = TensorBoard(log_dir="logs/{}".format(time()),
                              profile_batch=0)

    try:
        history = pred_model.fit([x_gyro, x_acc],
                                 y,
                                 epochs=20,
                                 batch_size=1,
                                 verbose=1,
                                 callbacks=[model_checkpoint, tensorboard],
                                 validation_split=0.1)
        pred_model.load_weights(filepath)
        pred_model.save('last_best_model_with_custom_layer.hdf5')
        # pred_model = create_pred_model_6d_quat(window_size)
        pred_model.set_weights(pred_model.get_weights())
        pred_model.save('%s.hdf5' % args.output)

        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.show()

    except KeyboardInterrupt:
        pred_model.load_weights(filepath)
        pred_model.save('last_best_model_with_custom_layer.hdf5')
        # pred_model = create_pred_model_6d_quat(window_size)
        pred_model.set_weights(pred_model.get_weights())
        pred_model.save('%s.hdf5' % args.output)
        print('Early terminate')

    print('Training complete')
Exemplo n.º 28
0
    def get_model(pretrained_weights=None):

        input_layer = Input(shape=(50, 50, 1))

        # Down-sampling
        conv1 = Conv2D(64, kernel_size=3,
                       padding="VALID")(input_layer)  # 48 x 48
        conv1 = BatchNormalization()(conv1)
        conv1 = Activation("relu")(conv1)
        # conv1 = Dropout(0.1)(conv1)

        conv1 = Conv2D(64, kernel_size=3, padding="SAME")(conv1)  # 48 x 48
        conv1 = BatchNormalization()(conv1)
        conv1 = Activation("relu")(conv1)
        # conv1 = Dropout(0.1)(conv1)

        conv1 = Conv2D(64, kernel_size=3, padding="SAME")(conv1)  # 48 x 48
        conv1 = BatchNormalization()(conv1)
        conv1 = Activation("relu")(conv1)
        # conv1 = Dropout(0.1)(conv1)

        pool1 = MaxPooling2D(pool_size=2)(conv1)  # 24 x 24

        conv2 = Conv2D(128, kernel_size=3, padding="SAME")(pool1)  # 24 x 24
        conv2 = BatchNormalization()(conv2)
        conv2 = Activation("relu")(conv2)
        # conv2 = Dropout(0.1)(conv2)

        conv2 = Conv2D(128, kernel_size=3, padding="SAME")(conv2)  # 24 x 24
        conv2 = BatchNormalization()(conv2)
        conv2 = Activation("relu")(conv2)
        # conv2 = Dropout(0.1)(conv2)

        conv2 = Conv2D(128, kernel_size=3, padding="SAME")(conv2)  # 24 x 24
        conv2 = BatchNormalization()(conv2)
        conv2 = Activation("relu")(conv2)
        # conv2 = Dropout(0.1)(conv2)

        pool2 = MaxPooling2D(pool_size=2)(conv2)  # 12 x 12

        conv3 = Conv2D(256, kernel_size=3, padding="SAME")(pool2)  # 12 x 12
        conv3 = BatchNormalization()(conv3)
        conv3 = Activation("relu")(conv3)
        # conv3 = Dropout(0.1)(conv3)

        conv3 = Conv2D(256, kernel_size=3, padding="SAME")(conv3)  # 12 x 12
        conv3 = BatchNormalization()(conv3)
        conv3 = Activation("relu")(conv3)
        # conv3 = Dropout(0.1)(conv3)

        conv3 = Conv2D(256, kernel_size=3, padding="SAME")(conv3)  # 12 x 12
        conv3 = BatchNormalization()(conv3)
        conv3 = Activation("relu")(conv3)
        # conv3 = Dropout(0.1)(conv3)

        pool3 = MaxPooling2D(pool_size=2)(conv3)  # 6 x 6

        conv4 = Conv2D(512, kernel_size=3, padding="SAME")(pool3)  # 6 x 6
        conv4 = BatchNormalization()(conv4)
        conv4 = Activation("relu")(conv4)
        # conv4 = Dropout(0.1)(conv4)

        conv4 = Conv2D(512, kernel_size=3, padding="SAME")(conv4)  # 6 x 6
        conv4 = BatchNormalization()(conv4)
        conv4 = Activation("relu")(conv4)
        # conv4 = Dropout(0.1)(conv4)

        conv4 = Conv2D(512, kernel_size=3, padding="SAME")(conv4)  # 6 x 6
        conv4 = BatchNormalization()(conv4)
        conv4 = Activation("relu")(conv4)
        # conv4 = Dropout(0.1)(conv4)

        ###

        # Up-sampling
        up5 = (UpSampling2D(size=(2, 2))(conv4))  # 12 x 12
        merge5 = Concatenate()([conv3, up5])

        conv5 = Conv2D(256, kernel_size=2, padding="SAME")(merge5)
        conv5 = BatchNormalization()(conv5)
        conv5 = Activation("relu")(conv5)
        # conv5 = Dropout(0.1)(conv5)

        conv5 = Conv2D(256, kernel_size=3, padding="SAME")(conv5)
        conv5 = BatchNormalization()(conv5)
        conv5 = Activation("relu")(conv5)
        # conv5 = Dropout(0.1)(conv5)

        conv5 = Conv2D(256, kernel_size=3, padding="SAME")(conv5)
        conv5 = BatchNormalization()(conv5)
        conv5 = Activation("relu")(conv5)
        # conv5 = Dropout(0.1)(conv5)

        up6 = (UpSampling2D(size=(2, 2))(conv5))  # 24 x 24
        merge6 = Concatenate()([conv2, up6])

        conv6 = Conv2D(128, kernel_size=2, padding="SAME")(merge6)
        conv6 = BatchNormalization()(conv6)
        conv6 = Activation("relu")(conv6)
        # conv6 = Dropout(0.1)(conv6)

        conv6 = Conv2D(128, kernel_size=3, padding="SAME")(conv6)
        conv6 = BatchNormalization()(conv6)
        conv6 = Activation("relu")(conv6)
        # conv6 = Dropout(0.1)(conv6)

        conv6 = Conv2D(128, kernel_size=3, padding="SAME")(conv6)
        conv6 = BatchNormalization()(conv6)
        conv6 = Activation("relu")(conv6)
        # conv6 = Dropout(0.1)(conv6)

        up7 = (UpSampling2D(size=(2, 2))(conv6))  # 48 x 48
        merge7 = Concatenate()([conv1, up7])

        conv7 = Conv2D(64, kernel_size=2, padding="SAME")(merge7)
        conv7 = BatchNormalization()(conv7)
        conv7 = Activation("relu")(conv7)
        # conv7 = Dropout(0.1)(conv7)

        conv7 = Conv2D(64, kernel_size=3, padding="SAME")(conv7)
        conv7 = BatchNormalization()(conv7)
        conv7 = Activation("relu")(conv7)
        # conv7 = Dropout(0.1)(conv7)

        conv7 = Conv2D(64, kernel_size=3, padding="SAME")(conv7)
        conv7 = BatchNormalization()(conv7)
        conv7 = Activation("relu")(conv7)
        # conv7 = Dropout(0.1)(conv7)

        conv8 = Conv2DTranspose(1, kernel_size=3,
                                padding="VALID")(conv7)  # 50 x 50

        merge9 = Concatenate()([conv8, input_layer])

        conv10 = Conv2D(1, kernel_size=1)(merge9)
        conv10 = BatchNormalization()(conv10)
        conv10 = Activation("relu")(conv10)

        model = Model(inputs=input_layer, outputs=conv10)

        initial_learning_rate = 0.1
        lr_schedule = ExponentialDecay(initial_learning_rate,
                                       decay_steps=100000,
                                       decay_rate=0.96,
                                       staircase=True)
        model.compile(optimizer=Adam(learning_rate=lr_schedule),
                      loss=MeanSquaredError(),
                      metrics=["accuracy"])
        return model
    def __init__(self,
                 timesteps,
                 includeAux,
                 folderI,
                 trainLoss,
                 includeModis,
                 includeVGG,
                 disLoss,
                 cloud_cov=0.4,
                 istransfer=False,
                 img_h=256,
                 img_width=256,
                 startT='01-01-2018',
                 endT='01-05-2019'):

        self.img_h = img_h
        self.img_w = img_width
        self.timesteps = timesteps
        self.includeModis = includeModis
        hvd.init()

        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()],
                                                       'GPU')
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

        self.gen_schedule = ExponentialDecay(1e-4 * hvd.size(),
                                             decay_steps=10000,
                                             decay_rate=0.1,
                                             staircase=True)

        self.disc_schedule = ExponentialDecay(1e-4 * hvd.size() * 5,
                                              decay_steps=10000,
                                              decay_rate=0.1,
                                              staircase=True)
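        # Note (not in the original code): scaling the base rate by hvd.size()
        # follows the common Horovod convention of linear learning rate scaling
        # with the number of workers; the discriminator starts at a 5x higher
        # rate than the generator.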
        self.istransfer = istransfer
        # self.disOp = hvd.DistributedOptimizer(tf.keras.optimizers.Adam(1e-4 * hvd.size(), 0.5))
        # self.lstmOp = hvd.DistributedOptimizer(Adam(lr=1e-4 * hvd.size(), beta_1=0.9, beta_2=0.999, epsilon=1e-08))
        self.disOp = hvd.DistributedOptimizer(
            Adam(learning_rate=self.disc_schedule))
        self.lstmOp = hvd.DistributedOptimizer(
            Adam(learning_rate=self.gen_schedule))

        self.model_helpers = models.LSTM_GAN_MODEL(disOp=self.disOp,
                                                   lstmOp=self.lstmOp,
                                                   h=self.img_h,
                                                   w=self.img_w,
                                                   timeStep=timesteps,
                                                   includeAux=includeAux,
                                                   trainLoss=trainLoss,
                                                   disLoss=disLoss)

        # print("GOT MODIS======", includeModis)
        if includeVGG and includeModis == 0:
            if istransfer:
                self.dataloader = dataloaders.DatasetHandling(
                    self.img_w,
                    self.img_h,
                    no_of_timesteps=timesteps,
                    startT=startT,
                    endT=endT,
                    cloud_cov=cloud_cov,
                    album='foco-co-20km')

                self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_with_vgg_transfer(
                    self.transferLear())
            else:
                self.dataloader = dataloaders.DatasetHandling(
                    self.img_w,
                    self.img_h,
                    no_of_timesteps=timesteps,
                    startT=startT,
                    endT=endT,
                    cloud_cov=cloud_cov)

                self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_with_vgg(
                )
        elif not includeVGG and includeModis == 0:
            self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_no_vgg(
            )
        elif includeModis == 1:
            self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_with_vgg_multi_modis(
            )

        self.dirName = "/s/" + socket.gethostname(
        ) + "/a/nobackup/galileo/paahuni/" + str(folderI) + "/"
        if not includeModis == 2:
            self.img_itr = self.dataloader.get_non_random_image_iterator_new(
                batch_size=1,
                no_of_timesteps=self.timesteps,
                sendMetaInfo=True,
                includeModis=includeModis)
        else:
            self.dataloader = dataloaders.DatasetHandling(
                self.img_w,
                self.img_h,
                no_of_timesteps=timesteps,
                startT=startT,
                endT=endT,
                cloud_cov=cloud_cov)
        self.includeVGG = includeVGG
Exemplo n.º 30
0
        f"mv ./experiments/{conf['name']}/* ./experiments/{conf['name']}_{NOW}/"
    )
else:
    os.mkdir(f"./experiments/{conf['name']}")

hdf5_dir = f"./experiments/{conf['name']}/HDF5"
os.mkdir(hdf5_dir)

# schedules
if conf['parameter']['schedules'] == 'CosineDecayRestarts':
    from tensorflow.keras.experimental import CosineDecayRestarts
    learning_rate = CosineDecayRestarts(conf['parameter']['learning_rate'],
                                        100)
elif conf['parameter']['schedules'] == 'ExponentialDecay':
    from tensorflow.keras.optimizers.schedules import ExponentialDecay
    # ExponentialDecay also requires decay_steps and decay_rate; the values
    # below are placeholder assumptions, not taken from the original config
    learning_rate = ExponentialDecay(conf['parameter']['learning_rate'],
                                     decay_steps=100_000,
                                     decay_rate=0.96)
else:
    learning_rate = conf['parameter']['learning_rate']
# =============================================================================

# optimizer
if conf['parameter']['Optimizers'] == 'SGD':
    from tensorflow.keras.optimizers import SGD
    optimizer = SGD(learning_rate)
else:
    from tensorflow.keras.optimizers import Adamax
    optimizer = Adamax(learning_rate)
# =============================================================================
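# Illustrative conf fragment (assumed, not from the original source): the keys
# below are exactly the ones read by the schedule and optimizer selection above.
# conf = {
#     'name': 'baseline',
#     'parameter': {
#         'schedules': 'ExponentialDecay',
#         'learning_rate': 0.001,
#         'Optimizers': 'SGD',
#     },
# }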

# callbacks
callbacks = [