Exemplo n.º 1
0
def plot_2Dimg_logp_predictions():
    """Plot predicted against true LogP values for the two 2D-image models.

    Reads the test split of the LogP dataset and of the experimental LogP
    dataset, divides both input arrays by 255 in place, rebuilds the two
    models with ``incep_model_logp`` and restores their weights, then
    saves a scatter plot of predictions versus true values to
    ``output/2Dimg_logp_predictions.png``.
    """
    # Test split of the LogP dataset; the first (training) pair is discarded
    print('Loading data')
    (_, _), (images, targets) = get_data('data/ncidb_2Dimg.npz')

    # Test split of the experimental LogP dataset
    print('Loading data')
    (_, _), (exp_images, exp_targets) = get_data(
        'data/ncidb_experim_data_2Dimg.npz', split=0.2)

    # Scale the inputs in place (intended to map pixel values into [0, 1])
    print('Normalize input dividing it by 255')
    images /= 255
    exp_images /= 255

    # Input (height, width, channels) and output sizes, taken from the
    # first sample of the LogP test split
    height, width, channels = images[0].shape
    n_outputs = targets[0].shape[0]
    model_dims = (height, width, channels, n_outputs)

    # Locations of the stored weights for each predictor
    logp_weights = 'weights/incep_resnet_compact_v4_logp_best_val_r2.h5'
    exp_logp_weights = 'weights/incep_resnet_compact_v4_exp_logp_trsf_lrng_small_lr_best_val_r2.h5'

    # Predictor for the LogP dataset
    logp_model = incep_model_logp(*model_dims, lmbda=0)
    logp_model.load_weights(logp_weights)

    # Predictor for the experimental LogP dataset
    exp_logp_model = incep_model_logp(*model_dims, lmbda=0, frozen_index=7)
    exp_logp_model.load_weights(exp_logp_weights)

    # Run both predictors on their own test split
    predictions = logp_model.predict(images)
    exp_predictions = exp_logp_model.predict(exp_images)

    # One scatter series per model; the R^2 figures in the legend are
    # hard-coded text, not computed here
    series = (
        (targets, predictions,
         "LogP model (Test Set). $R^2$: 0.852"),
        (exp_targets, exp_predictions,
         "Experimental LogP model (Test Set). $R^2$: 0.964"),
    )

    figure = plt.figure()
    for true_values, predicted_values, legend_text in series:
        plt.scatter(true_values, predicted_values, label=legend_text)
    plt.title("Predicted LogP values from 2D molecule images")
    plt.xlabel("True values")
    plt.ylabel("Predicted values")
    # Dashed red y = x reference line
    plt.plot([-20, 40], [-20, 40], '--', color='red')
    plt.legend()

    # Write the figure to disk and release it
    plt.savefig("output/2Dimg_logp_predictions.png")
    plt.close(figure)
Exemplo n.º 2
0
def plot_fingerprints_logp_predictions():
    """Plot predicted against true LogP values for the fingerprint models.

    Reads the test split of the LogP dataset and of the experimental LogP
    dataset, rebuilds the two models with ``fcnn_model_logp`` and restores
    their weights, then saves a scatter plot of predictions versus true
    values to ``output/1Dfingerprints_logp_predictions.png``.
    """
    # Test split of the LogP dataset; the first (training) pair is discarded
    print('Loading data')
    (_, _), (features, targets) = get_data('data/ncidb_fingerprints.npz')

    # Test split of the experimental LogP dataset
    print('Loading data')
    (_, _), (exp_features, exp_targets) = get_data(
        'data/ncidb_experim_data_fingerprints.npz', split=0.2)

    # Input and output sizes, taken from the first sample of the LogP
    # test split
    n_inputs = features[0].shape[0]
    n_outputs = targets[0].shape[0]

    # Locations of the stored weights for each predictor
    logp_weights = 'weights/fcnn_logp_6l_best_val_r2.h5'
    exp_logp_weights = 'weights/fcnn_exp_logp_6l_trsf_lrng_best_val_r2.h5'

    # Predictor for the LogP dataset
    logp_model = fcnn_model_logp(n_inputs, n_outputs, lmbda=0)
    logp_model.load_weights(logp_weights)

    # Predictor for the experimental LogP dataset
    exp_logp_model = fcnn_model_logp(n_inputs,
                                     n_outputs,
                                     lmbda=0,
                                     frozen_layers=4)
    exp_logp_model.load_weights(exp_logp_weights)

    # Run both predictors on their own test split
    predictions = logp_model.predict(features)
    exp_predictions = exp_logp_model.predict(exp_features)

    # One scatter series per model; the R^2 figures in the legend are
    # hard-coded text, not computed here
    series = (
        (targets, predictions,
         "LogP model (Test Set). $R^2$: 0.839"),
        (exp_targets, exp_predictions,
         "Experimental LogP model (Test Set). $R^2$: 0.923"),
    )

    figure = plt.figure()
    for true_values, predicted_values, legend_text in series:
        plt.scatter(true_values, predicted_values, label=legend_text)
    plt.title("Predicted LogP values from molecular fingerprints")
    plt.xlabel("True values")
    plt.ylabel("Predicted values")
    # Dashed red y = x reference line
    plt.plot([-20, 40], [-20, 40], '--', color='red')
    plt.legend()

    # Write the figure to disk and release it
    plt.savefig("output/1Dfingerprints_logp_predictions.png")
    plt.close(figure)
Exemplo n.º 3
0
def main(train=False):
    """Train or restore the Tox21 2D-image model, then evaluate it.

    Args:
        train: When true, fit the model on the training split (10% of it
            held out for validation), writing checkpoints and saving and
            plotting the training history. When false, load the weights
            stored at ``weights/<MODEL_NAME>_<epochs>.h5`` instead.

    Raises:
        FileNotFoundError: If ``train`` is false and the weights file does
            not exist.
    """
    # Get train and test dataset
    print('Loading data')
    (x_train,
     y_train), (x_test,
                y_test) = get_data('data/tox21_10k_data_all_2Dimg_ml.npz')

    print('Normalize input dividing it by 255')
    # Input data normalization (in place); intended to map every input
    # element into the [0, 1] interval
    x_train /= 255
    x_test /= 255

    # Input and output dims, taken from the first training sample
    n_h, n_w, n_c = x_train[0].shape
    n_y = y_train[0].shape[0]

    # Build model
    incep_tox21 = incep_model_tox21(n_h, n_w, n_c, n_y, lmbda=0)

    epochs = 100

    if train:
        # Train model
        print('\ntrain the model')

        # Define checkpoints
        # Create save weights checkpoint callback function
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5)

        # Create save best weights checkpoint callback function.
        # NOTE: the file name says "r2" but the monitored quantity is
        # val_masked_f1; the name is kept so existing files still match.
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_val_r2.h5' % MODEL_NAME,
            monitor='val_masked_f1',
            verbose=1,
            save_best_only=True,
            mode='max')

        # Adjust batch size based on GPUs number; falls back to 32 when
        # the product is 0 (i.e. GPUs is 0)
        batch_size = 64 * GPUs or 32

        history = incep_tox21.fit(
            x_train,
            y_train,
            epochs=epochs,
            validation_split=0.1,
            batch_size=batch_size,
            callbacks=[weights_ckpt, best_ckpt]  # Save weights
        )

        # Get data from history
        metrics = ['masked_f1', 'loss', 'masked_accuracy']
        save_history(history,
                     "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)
    else:
        # Load the model weights
        weights_file_path = os.path.abspath(
            os.path.join(os.curdir, 'weights/%s_%s.h5' % (MODEL_NAME, epochs)))
        if not os.path.exists(weights_file_path):
            # Report the path itself so the user can see what is missing
            raise FileNotFoundError(
                "The weights file path specified does not exist: %s" %
                weights_file_path)
        # The model built above is bound to `incep_tox21`
        incep_tox21.load_weights(weights_file_path)

    print('\ntest the model')
    test_loss, test_f1_score, test_acc = incep_tox21.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
    print('Test F1 score:', test_f1_score)
    print('\n#######################################')
    each_metric(x_test, y_test, incep_tox21)
Exemplo n.º 4
0
def main(train=False):
    """Train or restore the fingerprint LogP model, then evaluate it.

    Args:
        train: When true, fit the model on the training split (10% of it
            held out for validation), writing checkpoints and saving and
            plotting the training history. When false, load the weights
            stored at ``weights/<MODEL_NAME>_<epochs>.h5`` instead.

    Raises:
        FileNotFoundError: If ``train`` is false and the weights file does
            not exist.
    """
    # Get train and test dataset
    (x_train, y_train), (x_test, y_test) = get_data('data/ncidb_fingerprints.npz')

    # Input and output dims, taken from the first training sample
    n_x = x_train[0].shape[0]
    n_y = y_train[0].shape[0]

    # Build model
    fcnn_mdl = fcnn_model_logp(n_x, n_y, lmbda=0)

    epochs = 50

    if train:
        # Train model
        print('\ntrain the model')

        # Define checkpoints
        # Create save weights checkpoint callback function
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5
        )

        # Create save best weights checkpoint callback function
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_val_r2.h5' % MODEL_NAME,
            monitor='val_r_squared',
            verbose=1,
            save_best_only=True,
            mode='max'
        )

        history = fcnn_mdl.fit(x_train,
                               y_train,
                               epochs=epochs,
                               validation_split=0.1,
                               callbacks=[weights_ckpt, best_ckpt]  # Save weights
                               )

        # Get data from history
        metrics = ['mean_absolute_error', 'r_squared', 'loss']
        save_history(history, "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)
    else:
        # Load the model weights
        weights_file_path = os.path.abspath(
            os.path.join(os.curdir, 'weights/%s_%s.h5' % (MODEL_NAME, epochs)))
        if not os.path.exists(weights_file_path):
            # Report the path itself so the user can see what is missing
            raise FileNotFoundError(
                "The weights file path specified does not exist: %s"
                % weights_file_path
            )
        fcnn_mdl.load_weights(weights_file_path)

    print('\ntest the model')
    test_loss, test_mae, test_r_squared = fcnn_mdl.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test mae:', test_mae)
    print('Test R2:', test_r_squared)
Exemplo n.º 5
0
def main(train=False, weights_file_path=None):
    """Train or restore the 2D-image LogP model, then evaluate it.

    Args:
        train: When true, fit the model on the training split (10% of it
            held out for validation), writing checkpoints, a CSV log and
            the saved/plotted training history. When false, only load
            weights and evaluate.
        weights_file_path: Optional path to a weights file. When training,
            these weights are loaded into the model before fitting. When
            not training, this is the file to restore; if omitted it
            defaults to ``weights/<MODEL_NAME>_<epochs>.h5``.

    Raises:
        FileNotFoundError: If ``train`` is false and the weights file does
            not exist.
    """
    # Get train and test dataset
    print('Loading data')
    (x_train, y_train), (x_test, y_test) = get_data('data/ncidb_2Dimg.npz')

    print('Normalize input dividing it by 255')
    # Input data normalization (in place); intended to map every input
    # element into the [0, 1] interval
    x_train /= 255
    x_test /= 255

    # Input and output dims, taken from the first training sample
    n_h, n_w, n_c = x_train[0].shape
    n_y = y_train[0].shape[0]

    # Build model
    incep_mdl = incep_model_logp(n_h, n_w, n_c, n_y, lmbda=0)

    epochs = 100

    if train:
        # Train model
        print('\ntrain the model')

        # Define checkpoints
        # Create save weights checkpoint callback function
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5)

        # Create save best weights checkpoint callback function
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_val_r2.h5' % MODEL_NAME,
            monitor='val_r_squared',
            verbose=1,
            save_best_only=True,
            mode='max')

        csv_logger = keras.callbacks.CSVLogger('output/%s_%s_history.csv' %
                                               (MODEL_NAME, epochs))

        # Adjust batch size based on GPUs number; falls back to 32 when
        # the product is 0 (i.e. GPUs is 0)
        batch_size = 64 * GPUs or 32

        # Optionally start from previously saved weights
        if weights_file_path:
            incep_mdl.load_weights(weights_file_path)

        history = incep_mdl.fit(
            x_train,
            y_train,
            epochs=epochs,
            validation_split=0.1,
            batch_size=batch_size,
            callbacks=[weights_ckpt, best_ckpt, csv_logger]  # Save weights
        )

        # Get data from history
        metrics = ['mean_absolute_error', 'r_squared', 'loss']
        save_history(history,
                     "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)
    else:
        # Load the model weights, defaulting to the final-epoch checkpoint
        if not weights_file_path:
            weights_file_path = os.path.abspath(
                os.path.join(os.curdir,
                             'weights/%s_%s.h5' % (MODEL_NAME, epochs)))
        if not os.path.exists(weights_file_path):
            # Report the path itself so the user can see what is missing
            raise FileNotFoundError(
                "The weights file path specified does not exist: %s" %
                weights_file_path)

        incep_mdl.load_weights(weights_file_path)

    print('\ntest the model')
    test_loss, test_mae, test_r_squared = incep_mdl.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test mae:', test_mae)
    print('Test R2:', test_r_squared)
Exemplo n.º 6
0
def main(train=False):
    """Train or restore the Tox21 fingerprint classifier, then evaluate it.

    The dataset file and the classifier builder are chosen from the `ml`
    flag, which is defined outside this function: when it equals ``True``
    the ``_multi`` dataset and ``fcnn_classifier_tox21_ml`` are used.

    Args:
        train: When true, fit the classifier on the training split (10% of
            it held out for validation), writing checkpoints and saving
            and plotting the training history. When false, load the
            weights stored at ``weights/fcnn_tox21_<epochs>.h5`` instead.

    Raises:
        FileNotFoundError: If ``train`` is false and the weights file does
            not exist.
    """
    # Evaluate the flag once. Equality (not truthiness) is kept on purpose
    # so the selection behaves exactly as before for any value of `ml`.
    multi_label = ml == True  # noqa: E712

    # Pick the dataset and the classifier builder up front so that only
    # one dataset is read and only one model is built.
    if multi_label:
        # Data path used when running with missing labels
        data_path = 'data/tox21_10k_data_all_fingerprints_multi.npz'
        build_classifier = fcnn_classifier_tox21_ml
    else:
        data_path = 'data/tox21_10k_data_all_fingerprints.npz'
        build_classifier = fcnn_classifier_tox21

    # Get train and test dataset
    (x_train, y_train), (x_test, y_test) = get_data(data_path)

    # Input and output dims, taken from the first training sample
    n_x = x_train[0].shape[0]
    n_y = y_train[0].shape[0]

    # Get the classifier
    fcnn_clf = build_classifier(n_x, n_y)

    epochs = 50

    if train:
        # Train classifier
        print('\ntrain the classifier')

        # Define checkpoints
        # Create save weights checkpoint callback function
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5)

        # Create save best weights checkpoint callback function
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_f1_score.h5' % MODEL_NAME,
            monitor='val_f1_score',
            verbose=1,
            save_best_only=True,
            mode='max')

        history = fcnn_clf.fit(
            x_train,
            y_train,
            epochs=epochs,
            validation_split=0.1,
            callbacks=[weights_ckpt, best_ckpt]  # Save weights
        )

        # Get data from history
        metrics = ['f1_score', 'loss']
        save_history(history,
                     "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)

    else:
        # Load the model weights.
        # NOTE(review): this path is hard-coded to "fcnn_tox21" while the
        # checkpoints above are written under MODEL_NAME; confirm the two
        # are meant to match.
        weights_file_path = os.path.abspath(
            os.path.join(os.curdir, 'weights/fcnn_tox21_%s.h5' % epochs))
        if not os.path.exists(weights_file_path):
            # Report the path itself so the user can see what is missing
            raise FileNotFoundError(
                "The weights file path specified does not exist: %s" %
                weights_file_path)
        fcnn_clf.load_weights(weights_file_path)

    print('\ntest the classifier')

    print('\n#######################################')
    test_loss, test_f1_score = fcnn_clf.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test F1 score:', test_f1_score)
    print('\n#######################################')
    each_metric(x_test, y_test, fcnn_clf)