示例#1
0
def leave_one_out_evaluation(X, Y, compounds, model='autoencoder', x_vivo_arg=False, y_vivo_arg=False):
    """Evaluate a model with leave-one-compound-out cross validation.

    One fold is run per unique value in ``compounds``: that compound is passed
    to ``split_train_test`` as ``exclude_compound``, the selected trainer is
    run on the resulting split, and the per-fold outputs are collected.

    Args:
        X: Input data, forwarded unchanged to ``split_train_test``.
        Y: Target data, forwarded unchanged to ``split_train_test``.
        compounds: Compound labels; ``np.unique(compounds)`` defines the folds.
        model: Trainer selector. ``'mod_autoencoder'``, ``'cnn'`` and
            ``'naive_encoder'`` map to ``train_mod_autoencoders``,
            ``cnn_model`` and ``train_naive_encoder``. Any other value
            (including the default ``'autoencoder'``) is forwarded to
            ``train_base_model`` as its last argument.
        x_vivo_arg: Stored in the module global ``x_vivo`` and forwarded to
            ``split_train_test``.
        y_vivo_arg: Stored in the module global ``y_vivo`` and forwarded to
            ``split_train_test``.

    Returns:
        A ``(data, store_mae)`` tuple. ``data`` is a dict with the keys
        ``X_train``, ``Y_train``, ``recon_train``, ``X_valid``, ``Y_valid``,
        ``recon_valid``, ``train_compounds`` and ``valid_compounds``, each
        holding the per-fold values concatenated along axis 0 in fold order.
        ``store_mae`` is the validation MAE averaged over all folds.

    Raises:
        ValueError: If ``compounds`` contains no entries, so there is no fold
            to evaluate.
    """
    print("Performing leave-one-out evaluation with '{}' model".format(model))

    # The flags are handed to split_train_test explicitly below, but they are
    # also published as module globals so that any code reading the
    # module-level x_vivo / y_vivo sees the values from the latest call.
    global x_vivo, y_vivo
    x_vivo = x_vivo_arg
    y_vivo = y_vivo_arg

    unique_compounds = np.unique(compounds)
    n_folds = len(unique_compounds)
    if n_folds == 0:
        # Without this guard the averaging below yields NaN and then fails
        # with an opaque "invalid index to scalar variable" IndexError.
        raise ValueError("leave_one_out_evaluation requires at least one compound")

    # Per-fold pieces are gathered in lists and joined once at the end;
    # growing arrays with np.append would recopy all earlier folds every time.
    folds = {
        'X_train': [], 'Y_train': [], 'recon_train': [],
        'X_valid': [], 'Y_valid': [], 'recon_valid': [],
        'train_compounds': [], 'valid_compounds': [],
    }
    total_errors = []

    for fold_number, exclude_compound in enumerate(unique_compounds, start=1):
        print("Excluding compound", exclude_compound, "[", fold_number, "/", n_folds, "]")
        X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, train_compounds, \
        valid_compounds = split_train_test(X, Y, compounds, x_vivo, y_vivo, exclude_compound=exclude_compound)

        # Kept as an if/elif chain (not a dispatch table) so that only the
        # trainer actually selected has to exist in the module.
        if model == 'mod_autoencoder':
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_mod_autoencoders(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y)
        elif model == 'cnn':
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = cnn_model(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y)
        elif model == 'naive_encoder':
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_naive_encoder(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y)
        else:
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_base_model(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, model)

        folds['X_train'].append(X_train)
        folds['Y_train'].append(Y_train)
        folds['recon_train'].append(recon_train)
        folds['X_valid'].append(X_valid)
        folds['Y_valid'].append(Y_valid)
        folds['recon_valid'].append(recon_valid)
        folds['train_compounds'].append(train_compounds)
        folds['valid_compounds'].append(valid_compounds)
        total_errors.append(errors)

    # Each fold's errors are indexed below as
    # [training MAE, training MSE, validation MAE, validation MSE].
    total_errors = np.array(total_errors)
    avg_errors = np.mean(total_errors, axis=0)
    print("Average Errors:")
    print("Training mae:{}, mse:{}".format(avg_errors[0], avg_errors[1]))
    print("Validation mae:{}, mse:{}".format(avg_errors[2], avg_errors[3]))

    # Mean validation MAE across folds, returned alongside the data.
    store_mae = avg_errors[2]

    print("Compounds sorted by validation MAE (compound, MAE):")
    mae = total_errors[:, 2]
    for idx in np.argsort(mae):
        print(unique_compounds[idx], mae[idx])

    # A single fold is returned as-is (no copy), matching what the function
    # returned before; several folds are joined along the first axis.
    data = {
        key: parts[0] if len(parts) == 1 else np.concatenate(parts, axis=0)
        for key, parts in folds.items()
    }
    return data, store_mae
示例#2
0
def random_split_evaluation(X,
                            Y,
                            compounds,
                            model='autoencoder',
                            iterations=50):
    """Evaluate a model over repeated train/validation splits.

    Each iteration calls ``split_train_test`` with ``train_split=0.8``, runs
    the selected trainer on the resulting split, and collects the outputs.

    Note:
        ``x_vivo`` and ``y_vivo`` are not parameters; they are read from
        module-level names and forwarded to ``split_train_test``. A
        ``NameError`` results if they have not been defined before the call.

    Args:
        X: Input data, forwarded unchanged to ``split_train_test``.
        Y: Target data, forwarded unchanged to ``split_train_test``.
        compounds: Compound labels, forwarded unchanged to
            ``split_train_test``.
        model: ``'autoencoder'`` selects ``train_autoencoders``; any other
            value is forwarded to ``train_base_model`` as its last argument.
        iterations: Number of splits to evaluate; must be at least 1.

    Returns:
        A dict with the keys ``X_train``, ``Y_train``, ``recon_train``,
        ``X_valid``, ``Y_valid``, ``recon_valid``, ``train_compounds`` and
        ``valid_compounds``, each holding the per-iteration values
        concatenated along axis 0 in iteration order.

    Raises:
        ValueError: If ``iterations`` is less than 1, so there is nothing to
            average.
    """
    print("Performing random split evaluation with '{}' model".format(model))

    if iterations < 1:
        # Without this guard the averaging below yields NaN and then fails
        # with an opaque "invalid index to scalar variable" IndexError.
        raise ValueError("random_split_evaluation requires iterations >= 1")

    # Per-iteration pieces are gathered in lists and joined once at the end;
    # growing arrays with np.append would recopy all earlier results each time.
    runs = {
        'X_train': [],
        'Y_train': [],
        'recon_train': [],
        'X_valid': [],
        'Y_valid': [],
        'recon_valid': [],
        'train_compounds': [],
        'valid_compounds': [],
    }
    total_errors = []

    for iteration in range(iterations):
        print(iteration)

        X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, train_compounds, \
        valid_compounds = split_train_test(X, Y, compounds, x_vivo, y_vivo, train_split=0.8)

        if model == 'autoencoder':
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_autoencoders(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y)
        else:
            X_train, X_valid, Y_train, Y_valid, recon_train, recon_valid, errors = train_base_model(
                X_train, X_valid, Y_train, Y_valid, norms_X, norms_Y, model)

        runs['X_train'].append(X_train)
        runs['Y_train'].append(Y_train)
        runs['recon_train'].append(recon_train)
        runs['X_valid'].append(X_valid)
        runs['Y_valid'].append(Y_valid)
        runs['recon_valid'].append(recon_valid)
        runs['train_compounds'].append(train_compounds)
        runs['valid_compounds'].append(valid_compounds)
        total_errors.append(errors)

    # Each iteration's errors are indexed below as
    # [training MAE, training MSE, validation MAE, validation MSE].
    total_errors = np.array(total_errors)
    avg_errors = np.mean(total_errors, axis=0)
    print("Average Errors:")
    print("Training mae:{}, mse:{}".format(avg_errors[0], avg_errors[1]))
    print("Validation mae:{}, mse:{}".format(avg_errors[2], avg_errors[3]))

    # A single iteration is returned as-is (no copy), matching what the
    # function returned before; several are joined along the first axis.
    data = {
        key: parts[0] if len(parts) == 1 else np.concatenate(parts, axis=0)
        for key, parts in runs.items()
    }
    return data