Example #1
import pandas as pd
from pkg_resources import Requirement, resource_filename


def get_label(predictor):
    # Resolve the training-label CSV bundled with the molSimplify package;
    # get_key is a molSimplify helper that maps a predictor name to a file key.
    key = get_key(predictor, suffix="train_y")
    path_to_file = resource_filename(Requirement.parse("molSimplify"),
                                     "molSimplify/tf_nn/" + key + '.csv')
    _df = pd.read_csv(path_to_file)
    lname = _df.columns.tolist()
    # The label file is expected to contain exactly one column.
    assert len(lname) == 1
    return lname
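This resolves the name of the single label column in the training-label CSV shipped with the package. A minimal usage sketch, assuming a hypothetical predictor key 'homo' for which a matching train_y CSV exists in molSimplify:

label_name = get_label('homo')  # 'homo' is a hypothetical predictor key
print(label_name)               # a one-element list holding the label column name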
Example #2
import joblib
from pkg_resources import Requirement, resource_filename


def load_sklearn_model(predictor):
    # Locate the serialized scikit-learn model shipped with molSimplify.
    key = get_key(predictor, suffix="model")
    modelfile = resource_filename(Requirement.parse("molSimplify"),
                                  "molSimplify/sklearn_models/" + key + '.h5')
    # Despite the .h5 extension, the file is loaded as a joblib pickle.
    loaded_model = joblib.load(modelfile)
    return loaded_model
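The loader returns a ready-to-use scikit-learn estimator. A hedged usage sketch; the predictor key and the feature matrix X are assumptions, and X must match the descriptor layout the model was trained on:

model = load_sklearn_model('homo')  # hypothetical predictor key
y_pred = model.predict(X)           # X: 2-D descriptor array (assumed)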
Example #3
import glob
import os

import numpy as np
from keras import backend as K  # legacy Keras API (K.function, K.learning_phase)
from keras.models import model_from_json
from pkg_resources import Requirement, resource_filename


def latent_space_uq(predictor,
                    layer_index=-2,
                    descriptors=False,
                    descriptor_names=False,
                    entropy=False):
    key = get_key(predictor, suffix=False)
    base_path = resource_filename(Requirement.parse("molSimplify"),
                                  "molSimplify/tf_nn/" + key)
    base_path = base_path + 'ensemble_models'
    if not os.path.exists(base_path):
        print('Ensemble models not found; training them now...')
        ensemble_maker(predictor)
    print('ANN activated for ' + str(predictor))
    model_list = glob.glob(base_path + '/*.h5')
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(
        predictor)
    if (descriptors and descriptor_names):
        excitation = tf_ANN_excitation_prepare(predictor, descriptors,
                                               descriptor_names)
        excitation = data_normalize(excitation, train_mean_x, train_var_x)
    else:
        mat = load_training_data(predictor)
        mat = np.array(mat, dtype='float64')
        train_mat = data_normalize(mat, train_mean_x, train_var_x)
        excitation = np.array(train_mat)
    ### load test data
    if (descriptors and descriptor_names):
        # When user descriptors are supplied, they also serve as the test set.
        excitation = tf_ANN_excitation_prepare(predictor, descriptors,
                                               descriptor_names)
        excitation_test = data_normalize(excitation, train_mean_x, train_var_x)
    else:
        mat = load_test_data(predictor)
        mat = np.array(mat, dtype='float64')
        test_mat = data_normalize(mat, train_mean_x, train_var_x)
        excitation_test = np.array(test_mat)
    labels = load_test_labels(predictor)
    labels_train = load_training_labels(predictor)
    if 'clf' not in predictor:
        labels = np.array(labels, dtype='float64')
        labels_train = np.array(labels_train, dtype='float64')
    else:
        labels = np.array(labels, dtype='int')
        labels_train = np.array(labels_train, dtype='int')
    results_list = []
    err_list = []
    dist_list = []
    for model in model_list:
        # Strip only the extension; split('.') would break on paths with dots.
        _base = os.path.splitext(model)[0]
        with open(_base + '.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        # Rebuild the architecture from JSON, then load the trained weights.
        loaded_model = model_from_json(loaded_model_json)
        loaded_model.load_weights(model)
        loaded_model.compile(loss="mse",
                             optimizer='adam',
                             metrics=['mse', 'mae', 'mape'])
        # Backend functions that run the net in inference mode (learning_phase=0):
        # one returns the final output, the other the chosen latent layer.
        get_outputs = K.function(
            [loaded_model.layers[0].input,
             K.learning_phase()], [loaded_model.layers[-1].output])
        get_latent = K.function(
            [loaded_model.layers[0].input,
             K.learning_phase()], [loaded_model.layers[layer_index].output])
        print('NOTE: you are choosing:', loaded_model.layers[layer_index],
              loaded_model.layers[layer_index].name, 'for the latent space!')
        if 'clf' not in predictor:
            results = data_rescale(np.array(get_outputs([excitation_test, 0])),
                                   train_mean_y, train_var_y)[0]
        else:
            results = np.array(get_outputs([excitation_test, 0]))[0]
        ## latent-space distance of each point to its nearest training neighbors
        training_latent_distance = np.array(get_latent([excitation, 0]))[0]
        nn_latent_dist_train, _, __ = dist_neighbor(training_latent_distance,
                                                    training_latent_distance,
                                                    labels_train,
                                                    l=5,
                                                    dist_ref=1,
                                                    just_nn=True)
        nn_dist_avrg_train = np.mean(nn_latent_dist_train)
        test_latent_distance = np.array(get_latent([excitation_test, 0]))[0]
        nn_latent_dist_test, nn_dists, nn_labels = dist_neighbor(
            test_latent_distance,
            training_latent_distance,
            labels_train,
            l=5,
            dist_ref=nn_dist_avrg_train,
            just_nn=False)
        if not entropy:
            dist_list.append(nn_latent_dist_test)
        else:
            # Use a separate name so the 'entropy' flag is not shadowed
            # (reassigning it would change the branch taken on later models).
            entropies = []
            for idx, _dists in enumerate(nn_dists):
                entropies.append(get_entropy(_dists, nn_labels[idx]))
            dist_list.append(np.array(entropies))
        results = results.squeeze(axis=1)
        err = np.linalg.norm(labels - results)**2
        results_list.append(results)
        err_list.append(err)
    dist_list = np.transpose(np.array(dist_list))
    results_list = np.transpose(np.array(results_list))
    result_mean, result_std = np.mean(results_list,
                                      axis=1), np.std(results_list, axis=1)
    latent_dist = np.mean(dist_list, axis=1)
    labels = np.squeeze(labels, axis=1)
    error_for_mean = np.abs(labels - result_mean)
    return result_mean, latent_dist, error_for_mean
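latent_space_uq estimates uncertainty from geometry in the network's latent space: for each ensemble member it embeds the training and test sets at layer_index, measures each test point's distance to its nearest training neighbors (l=5 in the dist_neighbor calls above), optionally converts the neighbors' labels to an entropy, and averages over the ensemble. A minimal sketch of a call, with a hypothetical predictor key:

mean_pred, latent_dist, abs_err = latent_space_uq('homo',  # hypothetical key
                                                  layer_index=-2,
                                                  entropy=False)
# Large latent_dist values flag test points far from the training data,
# where the ensemble mean (mean_pred) is least trustworthy.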
Example #4
import glob
import os

import numpy as np
from keras.models import model_from_json
from pkg_resources import Requirement, resource_filename


def ensemble_uq(predictor,
                descriptors=False,
                descriptor_names=False,
                suffix=False):
    key = get_key(predictor, suffix)
    base_path = resource_filename(Requirement.parse("molSimplify"),
                                  "molSimplify/tf_nn/" + key)
    base_path = base_path + 'ensemble_models'
    if not os.path.exists(base_path):
        print('Ensemble models not found; training them now...')
        ensemble_maker(predictor)
    print('ANN activated for ' + str(predictor))
    model_list = glob.glob(base_path + '/*.h5')

    labels = load_test_labels(predictor)
    if 'clf' not in predictor:
        labels = np.array(labels, dtype='float64')
    else:
        labels = np.array(labels, dtype='int')
    train_mean_x, train_mean_y, train_var_x, train_var_y = load_normalization_data(
        predictor)
    if (descriptors and descriptor_names):
        excitation = tf_ANN_excitation_prepare(predictor, descriptors,
                                               descriptor_names)
        excitation = data_normalize(excitation, train_mean_x, train_var_x)
    else:
        mat = load_test_data(predictor)
        mat = np.array(mat, dtype='float64')
        # Normalize the test descriptors with the training-set statistics.
        test_mat = data_normalize(mat, train_mean_x, train_var_x)
        excitation = np.array(test_mat)
    print('excitation is ' + str(excitation.shape))
    print('actual label:', labels[:3])
    results_list = []
    for model in model_list:
        # Strip only the extension; split('.') would break on paths with dots.
        _base = os.path.splitext(model)[0]
        with open(_base + '.json', 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        # Load the trained weights into the rebuilt model.
        loaded_model.load_weights(model)
        # Compile so the model can be used for prediction.
        loaded_model.compile(loss="mse",
                             optimizer='adam',
                             metrics=['mse', 'mae', 'mape'])
        if 'clf' not in predictor:
            result = data_rescale(loaded_model.predict(excitation),
                                  train_mean_y, train_var_y)
        else:
            result = loaded_model.predict(excitation)
            result = np.squeeze(result, axis=1)
        results_list.append(result)
    results_list = np.transpose(np.array(results_list))
    result_mean, result_std = np.mean(results_list,
                                      axis=1), np.std(results_list, axis=1)
    labels = np.squeeze(labels, axis=1)
    print(labels.shape, result_mean.shape)
    error_for_mean = np.abs(labels - result_mean)
    return result_mean, result_std, error_for_mean
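ensemble_uq instead uses disagreement among the ensemble members as the uncertainty signal: each model predicts on the same normalized inputs, and the per-point standard deviation across models is returned alongside the mean. A hedged usage sketch with the same hypothetical predictor key:

mean_pred, std_pred, abs_err = ensemble_uq('homo')  # hypothetical key
# std_pred is the model-to-model spread; large values mark uncertain predictions.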