示例#1
0
# Report summary statistics of the training targets before they are rescaled.
for _label, _value in (
        ("Mean Tmelt in training data: ", train_mean),
        ("Standard deviation of Tmelt in training data: ", train_std),
        ("Min value of Tmelt in training data: ", np.min(y_train)),
        ("Max value of Tmelt in training data: ", np.max(y_train)),
):
    print(_label, _value)

# Standardize both splits with the same train_mean / train_std so the test
# targets are expressed on the scale defined by the training statistics.
y_train, y_test = (y_train - train_mean) / train_std, (y_test - train_mean) / train_std


def rmse_tmelt(target, predicted, std=train_std):
    """Return the RMSE between *target* and *predicted*, multiplied by *std*.

    The targets in this script are divided by ``train_std`` before training,
    so multiplying the root-mean-squared error by the same factor reports the
    error on the original (un-standardized) scale.

    Args:
        target: Ground-truth values, as accepted by ``mean_squared_error``.
        predicted: Predicted values, as accepted by ``mean_squared_error``.
        std: Scale factor applied to the RMSE. Defaults to the value of
            ``train_std`` at the time this function is defined.

    Returns:
        The scaled root-mean-squared error.
    """
    return np.sqrt(mean_squared_error(target, predicted)) * std


from openchem.data.utils import save_smiles_property_file

# Write the train split first, then the test split. The targets are reshaped
# to a single column (-1, 1) before being handed to the writer.
for _path, _smiles, _targets in (
        ('./benchmark_datasets/melt_temp/train.smi', X_train, y_train),
        ('./benchmark_datasets/melt_temp/test.smi', X_test, y_test),
):
    save_smiles_property_file(_path, _smiles, _targets.reshape(-1, 1))

from openchem.data.smiles_data_layer import SmilesDataset

# Training data: read columns 0 and 1 of the comma-delimited file written
# above, with augmentation switched on.
train_dataset = SmilesDataset(
    './benchmark_datasets/melt_temp/train.smi',
    delimiter=',', cols_to_read=[0, 1], tokens=tokens, augment=True)

# Test data: same columns and token set; `augment` is left at its default.
test_dataset = SmilesDataset(
    './benchmark_datasets/melt_temp/test.smi',
    delimiter=',', cols_to_read=[0, 1], tokens=tokens)
示例#2
0
from openchem.data.utils import get_tokens
# Only the first element returned by get_tokens is used here.
tokens, _, _ = get_tokens(reactants)

from sklearn.model_selection import train_test_split
# 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    reactants, labels, test_size=0.2, random_state=42)

# Standardize the labels using statistics of the training split only, and
# apply the same shift/scale to the test split.
y_mean, y_std = np.mean(y_train), np.std(y_train)
y_train, y_test = (y_train - y_mean) / y_std, (y_test - y_mean) / y_std

from openchem.data.utils import save_smiles_property_file

# Both splits are written space-delimited; the dataset arguments built later
# in this script use the same " " delimiter.
save_smiles_property_file(
    './benchmark_datasets/reactions/train.smi', X_train, y_train, delimiter=" ")
save_smiles_property_file(
    './benchmark_datasets/reactions/test.smi', X_test, y_test, delimiter=" ")

from openchem.data.smiles_data_layer import SmilesDataset
# SiameseDataset is not provided by the SmilesDataset import above; import it
# explicitly so the constructor call below resolves.
# NOTE(review): module path taken from the OpenChem package layout -- confirm
# against the installed OpenChem version.
from openchem.data.siamese_data_layer import SiameseDataset

# Both heads are configured identically: shared token set, space-delimited
# input, and sanitization disabled.
head1_arguments = {"tokens": tokens, "delimiter": " ", "sanitize": False}
head2_arguments = {"tokens": tokens, "delimiter": " ", "sanitize": False}

# Two SMILES heads reading columns 0, 1 and 2 of the training file.
# NOTE(review): presumably columns 0/1 are the two SMILES inputs and column 2
# is the label -- verify against how `reactants` is built.
train_dataset = SiameseDataset('./benchmark_datasets/reactions/train.smi',
                               head1_type='smiles',
                               head2_type='smiles',
                               cols_to_read=[0, 1, 2],
                               head1_arguments=head1_arguments,
                               head2_arguments=head2_arguments)
示例#3
0
# First entry of `data` holds the SMILES strings; the remaining entries are
# stacked into one label array.
smiles = data[0]
labels = np.array(data[1:])

# Fill empty label cells with the placeholder '999' (in-place assignment).
# NOTE(review): if `labels` ends up with a narrow fixed-width string dtype
# (e.g. '<U1'), NumPy silently truncates the assigned value -- confirm the
# dtype is wide enough for '999'.
labels[labels == ''] = '999'
# Transpose so the first axis lines up with `smiles` for the split below.
labels = labels.T

from openchem.data.utils import get_tokens
tokens, _, _ = get_tokens(smiles)
# Append a space character to the token set.
tokens = tokens + ' '

from sklearn.model_selection import train_test_split
# 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    smiles,
    labels,
    test_size=0.2,
    random_state=42,
)

from openchem.data.utils import save_smiles_property_file
# Write each split with the writer's default delimiter.
save_smiles_property_file(
    './benchmark_datasets/tox21/train.smi', X_train, y_train)
save_smiles_property_file(
    './benchmark_datasets/tox21/test.smi', X_test, y_test)

from openchem.data.smiles_data_layer import SmilesDataset

# Training data: read columns 0..12 of the comma-delimited file, with
# augmentation switched on.
train_dataset = SmilesDataset(
    './benchmark_datasets/tox21/train.smi',
    delimiter=',',
    cols_to_read=list(range(13)),
    tokens=tokens,
    augment=True,
)

# Test data: same columns and token set; `augment` is left at its default.
test_dataset = SmilesDataset(
    './benchmark_datasets/tox21/test.smi',
    delimiter=',',
    cols_to_read=list(range(13)),
    tokens=tokens,
)

def multitask_auc(ground_truth, predicted):
    from sklearn.metrics import roc_auc_score
    import numpy as np
    import torch
    ground_truth = np.array(ground_truth)