# Report summary statistics of the melting-temperature targets before scaling.
print("Mean Tmelt in training data: ", train_mean)
print("Standard deviation of Tmelt in training data: ", train_std)
print("Min value of Tmelt in training data: ", np.min(y_train))
print("Max value of Tmelt in training data: ", np.max(y_train))

# Standardize both splits with the same train_mean / train_std so that the
# test targets are expressed on the scale used for training.
y_train = (y_train - train_mean) / train_std
y_test = (y_test - train_mean) / train_std


def rmse_tmelt(target, predicted, std=train_std):
    """Return the root-mean-squared error of the predictions, scaled by *std*.

    The targets above are divided by ``train_std`` before being saved, so
    multiplying the RMSE by the same factor reports the error in the
    original target units.

    Args:
        target: Reference values, passed to ``mean_squared_error``.
        predicted: Predicted values, passed to ``mean_squared_error``.
        std: Multiplier applied to the RMSE. The default is the value
            ``train_std`` had when this function was defined.

    Returns:
        ``sqrt(mean_squared_error(target, predicted)) * std``.
    """
    return np.sqrt(mean_squared_error(target, predicted)) * std


from openchem.data.utils import save_smiles_property_file

# Write the SMILES together with the standardized targets; the targets are
# reshaped into a single column before saving.
save_smiles_property_file(
    './benchmark_datasets/melt_temp/train.smi',
    X_train,
    y_train.reshape(-1, 1),
)
save_smiles_property_file(
    './benchmark_datasets/melt_temp/test.smi',
    X_test,
    y_test.reshape(-1, 1),
)

from openchem.data.smiles_data_layer import SmilesDataset

# Build datasets from the files written above; columns 0 and 1 are read from
# each file. Only the training set is created with augment=True.
train_dataset = SmilesDataset(
    './benchmark_datasets/melt_temp/train.smi',
    delimiter=',',
    cols_to_read=[0, 1],
    tokens=tokens,
    augment=True,
)
test_dataset = SmilesDataset(
    './benchmark_datasets/melt_temp/test.smi',
    delimiter=',',
    cols_to_read=[0, 1],
    tokens=tokens,
)
from openchem.data.utils import get_tokens

# Collect the token set from every reactant string.
tokens, _, _ = get_tokens(reactants)

from sklearn.model_selection import train_test_split

# Hold out 20% of the data for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    reactants, labels, test_size=0.2, random_state=42)

# Standardize the targets using statistics of the training split only, and
# apply the same transformation to the test split.
y_mean = np.mean(y_train)
y_std = np.std(y_train)
y_train = (y_train - y_mean) / y_std
y_test = (y_test - y_mean) / y_std

from openchem.data.utils import save_smiles_property_file

# Files are written space-delimited; the dataset arguments below use the same
# delimiter when reading them back.
save_smiles_property_file('./benchmark_datasets/reactions/train.smi',
                          X_train, y_train, delimiter=" ")
save_smiles_property_file('./benchmark_datasets/reactions/test.smi',
                          X_test, y_test, delimiter=" ")

from openchem.data.smiles_data_layer import SmilesDataset
# SiameseDataset is the class instantiated below; without this import the
# snippet fails with NameError.
from openchem.data.siamese_data_layer import SiameseDataset

# Both heads receive the same arguments: shared tokens, space delimiter and
# sanitize=False.
head1_arguments = {"tokens": tokens, "delimiter": " ", "sanitize": False}
head2_arguments = {"tokens": tokens, "delimiter": " ", "sanitize": False}

# Columns 0, 1 and 2 are read from the training file.
# NOTE(review): presumably two SMILES columns (one per head) followed by the
# target column -- confirm against the contents of `reactants`.
train_dataset = SiameseDataset('./benchmark_datasets/reactions/train.smi',
                               head1_type='smiles',
                               head2_type='smiles',
                               cols_to_read=[0, 1, 2],
                               head1_arguments=head1_arguments,
                               head2_arguments=head2_arguments)
# The first entry of `data` holds the SMILES strings; the remaining entries
# hold the labels.
smiles = data[0]
labels = np.array(data[1:])

# Replace empty label entries with the placeholder string '999'.
# NOTE(review): presumably '999' marks a missing measurement -- confirm with
# the code that consumes these labels.
labels[np.where(labels == '')] = '999'

# Transpose so that the first axis of `labels` lines up with `smiles` for the
# split below.
labels = labels.T

from openchem.data.utils import get_tokens

tokens, _, _ = get_tokens(smiles)
# Append a space character to the token set.
tokens = tokens + ' '

from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    smiles,
    labels,
    test_size=0.2,
    random_state=42,
)

from openchem.data.utils import save_smiles_property_file

save_smiles_property_file('./benchmark_datasets/tox21/train.smi',
                          X_train, y_train)
save_smiles_property_file('./benchmark_datasets/tox21/test.smi',
                          X_test, y_test)

from openchem.data.smiles_data_layer import SmilesDataset

# Columns 0 through 12 are read from each file. Only the training set is
# created with augment=True.
train_dataset = SmilesDataset(
    './benchmark_datasets/tox21/train.smi',
    delimiter=',',
    cols_to_read=list(range(13)),
    tokens=tokens,
    augment=True,
)
test_dataset = SmilesDataset(
    './benchmark_datasets/tox21/test.smi',
    delimiter=',',
    cols_to_read=list(range(13)),
    tokens=tokens,
)


def multitask_auc(ground_truth, predicted):
    from sklearn.metrics import roc_auc_score
    import numpy as np
    import torch
    # Coerce the ground truth to a NumPy array.
    ground_truth = np.array(ground_truth)