def load_dataset(method, labels, prefix='input', num_data=-1):
    """Load the Tox21 train/val/test splits, preferring the on-disk cache.

    Args:
        method (str): preprocessing method key into ``preprocess_method_dict``.
        labels: label selection forwarded to ``D.get_tox21``.
        prefix (str): root directory for the cache (see ``_CacheNamePolicy``).
        num_data (int): if non-negative, restrict the training split to the
            first ``num_data`` examples; ``-1`` keeps the full split.

    Returns:
        tuple: ``(train, val, test)`` datasets.
    """
    cache_policy = _CacheNamePolicy(method, labels, prefix, num_data=num_data)
    split_paths = (cache_policy.get_train_file_path(),
                   cache_policy.get_val_file_path(),
                   cache_policy.get_test_file_path())
    datasets = [None, None, None]
    if os.path.exists(cache_policy.cache_dir):
        print('load from cache {}'.format(cache_policy.cache_dir))
        datasets = [NumpyTupleDataset.load(path) for path in split_paths]
    # Any split that failed to load (or no cache at all) forces preprocessing.
    if any(dataset is None for dataset in datasets):
        print('preprocessing dataset...')
        preprocessor = preprocess_method_dict[method]()
        if num_data >= 0:
            # Take only the leading `num_data` examples for the training split.
            train_indices = numpy.arange(num_data)
            datasets = D.get_tox21(preprocessor, labels=labels,
                                   train_target_index=train_indices,
                                   val_target_index=None,
                                   test_target_index=None)
        else:
            datasets = D.get_tox21(preprocessor, labels=labels)
        # Persist the freshly preprocessed splits so later runs hit the cache.
        cache_policy.create_cache_directory()
        for path, dataset in zip(split_paths, datasets):
            NumpyTupleDataset.save(path, dataset)
    train, val, test = datasets
    return train, val, test
def load_dataset(method, labels, prefix='input', num_data=-1):
    """Fetch the Tox21 splits for ``method``, rebuilding the cache if stale.

    Args:
        method (str): preprocessing method key into ``preprocess_method_dict``.
        labels: label selection forwarded to ``D.get_tox21``.
        prefix (str): root directory of the dataset cache.
        num_data (int): if non-negative, only the first ``num_data`` training
            examples are used; ``-1`` keeps everything.

    Returns:
        tuple: ``(train, val, test)`` datasets.
    """
    policy = _CacheNamePolicy(method, labels, prefix, num_data=num_data)
    split_paths = {
        'train': policy.get_train_file_path(),
        'val': policy.get_val_file_path(),
        'test': policy.get_test_file_path(),
    }
    splits = dict.fromkeys(split_paths, None)
    print()
    if os.path.exists(policy.cache_dir):
        print('load from cache {}'.format(policy.cache_dir))
        for split_name, path in split_paths.items():
            splits[split_name] = NumpyTupleDataset.load(path)
    # A missing cache directory or any unloadable split triggers a rebuild.
    if None in splits.values():
        print('preprocessing dataset...')
        preprocessor = preprocess_method_dict[method]()
        if num_data >= 0:
            # Use only the leading `num_data` examples for the training split.
            target_index = numpy.arange(num_data)
            loaded = D.get_tox21(
                preprocessor, labels=labels,
                train_target_index=target_index,
                val_target_index=None,
                test_target_index=None,
            )
        else:
            loaded = D.get_tox21(preprocessor, labels=labels)
        splits = dict(zip(split_paths, loaded))
        # Save the preprocessed splits so subsequent runs load from cache.
        policy.create_cache_directory()
        for split_name, path in split_paths.items():
            NumpyTupleDataset.save(path, splits[split_name])
    return splits['train'], splits['val'], splits['test']
def load_dataset(method, labels, prefix='input'): method = 'nfp' if 'nfp' in method else method # to deal with nfpdrop method = 'ggnn' if 'ggnn' in method else method # to deal with ggnndrop policy = _CacheNamePolicy(method, labels, prefix) train_path = policy.get_train_file_path() val_path = policy.get_val_file_path() test_path = policy.get_test_file_path() smiles_path = policy.get_smiles_path() train, val, test = None, None, None train_smiles, val_smiles, test_smiles = None, None, None print() if os.path.exists(policy.cache_dir): print('load from cache {}'.format(policy.cache_dir)) train = NumpyTupleDataset.load(train_path) val = NumpyTupleDataset.load(val_path) test = NumpyTupleDataset.load(test_path) train_smiles, val_smiles, test_smiles = utils.load_npz(smiles_path) if train is None or val is None or test is None: print('preprocessing dataset...') preprocessor = preprocess_method_dict[method]() if labels == 'pyridine': train, val, test, train_smiles, val_smiles, test_smiles = D.get_tox21( preprocessor, labels=None, return_smiles=True) print('converting label into pyridine...') # --- Pyridine = 1 --- train_pyridine_label = [ hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(train_smiles)] val_pyridine_label = [ hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(val_smiles)] test_pyridine_label = [ hassubst(Chem.MolFromSmiles(smi), smart=PYRIDINE_SMILES) for smi in tqdm(test_smiles)] train_pyridine_label = numpy.array(train_pyridine_label)[:, None] val_pyridine_label = numpy.array(val_pyridine_label)[:, None] test_pyridine_label = numpy.array(test_pyridine_label)[:, None] print('train positive/negative', numpy.sum(train_pyridine_label == 1), numpy.sum(train_pyridine_label == 0)) train = NumpyTupleDataset(*train.features[:, :-1], train_pyridine_label) val = NumpyTupleDataset(*val.features[:, :-1], val_pyridine_label) test = NumpyTupleDataset(*test.features[:, :-1], test_pyridine_label) else: train, val, test, 
train_smiles, val_smiles, test_smiles = D.get_tox21( preprocessor, labels=labels, return_smiles=True) # Cache dataset policy.create_cache_directory() NumpyTupleDataset.save(train_path, train) NumpyTupleDataset.save(val_path, val) NumpyTupleDataset.save(test_path, test) train_smiles = numpy.array(train_smiles) val_smiles = numpy.array(val_smiles) test_smiles = numpy.array(test_smiles) utils.save_npz(smiles_path, (train_smiles, val_smiles, test_smiles)) return train, val, test, train_smiles, val_smiles, test_smiles
def load_dataset(method, labels, prefix='input'):
    """Return cached Tox21 train/val/test splits, preprocessing if needed.

    Args:
        method (str): preprocessing method key into ``preprocess_method_dict``.
        labels: label selection forwarded to ``D.get_tox21``.
        prefix (str): root directory of the dataset cache.

    Returns:
        tuple: ``(train, val, test)`` datasets.
    """
    naming = _CacheNamePolicy(method, labels, prefix)
    file_paths = [naming.get_train_file_path(),
                  naming.get_val_file_path(),
                  naming.get_test_file_path()]
    print()
    if os.path.exists(naming.cache_dir):
        print('load from cache {}'.format(naming.cache_dir))
        splits = [NumpyTupleDataset.load(path) for path in file_paths]
    else:
        splits = [None, None, None]
    # Rebuild everything if any split could not be loaded from cache.
    if any(split is None for split in splits):
        print('preprocessing dataset...')
        preprocessor = preprocess_method_dict[method]()
        splits = list(D.get_tox21(preprocessor, labels=labels))
        # Save each split so subsequent calls can skip preprocessing.
        naming.create_cache_directory()
        for path, split in zip(file_paths, splits):
            NumpyTupleDataset.save(path, split)
    return tuple(splits)