def test_smiles_parser_target_index(mol_smiles, mols):
    """Check that ``target_index`` restricts parsing to the chosen entries.

    ``mol_smiles`` is parsed with ``target_index=[0, 2]`` while requesting
    both the SMILES strings and the success flags; exactly the two selected
    molecules are expected back, in that order.

    Args:
        mol_smiles: SMILES input handed to ``SmilesParser.parse``
            (presumably a pytest fixture defined elsewhere -- confirm).
        mols: Molecules indexable in parallel with ``mol_smiles``, used to
            compute the expected features (presumably a fixture as well).
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True,
                          target_index=[0, 2], return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2

    is_successful = result['is_successful']
    # numpy.all is used because numpy.alltrue was removed in NumPy 2.0.
    assert numpy.all(is_successful)
    assert len(is_successful) == 2

    # The parser is what is under test here, so NFPPreprocessor is assumed
    # to work as documented and supplies the expected values.
    expect = preprocessor.get_input_features(mols[0])
    check_input_features(dataset[0], expect)

    expect = preprocessor.get_input_features(mols[2])
    check_input_features(dataset[1], expect)

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_not_return_smiles(mol_smiles, mols):
    """Parse with ``return_smiles=False`` and check the optional outputs.

    The ``'smiles'`` and ``'is_successful'`` entries of the result are
    expected to be ``None``, while the dataset still holds three samples
    whose features match what the preprocessor computes directly.

    Args:
        mol_smiles: SMILES input handed to ``SmilesParser.parse``
            (presumably a pytest fixture defined elsewhere -- confirm).
        mols: Molecules indexable in parallel with ``mol_smiles``
            (presumably a fixture as well).
    """
    nfp = NFPPreprocessor()
    parsed = SmilesParser(nfp).parse(mol_smiles, return_smiles=False)

    samples = parsed['dataset']
    returned_smiles = parsed['smiles']
    success_flags = parsed['is_successful']

    assert len(samples) == 3
    assert returned_smiles is None
    assert success_flags is None

    # Only the parser is under test; NFPPreprocessor is trusted to behave
    # as documented and is used to produce the reference features.
    for position in range(3):
        reference = nfp.get_input_features(mols[position])
        check_input_features(samples[position], reference)
def test_atomic_number_preprocessor_default():
    """Check the sample layout of ``AtomicNumberPreprocessor`` defaults.

    Each parsed sample must unpack into a single one-dimensional int32
    array.
    """
    preprocessor = AtomicNumberPreprocessor()
    dataset = SmilesParser(preprocessor).parse(
        ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1'])['dataset']

    # An empty dataset must fail the test instead of passing vacuously.
    assert len(dataset) > 0

    # Every sample is checked rather than one chosen with unseeded
    # randomness, so the test is deterministic and failures reproduce.
    # Integer indexing mirrors how the dataset was accessed before.
    for index in range(len(dataset)):
        atoms, = dataset[index]
        assert atoms.ndim == 1
        assert atoms.dtype == numpy.int32
def test_smiles_parser_return_is_successful(mols):
    """Check ``is_successful`` flags when some SMILES cannot be parsed.

    Five strings are parsed, two of which ('var' and 'hoge') are expected
    to fail. The dataset must contain the three remaining molecules and
    the flag array must mark positions 1, 3 and 4 as successful.

    NOTE(review): a function with this exact name is defined again later
    in this file; the later definition replaces this one at import time,
    so only one of them is collected. Consider renaming one of the two.

    Args:
        mols: Molecules matching the three valid SMILES, in order
            (presumably a pytest fixture defined elsewhere -- confirm).
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2',
    ]
    result = parser.parse(mol_smiles_with_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # numpy.all is used because numpy.alltrue was removed in NumPy 2.0.
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_smiles_parser_return_smiles(mol_smiles, mols):
    """Check that ``return_smiles=True`` returns the SMILES with the data.

    The dataset must hold three samples matching the preprocessor output,
    and ``result['smiles']`` must be a one-dimensional ``numpy.ndarray``
    with one entry per sample.

    Args:
        mol_smiles: SMILES input handed to ``SmilesParser.parse``
            (presumably a pytest fixture defined elsewhere -- confirm).
        mols: Molecules indexable in parallel with ``mol_smiles``
            (presumably a fixture as well).
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # The parser is what is under test here, so NFPPreprocessor is assumed
    # to work as documented and supplies the expected values.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_return_is_successful(mols):
    """Check ``is_successful`` flags when some SMILES cannot be parsed.

    Five strings are parsed, two of which ('var' and 'hoge') are expected
    to fail. The dataset must contain the three remaining molecules and
    the flag array must mark positions 1, 3 and 4 as successful.

    NOTE(review): a function with this exact name is also defined earlier
    in this file; this later definition replaces it at import time, so
    only one of them is collected. Consider renaming one of the two.

    Args:
        mols: Molecules matching the three valid SMILES, in order
            (presumably a pytest fixture defined elsewhere -- confirm).
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2',
    ]
    result = parser.parse(mol_smiles_with_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # numpy.all is used because numpy.alltrue was removed in NumPy 2.0.
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_rsgcn_preprocessor_default():
    """Check the sample layout of ``RSGCNPreprocessor`` defaults.

    Each parsed sample must unpack into a one-dimensional int32 atom array
    and a two-dimensional float32 adjacency array.
    """
    preprocessor = RSGCNPreprocessor()
    dataset = SmilesParser(preprocessor).parse(
        ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1'])['dataset']

    # An empty dataset must fail the test instead of passing vacuously.
    assert len(dataset) > 0

    # Every sample is checked rather than one chosen with unseeded
    # randomness, so the test is deterministic and failures reproduce.
    # Integer indexing mirrors how the dataset was accessed before.
    for index in range(len(dataset)):
        atoms, adjacency = dataset[index]

        # (atom, )
        assert atoms.ndim == 1
        assert atoms.dtype == numpy.int32

        assert adjacency.ndim == 2
        assert adjacency.dtype == numpy.float32
def test_nfp_preprocessor_default():
    """Check the sample layout of ``NFPPreprocessor`` defaults.

    Each parsed sample must unpack into a one-dimensional int32 atom array
    and a two-dimensional float32 adjacency array.
    """
    preprocessor = NFPPreprocessor()
    dataset = SmilesParser(preprocessor).parse(
        ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1'])['dataset']

    # An empty dataset must fail the test instead of passing vacuously.
    assert len(dataset) > 0

    # Every sample is checked rather than one chosen with unseeded
    # randomness, so the test is deterministic and failures reproduce.
    # Integer indexing mirrors how the dataset was accessed before.
    for index in range(len(dataset)):
        atoms, adjs = dataset[index]

        # (atom, )
        assert atoms.ndim == 1
        assert atoms.dtype == numpy.int32

        # (atom from, atom to)
        assert adjs.ndim == 2
        assert adjs.dtype == numpy.float32
def test_relgcn_preprocessor_kekulize():
    """Check ``RelGCNPreprocessor(kekulize=True)`` on an aromatic molecule.

    The second input, 'Cc1cnc(C=O)n1C', is compared against a hand-written
    expectation: nine atoms and a (4, 9, 9) adjacency tensor in which only
    the first two channels contain bonds and the last channel stays empty.
    """
    samples = SmilesParser(RelGCNPreprocessor(kekulize=True)).parse(
        ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1'])["dataset"]
    atom_array, adj_tensor = samples[1]

    expected_atoms = numpy.array([6, 6, 6, 7, 6, 6, 8, 7, 6], numpy.int32)
    assert numpy.allclose(atom_array, expected_atoms)

    # Bonds expected per channel, written once as (atom, atom) pairs and
    # mirrored below so that each channel matrix is symmetric.
    # NOT include aromatic bond (ch=3): channels 2 and 3 stay all-zero.
    bonds_by_channel = (
        ((0, 1), (1, 7), (2, 3), (4, 5), (4, 7), (7, 8)),
        ((1, 2), (3, 4), (5, 6)),
        (),
        (),
    )
    expected_adjs = numpy.zeros((4, 9, 9), dtype=numpy.float32)
    for channel, bonds in enumerate(bonds_by_channel):
        for begin, end in bonds:
            expected_adjs[channel, begin, end] = 1.
            expected_adjs[channel, end, begin] = 1.

    assert numpy.allclose(adj_tensor, expected_adjs)
def test_weave_preprocessor(max_atoms, use_fixed_atom_feature):
    """Check the sample layout produced by ``WeaveNetPreprocessor``.

    With ``use_fixed_atom_feature`` set, atoms are expected as a
    two-dimensional float32 array; otherwise as a one-dimensional int32
    array. The pair features must be a two-dimensional float32 array whose
    first dimension equals ``max_atoms * max_atoms``.

    Args:
        max_atoms: Forwarded to ``WeaveNetPreprocessor`` (presumably
            supplied by a fixture or parametrization -- confirm).
        use_fixed_atom_feature: Forwarded to ``WeaveNetPreprocessor``;
            selects which atom layout is asserted.
    """
    preprocessor = WeaveNetPreprocessor(
        max_atoms=max_atoms, use_fixed_atom_feature=use_fixed_atom_feature)
    dataset = SmilesParser(preprocessor).parse(
        ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1'])["dataset"]

    # An empty dataset must fail the test instead of passing vacuously.
    assert len(dataset) > 0

    # Every sample is checked rather than one chosen with unseeded
    # randomness, so the test is deterministic and failures reproduce.
    # Integer indexing mirrors how the dataset was accessed before.
    for index in range(len(dataset)):
        atoms, adjs = dataset[index]

        if use_fixed_atom_feature:
            # (atom, ch)
            assert atoms.ndim == 2
            assert atoms.dtype == numpy.float32
        else:
            # (atom, )
            assert atoms.ndim == 1
            assert atoms.dtype == numpy.int32

        # (atom from * atom to, ch)
        assert adjs.ndim == 2
        assert adjs.shape[0] == max_atoms * max_atoms
        assert adjs.dtype == numpy.float32

    # TODO(nakago): test feature extraction behavior...
def test_smiles_parser_extract_total_num(mol_smiles):
    """Check that ``extract_total_num`` reports three for ``mol_smiles``.

    Args:
        mol_smiles: SMILES input handed to ``extract_total_num``
            (presumably a three-entry pytest fixture -- confirm).
    """
    smiles_parser = SmilesParser(NFPPreprocessor())
    total = smiles_parser.extract_total_num(mol_smiles)
    assert total == 3
def test_relgcn_preprocessor():
    """Check ``RelGCNPreprocessor`` defaults: layout and exact values.

    Every parsed sample must unpack into a one-dimensional int32 atom
    array and a three-dimensional float32 adjacency tensor. The first two
    samples ('C#N' and 'Cc1cnc(C=O)n1C') are additionally compared against
    hand-written expectations.
    """
    def build_expected_adjs(num_atoms, bonds_by_channel):
        """Return a symmetric float32 tensor with 1. at every listed bond."""
        expected = numpy.zeros(
            (len(bonds_by_channel), num_atoms, num_atoms),
            dtype=numpy.float32)
        for channel, bonds in enumerate(bonds_by_channel):
            for begin, end in bonds:
                expected[channel, begin, end] = 1.
                expected[channel, end, begin] = 1.
        return expected

    preprocessor = RelGCNPreprocessor()
    dataset = SmilesParser(preprocessor).parse(
        ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1'])["dataset"]

    # An empty dataset must fail the test instead of passing vacuously.
    assert len(dataset) > 0

    # Every sample is checked rather than one chosen with unseeded
    # randomness, so the test is deterministic and failures reproduce.
    # Integer indexing mirrors how the dataset was accessed before.
    for index in range(len(dataset)):
        atoms, adjs = dataset[index]

        # (atom, )
        assert atoms.ndim == 1
        assert atoms.dtype == numpy.int32

        # (edge_type, atom from, atom to)
        assert adjs.ndim == 3
        assert adjs.dtype == numpy.float32

    # 'C#N': the only bond is expected in channel 2.
    atoms0, adjs0 = dataset[0]
    assert numpy.allclose(atoms0, numpy.array([6, 7], numpy.int32))
    expect_adjs = build_expected_adjs(2, [[], [], [(0, 1)], []])
    assert numpy.allclose(adjs0, expect_adjs)

    atoms1, adjs1 = dataset[1]
    assert numpy.allclose(
        atoms1, numpy.array([6, 6, 6, 7, 6, 6, 8, 7, 6], numpy.int32))
    # include aromatic bond (ch=3)
    expect_adjs = build_expected_adjs(9, [
        [(0, 1), (4, 5), (7, 8)],
        [(5, 6)],
        [],
        [(1, 2), (1, 7), (2, 3), (3, 4), (4, 7)],
    ])
    assert numpy.allclose(adjs1, expect_adjs)