Пример #1
0
def test_smiles_parser_target_index(mol_smiles, mols):
    """Test `SmilesParser.parse` with `target_index` and `return_smiles=True`.

    Only indices 0 and 2 of `mol_smiles` are requested, so the dataset, the
    SMILES array and the success flags are each expected to hold two items.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles,
                          return_smiles=True,
                          target_index=[0, 2],
                          return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2
    is_successful = result['is_successful']
    # `numpy.alltrue` was removed in NumPy 2.0; `numpy.all` is the equivalent.
    assert numpy.all(is_successful)
    assert len(is_successful) == 2

    # As we want to test SmilesParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_input_features(dataset[0], expect)

    expect = preprocessor.get_input_features(mols[2])
    check_input_features(dataset[1], expect)

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
Пример #2
0
def test_smiles_parser_target_index(mol_smiles, mols):
    """Check `SmilesParser.parse` when restricted by `target_index=[0, 2]`.

    The parse is run with `return_smiles=True` and
    `return_is_successful=True`; all three outputs must contain exactly the
    two selected entries.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True, target_index=[0, 2],
                          return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2
    is_successful = result['is_successful']
    # Use `numpy.all`: the `numpy.alltrue` alias no longer exists in NumPy 2.
    assert numpy.all(is_successful)
    assert len(is_successful) == 2

    # As we want to test SmilesParser, we assume
    # NFPPreprocessor works as documented.
    # Dataset position -> index of the matching molecule in `mols`.
    for position, mol_index in enumerate((0, 2)):
        expect = preprocessor.get_input_features(mols[mol_index])
        check_input_features(dataset[position], expect)

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
Пример #3
0
def test_smiles_parser_not_return_smiles(mol_smiles, mols):
    """Parse with `return_smiles=False` and check the optional outputs.

    Both the 'smiles' and 'is_successful' entries of the result must be None,
    while the dataset still holds three entries.
    """
    nfp = NFPPreprocessor()
    parsed = SmilesParser(nfp).parse(mol_smiles, return_smiles=False)
    dataset, smiles, is_successful = (
        parsed[key] for key in ('dataset', 'smiles', 'is_successful'))
    assert len(dataset) == 3
    assert smiles is None
    assert is_successful is None

    # The parser is under test here, so the preprocessor output is
    # taken as the reference.
    for idx in range(3):
        expected_features = nfp.get_input_features(mols[idx])
        check_input_features(dataset[idx], expected_features)
Пример #4
0
def test_smiles_parser_not_return_smiles(mol_smiles, mols):
    """Check that `return_smiles=False` leaves the optional outputs unset.

    The result is expected to carry a three-entry dataset and None for both
    'smiles' and 'is_successful'.
    """
    feature_extractor = NFPPreprocessor()
    smiles_parser = SmilesParser(feature_extractor)
    outcome = smiles_parser.parse(mol_smiles, return_smiles=False)

    parsed_dataset = outcome['dataset']
    parsed_smiles = outcome['smiles']
    success_flags = outcome['is_successful']

    assert len(parsed_dataset) == 3
    assert parsed_smiles is None
    assert success_flags is None

    # Reference features come straight from the preprocessor, which is
    # assumed to work as documented.
    position = 0
    while position < 3:
        reference = feature_extractor.get_input_features(mols[position])
        check_input_features(parsed_dataset[position], reference)
        position += 1
Пример #5
0
def test_atomic_number_preprocessor_default():
    """Check the output of a default `AtomicNumberPreprocessor`.

    A randomly picked dataset entry must be a 1-tuple whose only element is
    a one-dimensional int32 array.
    """
    smiles_list = ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1']
    parsed = SmilesParser(AtomicNumberPreprocessor()).parse(smiles_list)
    dataset = parsed['dataset']

    picked = numpy.random.choice(len(dataset))
    (atoms,) = dataset[picked]

    assert atoms.ndim == 1
    assert atoms.dtype == numpy.int32
Пример #6
0
def test_smiles_parser_return_is_successful(mols):
    """Test `return_is_successful=True` when invalid SMILES are mixed in.

    Five strings are parsed, two of which ('var' and 'hoge') are expected to
    fail: the dataset must hold three entries while `is_successful` keeps
    one flag per input string.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2']
    result = parser.parse(mol_smiles_with_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # `numpy.alltrue` was removed in NumPy 2.0; `numpy.all` is the equivalent.
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    # NOTE(review): presumably `mols` holds the three valid molecules in the
    # same order as they appear above - confirm against the fixture.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
Пример #7
0
def test_smiles_parser_return_smiles(mol_smiles, mols):
    """Test `SmilesParser.parse` with `return_smiles=True`.

    The dataset must hold three entries matching `mols`, and the returned
    SMILES must be a one-dimensional numpy array of the same length.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want to test SmilesParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    # isinstance is the idiomatic type check (exact type comparison is a
    # lint error and needlessly rejects ndarray subclasses).
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
Пример #8
0
def test_smiles_parser_return_is_successful(mols):
    """Check the `is_successful` flags for a mix of valid and invalid SMILES.

    Inputs at positions 0 and 2 ('var', 'hoge') are expected to fail, the
    other three to succeed; only the successful ones end up in the dataset.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2'
    ]
    result = parser.parse(mol_smiles_with_invalid,
                          return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # Use `numpy.all`: the `numpy.alltrue` alias no longer exists in NumPy 2.
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    # NOTE(review): presumably `mols` lists the three valid molecules in
    # input order - confirm against the fixture.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
Пример #9
0
def test_smiles_parser_return_smiles(mol_smiles, mols):
    """Check the SMILES array returned by `parse(..., return_smiles=True)`.

    Besides the three-entry dataset, the result must carry a 1-D numpy array
    with one SMILES string per dataset entry.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want to test SmilesParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    # isinstance instead of an exact `type(...) ==` comparison.
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    expected_smiles = ('CN=C=O', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2')
    for position, expected in enumerate(expected_smiles):
        assert smiles[position] == expected
Пример #10
0
def test_rsgcn_preprocessor_default():
    """Check array ranks and dtypes produced by a default `RSGCNPreprocessor`.

    A randomly picked dataset entry must unpack into a 1-D int32 atom array
    and a 2-D float32 adjacency array.
    """
    smiles_list = ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1']
    rsgcn = RSGCNPreprocessor()
    dataset = SmilesParser(rsgcn).parse(smiles_list)['dataset']

    picked = numpy.random.choice(len(dataset))
    atoms, adjacency = dataset[picked]

    # atoms: (atom, )
    assert (atoms.ndim, atoms.dtype) == (1, numpy.int32)
    assert (adjacency.ndim, adjacency.dtype) == (2, numpy.float32)
Пример #11
0
def test_nfp_preprocessor_default():
    """Check array ranks and dtypes produced by a default `NFPPreprocessor`.

    A randomly picked dataset entry must unpack into a 1-D int32 atom array
    and a 2-D float32 adjacency array.
    """
    smiles_list = ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1']
    parsed = SmilesParser(NFPPreprocessor()).parse(smiles_list)
    dataset = parsed['dataset']

    picked = numpy.random.choice(len(dataset))
    atoms, adjs = dataset[picked]

    # atoms: (atom, )
    assert atoms.ndim == 1
    assert atoms.dtype == numpy.int32

    # adjs: (atom from, atom to)
    assert adjs.ndim == 2
    assert adjs.dtype == numpy.float32
Пример #12
0
def test_relgcn_preprocessor_kekulize():
    """Check `RelGCNPreprocessor(kekulize=True)` on 'Cc1cnc(C=O)n1C'.

    The second dataset entry is compared against the expected atomic numbers
    and a (4, 9, 9) adjacency tensor in which only channels 0 and 1 contain
    bonds.
    """
    smiles_list = ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1']
    dataset = SmilesParser(
        RelGCNPreprocessor(kekulize=True)).parse(smiles_list)["dataset"]
    atoms1, adjs1 = dataset[1]

    expect_atoms = numpy.array([6, 6, 6, 7, 6, 6, 8, 7, 6], numpy.int32)
    assert numpy.allclose(atoms1, expect_atoms)

    # Undirected bonds as (atom, atom) index pairs, one list per channel.
    # NOT include aromatic bond (ch=3), so the last channel stays empty.
    bonds_per_channel = (
        [(0, 1), (1, 7), (2, 3), (4, 5), (4, 7), (7, 8)],
        [(1, 2), (3, 4), (5, 6)],
        [],
        [],
    )
    expect_adjs = numpy.zeros((4, 9, 9), dtype=numpy.float32)
    for channel, bonds in enumerate(bonds_per_channel):
        for src, dst in bonds:
            # Adjacency is symmetric: set both directions.
            expect_adjs[channel, src, dst] = 1.
            expect_adjs[channel, dst, src] = 1.
    assert numpy.allclose(adjs1, expect_adjs)
def test_weave_preprocessor(max_atoms, use_fixed_atom_feature):
    """Check array ranks, dtypes and sizes produced by `WeaveNetPreprocessor`.

    Args:
        max_atoms: forwarded to the preprocessor; the first dimension of the
            adjacency output must equal ``max_atoms * max_atoms``.
        use_fixed_atom_feature: forwarded to the preprocessor; selects which
            atom array layout is expected.
    """
    weave = WeaveNetPreprocessor(
        max_atoms=max_atoms, use_fixed_atom_feature=use_fixed_atom_feature)
    smiles_list = ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1']
    dataset = SmilesParser(weave).parse(smiles_list)["dataset"]

    picked = numpy.random.choice(len(dataset))
    atoms, adjs = dataset[picked]

    # fixed atom feature: (atom, ch) float32; otherwise: (atom, ) int32
    if use_fixed_atom_feature:
        atoms_ndim, atoms_dtype = 2, numpy.float32
    else:
        atoms_ndim, atoms_dtype = 1, numpy.int32
    assert atoms.ndim == atoms_ndim
    assert atoms.dtype == atoms_dtype

    # adjs: (atom from * atom to, ch)
    assert adjs.ndim == 2
    assert adjs.shape[0] == max_atoms * max_atoms
    assert adjs.dtype == numpy.float32

    # TODO(nakago): test feature extraction behavior...
    atoms0, adjs0 = dataset[0]
Пример #14
0
def test_smiles_parser_extract_total_num(mol_smiles):
    """Check that `extract_total_num` reports 3 for the `mol_smiles` fixture."""
    parser = SmilesParser(NFPPreprocessor())
    assert parser.extract_total_num(mol_smiles) == 3
Пример #15
0
def test_relgcn_preprocessor():
    """Check the output of a default `RelGCNPreprocessor`.

    A randomly picked entry is checked for rank and dtype; the first two
    entries ('C#N' and 'Cc1cnc(C=O)n1C') are compared against the expected
    atomic numbers and four-channel adjacency tensors.
    """

    def build_adjs(num_atoms, bonds_per_channel):
        # Dense float32 tensor of shape (channel, atom, atom) with a 1 in
        # both directions for every listed undirected bond.
        dense = numpy.zeros(
            (len(bonds_per_channel), num_atoms, num_atoms),
            dtype=numpy.float32)
        for channel, bonds in enumerate(bonds_per_channel):
            for src, dst in bonds:
                dense[channel, src, dst] = 1.
                dense[channel, dst, src] = 1.
        return dense

    smiles_list = ['C#N', 'Cc1cnc(C=O)n1C', 'c1ccccc1']
    dataset = SmilesParser(RelGCNPreprocessor()).parse(smiles_list)["dataset"]

    picked = numpy.random.choice(len(dataset))
    atoms, adjs = dataset[picked]
    # atoms: (atom, )
    assert atoms.ndim == 1
    assert atoms.dtype == numpy.int32
    # adjs: (edge_type, atom from, atom to)
    assert adjs.ndim == 3
    assert adjs.dtype == numpy.float32

    atoms0, adjs0 = dataset[0]
    assert numpy.allclose(atoms0, numpy.array([6, 7], numpy.int32))
    # The only bond of the first molecule is expected in channel 2.
    expect_adjs = build_adjs(2, ([], [], [(0, 1)], []))
    assert numpy.allclose(adjs0, expect_adjs)

    atoms1, adjs1 = dataset[1]
    expect_atoms = numpy.array([6, 6, 6, 7, 6, 6, 8, 7, 6], numpy.int32)
    assert numpy.allclose(atoms1, expect_atoms)
    # include aromatic bond (ch=3)
    expect_adjs = build_adjs(9, (
        [(0, 1), (4, 5), (7, 8)],
        [(5, 6)],
        [],
        [(1, 2), (1, 7), (2, 3), (3, 4), (4, 7)],
    ))
    assert numpy.allclose(adjs1, expect_adjs)
Пример #16
0
def test_smiles_parser_extract_total_num(mol_smiles):
    """Verify `SmilesParser.extract_total_num` counts three input SMILES."""
    expected_total = 3
    smiles_parser = SmilesParser(NFPPreprocessor())
    total = smiles_parser.extract_total_num(mol_smiles)
    assert total == expected_total