Exemplo n.º 1
0
    def _add_alternative_bond_topologies(self, conformer, smiles_id_dict):
        """Replaces the conformer's bond topologies with geometry-derived ones.

        Calls topology_from_geom.bond_topologies_from_geom with the
        conformer's first bond topology and its optimized geometry. When no
        topology comes back the conformer is left unchanged. Otherwise
        `conformer.bond_topologies` is overwritten with the matches, and each
        match has its `bond_topology_id` filled in from `smiles_id_dict`.

        Args:
          conformer: record modified in place (presumably a
            dataset_pb2.Conformer -- confirm against the caller).
          smiles_id_dict: mapping from SMILES string to bond topology id.
        """

        def bump(counter_name):
            # Every metric emitted here lives in the same namespace.
            beam.metrics.Metrics.counter(_METRICS_NAMESPACE,
                                         counter_name).inc()

        bump('attempted_topology_matches')

        params = smu_molecule.MatchingParameters()
        params.must_match_all_bonds = True
        params.smiles_with_h = False
        params.smiles_with_labels = False
        params.neutral_forms_during_bond_matching = True
        params.consider_not_bonded = True
        params.ring_atom_count_cannot_decrease = False

        found = topology_from_geom.bond_topologies_from_geom(
            bond_lengths=self._cached_bond_lengths,
            conformer_id=conformer.conformer_id,
            fate=conformer.fate,
            bond_topology=conformer.bond_topologies[0],
            geometry=conformer.optimized_geometry,
            matching_parameters=params)

        if not found.bond_topology:
            bump('no_topology_matches')
            return

        # Drop the existing topologies before installing the matched ones.
        del conformer.bond_topologies[:]
        conformer.bond_topologies.extend(found.bond_topology)
        for topology in conformer.bond_topologies:
            try:
                topology.bond_topology_id = smiles_id_dict[topology.smiles]
            except KeyError:
                # Unknown SMILES: the id is left as returned by the matcher
                # and the miss is only counted.
                bump('topology_match_smiles_failure')
    def test_ethane(self):
        """The simplest molecule, CC."""
        #     bond_topology = text_format.Parse(
        #         """
        #       atoms: ATOM_C
        #       atoms: ATOM_C
        #       bonds: {
        #         atom_a: 0,
        #         atom_b: 1,
        #         bond_type: BOND_SINGLE
        #       }
        # """, dataset_pb2.BondTopology())
        cc = text_format.Parse("""
      atoms: ATOM_C
      atoms: ATOM_C
""", dataset_pb2.BondTopology())
        scores = np.array([0.1, 1.1, 2.1, 3.1], dtype=np.float32)
        bonds_to_scores = {(0, 1): scores}
        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        mol = smu_molecule.SmuMolecule(cc, bonds_to_scores,
                                       matching_parameters)
        state = mol.generate_search_state()
        self.assertLen(state, 1)
        self.assertEqual(state, [[0, 1, 2, 3]])

        for i, s in enumerate(itertools.product(*state)):
            res = mol.place_bonds(s)
            self.assertIsNotNone(res)
            self.assertAlmostEqual(res.score, scores[i])
    def test_propane_all(self, btype1, btype2, expected_bonds, expected_score):
        cc = text_format.Parse(
            """
      atoms: ATOM_C
      atoms: ATOM_C
      atoms: ATOM_C
""", dataset_pb2.BondTopology())
        #   print(f"Generating bonds {btype1} and {btype2}")
        bonds_to_scores = {
            (0, 1): np.zeros(4, dtype=np.float32),
            (1, 2): np.zeros(4, dtype=np.float32)
        }
        bonds_to_scores[(0, 1)][btype1] = 1.0
        bonds_to_scores[(1, 2)][btype2] = 1.0
        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        mol = smu_molecule.SmuMolecule(cc, bonds_to_scores,
                                       matching_parameters)
        state = mol.generate_search_state()
        for s in itertools.product(*state):
            res = mol.place_bonds(s, matching_parameters)
            if expected_score is not None:
                self.assertIsNotNone(res)
                self.assertLen(res.bonds, expected_bonds)
                self.assertAlmostEqual(res.score, expected_score)
                if btype1 == 0:
                    if btype2 > 0:
                        self.assertEqual(res.bonds[0].bond_type, btype2)
                else:
                    self.assertEqual(res.bonds[0].bond_type, btype1)
                    self.assertEqual(res.bonds[1].bond_type, btype2)
            else:
                self.assertIsNone(res)
    def test_scores(self):
        carbon = dataset_pb2.BondTopology.AtomType.ATOM_C
        single_bond = dataset_pb2.BondTopology.BondType.BOND_SINGLE
        double_bond = dataset_pb2.BondTopology.BondType.BOND_DOUBLE

        # For testing, turn off the need for complete matching.
        smu_molecule.default_must_match_all_bonds = False

        all_distributions = bond_length_distribution.AllAtomPairLengthDistributions(
        )
        x, y = triangular_distribution(1.0, 1.4, 2.0)
        df = pd.DataFrame({"length": x, "count": y})
        bldc1c = bond_length_distribution.EmpiricalLengthDistribution(df, 0.0)
        all_distributions.add(carbon, carbon, single_bond, bldc1c)

        x, y = triangular_distribution(1.0, 1.5, 2.0)
        df = pd.DataFrame({"length": x, "count": y})
        bldc2c = bond_length_distribution.EmpiricalLengthDistribution(df, 0.0)
        all_distributions.add(carbon, carbon, double_bond, bldc2c)

        bond_topology = text_format.Parse(
            """
atoms: ATOM_C
atoms: ATOM_C
bonds: {
  atom_a: 0
  atom_b: 1
  bond_type: BOND_SINGLE
}
""", dataset_pb2.BondTopology())

        geometry = text_format.Parse(
            """
atom_positions {
  x: 0.0
  y: 0.0
  z: 0.0
},
atom_positions {
  x: 0.0
  y: 0.0
  z: 0.0
}
""", dataset_pb2.Geometry())
        geometry.atom_positions[1].x = 1.4 / smu_utils_lib.BOHR_TO_ANGSTROMS

        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        result = topology_from_geom.bond_topologies_from_geom(
            all_distributions, bond_topology, geometry, matching_parameters)
        self.assertIsNotNone(result)
        self.assertEqual(len(result.bond_topology), 2)
        self.assertEqual(len(result.bond_topology[0].bonds), 1)
        self.assertEqual(len(result.bond_topology[1].bonds), 1)
        self.assertGreater(result.bond_topology[0].score,
                           result.bond_topology[1].score)
        self.assertEqual(result.bond_topology[0].bonds[0].bond_type,
                         single_bond)
        self.assertEqual(result.bond_topology[1].bonds[0].bond_type,
                         double_bond)
Exemplo n.º 5
0
def _get_geometry_matching_parameters():
    """Returns a new MatchingParameters configured for geometry matching."""
    params = smu_molecule.MatchingParameters()
    # Applied in order; each entry is (attribute name, value).
    settings = (
        ('must_match_all_bonds', True),
        ('smiles_with_h', False),
        ('smiles_with_labels', False),
        ('neutral_forms_during_bond_matching', True),
        ('consider_not_bonded', True),
        ('ring_atom_count_cannot_decrease', False),
    )
    for attribute, value in settings:
        setattr(params, attribute, value)
    return params
Exemplo n.º 6
0
 def __init__(self, outputter):
     """Stores the wrapped outputter and prepares matching state.

     Args:
       outputter: object kept as `self._wrapped_outputter`.
     """
     self._wrapped_outputter = outputter
     self._geometry_data = GeometryData.get_singleton()
     # `params` is just a short alias for the instance attribute.
     params = self._matching_parameters = smu_molecule.MatchingParameters()
     params.must_match_all_bonds = True
     params.smiles_with_h = False
     params.smiles_with_labels = False
     params.neutral_forms_during_bond_matching = True
     params.consider_not_bonded = True
     params.ring_atom_count_cannot_decrease = False
    def test_ethane_all(self, btype, expected_bond):
        cc = text_format.Parse("""
      atoms: ATOM_C
      atoms: ATOM_C
""", dataset_pb2.BondTopology())
        bonds_to_scores = {(0, 1): np.zeros(4, dtype=np.float32)}
        bonds_to_scores[(0, 1)][btype] = 1.0
        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        mol = smu_molecule.SmuMolecule(cc, bonds_to_scores,
                                       matching_parameters)
        state = mol.generate_search_state()
        for s in itertools.product(*state):
            res = mol.place_bonds(s, matching_parameters)
            if btype == 0:
                self.assertIsNone(res)
            else:
                self.assertIsNotNone(res)
                self.assertLen(res.bonds, 1)
                self.assertEqual(res.bonds[0].bond_type, expected_bond)
Exemplo n.º 8
0
    def process(self, conformer):
        """Called by Beam; yields the topology matches for one conformer.

        Args:
          conformer: input record. Its conformer_id, fate, first bond
            topology and optimized geometry are handed to the matcher.

        Yields:
          The value returned by topology_from_geom.bond_topologies_from_geom
          (a dataset_pb2.TopologyMatches according to the original
          documentation).
        """
        # No filtering on conformer.fate is done here; add an early return
        # (e.g. for anything other than FATE_SUCCESS) if that is needed.
        params = smu_molecule.MatchingParameters()
        params.neutral_forms_during_bond_matching = True
        params.must_match_all_bonds = True
        params.consider_not_bonded = True
        params.ring_atom_count_cannot_decrease = False

        starting_topology = conformer.bond_topologies[0]
        matches = topology_from_geom.bond_topologies_from_geom(
            self._bond_lengths, conformer.conformer_id, conformer.fate,
            starting_topology, conformer.optimized_geometry, params)
        yield matches
    def test_operators(self):
        cc = text_format.Parse(
            """
      atoms: ATOM_C
      atoms: ATOM_C
      atoms: ATOM_C
""", dataset_pb2.BondTopology())
        #   print(f"Generating bonds {btype1} and {btype2}")
        bonds_to_scores = {
            (0, 1): np.zeros(4, dtype=np.float32),
            (1, 2): np.zeros(4, dtype=np.float32)
        }
        scores = np.array([1.0, 3.0], dtype=np.float32)
        bonds_to_scores[(0, 1)][1] = scores[0]
        bonds_to_scores[(1, 2)][1] = scores[1]
        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        mol = smu_molecule.SmuMolecule(cc, bonds_to_scores,
                                       matching_parameters)
        mol.set_initial_score_and_incrementer(1.0, operator.mul)
        state = mol.generate_search_state()
        for s in itertools.product(*state):
            res = mol.place_bonds(s, matching_parameters)
            self.assertAlmostEqual(res.score, np.product(scores))
Exemplo n.º 10
0
    def test_multi_topology_detection(self):
        """Checks that two bond arrangements of one topology are both found."""
        nitrogen = dataset_pb2.BondTopology.ATOM_N
        single = dataset_pb2.BondTopology.BondType.BOND_SINGLE
        double = dataset_pb2.BondTopology.BondType.BOND_DOUBLE

        # Single and double N-N bonds share the same fixed length window.
        length_dists = bond_length_distribution.AllAtomPairLengthDistributions()
        for bond_type in (single, double):
            window = bond_length_distribution.FixedWindowLengthDistribution(
                1.0, 2.0, None)
            length_dists.add(nitrogen, nitrogen, bond_type, window)

        # The conformer is a flat square of four nitrogens. Rotating the
        # single/double pattern around the ring gives the same topology with
        # the individual bond types swapped.
        ring_edges = ((0, 1), (1, 2), (2, 3), (3, 0))
        starting_types = (single, double, single, double)
        rotated_types = (double, single, double, single)

        conformer = dataset_pb2.Conformer()
        conformer.bond_topologies.add(bond_topology_id=123, smiles="N1=NN=N1")
        conformer.bond_topologies[0].atoms.extend([nitrogen] * 4)
        conformer.bond_topologies[0].bonds.extend([
            dataset_pb2.BondTopology.Bond(atom_a=a, atom_b=b, bond_type=kind)
            for (a, b), kind in zip(ring_edges, starting_types)
        ])

        side = 1.5 / smu_utils_lib.BOHR_TO_ANGSTROMS
        corners = ((0, 0), (0, side), (side, side), (side, 0))
        conformer.optimized_geometry.atom_positions.extend([
            dataset_pb2.Geometry.AtomPos(x=x, y=y, z=0) for x, y in corners
        ])

        params = smu_molecule.MatchingParameters()
        result = topology_from_geom.bond_topologies_from_geom(
            bond_lengths=length_dists,
            conformer_id=123,
            fate=dataset_pb2.Conformer.FATE_SUCCESS,
            bond_topology=conformer.bond_topologies[0],
            geometry=conformer.optimized_geometry,
            matching_parameters=params)

        self.assertLen(result.bond_topology, 2)

        # The returned order is arbitrary, so find which entry is marked as
        # the starting topology; the other index follows from there being
        # exactly two entries.
        starting_idx = min(
            idx for idx, topology in enumerate(result.bond_topology)
            if topology.is_starting_topology)
        other_idx = 1 - starting_idx

        starting = result.bond_topology[starting_idx]
        self.assertTrue(starting.is_starting_topology)
        for (a, b), expected in zip(ring_edges, starting_types):
            self.assertEqual(smu_utils_lib.get_bond_type(starting, a, b),
                             expected)

        other = result.bond_topology[other_idx]
        self.assertFalse(other.is_starting_topology)
        for (a, b), expected in zip(ring_edges, rotated_types):
            self.assertEqual(smu_utils_lib.get_bond_type(other, a, b),
                             expected)