Exemplo n.º 1
0
    def fit_with_mapper(self, mapper):
        """Run the shared ``MoleculeMatcher.fit`` checks with the given mapper.

        A CH4 geometry is first compared against a copy transformed by a
        ``SymmOp``; then a series of reference file pairs from ``test_dir``
        is checked with the default tolerance; finally the t3/t4 pair is
        re-checked with a tolerance of 0.001 and expected not to fit.

        Args:
            mapper: atom mapper instance handed to every ``MoleculeMatcher``.
        """
        ch4_coords = [
            [0.000000, 0.000000, 0.000000],
            [0.000000, 0.000000, 1.089000],
            [1.026719, 0.000000, -0.363000],
            [-0.513360, -0.889165, -0.363000],
            [-0.513360, 0.889165, -0.363000],
        ]
        original = Molecule(["C", "H", "H", "H", "H"], ch4_coords)
        transform = SymmOp.from_origin_axis_angle(
            [0, 0, 0], [0.1, 0.2, 0.3], 60)
        moved_coords = list(map(transform.operate, ch4_coords))
        moved = Molecule(["C", "H", "H", "H", "H"], moved_coords)
        matcher = MoleculeMatcher(mapper=mapper)
        self.assertTrue(matcher.fit(original, moved))

        # (first file, second file, whether fit() is expected to succeed)
        file_cases = (
            ("benzene1.xyz", "benzene2.xyz", True),
            ("benzene1.xyz", "t2.xyz", False),
            ("c1.xyz", "c2.xyz", True),
            ("t3.xyz", "t4.xyz", True),
            ("j1.xyz", "j2.xyz", True),
            ("ethene1.xyz", "ethene2.xyz", True),
            ("toluene1.xyz", "toluene2.xyz", True),
            ("cyclohexane1.xyz", "cyclohexane2.xyz", True),
            ("oxygen1.xyz", "oxygen2.xyz", True),
        )
        for first_name, second_name, should_fit in file_cases:
            first = Molecule.from_file(os.path.join(test_dir, first_name))
            second = Molecule.from_file(os.path.join(test_dir, second_name))
            check = self.assertTrue if should_fit else self.assertFalse
            check(matcher.fit(first, second))

        # the same t3/t4 pair no longer fits once the tolerance is 0.001
        strict_matcher = MoleculeMatcher(tolerance=0.001, mapper=mapper)
        first = Molecule.from_file(os.path.join(test_dir, "t3.xyz"))
        second = Molecule.from_file(os.path.join(test_dir, "t4.xyz"))
        self.assertFalse(strict_matcher.fit(first, second))
    def test_to_and_from_dict(self):
        """Round-trip a matcher through ``as_dict``/``from_dict`` per mapper.

        For each mapper type the dict produced by the rebuilt matcher must
        equal the dict it was rebuilt from.
        """
        # factories, so each mapper is only built right before it is used
        mapper_factories = (
            lambda: InchiMolAtomMapper(angle_tolerance=50.0),
            lambda: IsomorphismMolAtomMapper(),
        )
        for make_mapper in mapper_factories:
            matcher = MoleculeMatcher(tolerance=0.5, mapper=make_mapper())
            serialized = matcher.as_dict()
            rebuilt = MoleculeMatcher.from_dict(serialized)
            self.assertEqual(serialized, rebuilt.as_dict())
Exemplo n.º 3
0
 def test_thiane_ethynyl(self):
     """The two thiane_ethynyl SDF files must not fit (InChI mapper, tol 0.05)."""
     path_a = os.path.join(test_dir, "thiane_ethynyl1.sdf")
     path_b = os.path.join(test_dir, "thiane_ethynyl2.sdf")
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     first = Molecule.from_file(path_a)
     second = Molecule.from_file(path_b)
     self.assertFalse(matcher.fit(first, second))
Exemplo n.º 4
0
 def test_get_rmsd(self):
     """RMSD of t3.xyz vs t4.xyz (read via BabelMolAdaptor) formats to 0.00488."""
     matcher = MoleculeMatcher()
     first = BabelMolAdaptor.from_file(
         os.path.join(test_dir, "t3.xyz")).pymatgen_mol
     second = BabelMolAdaptor.from_file(
         os.path.join(test_dir, "t4.xyz")).pymatgen_mol
     rmsd = matcher.get_rmsd(first, second)
     # width 7, three significant digits - compared as text, not as a float
     self.assertEqual(format(rmsd, "7.3"), "0.00488")
Exemplo n.º 5
0
    def test_random_seed(self, water, ethanol):
        """
        Confirm that seed = -1 generates random structures
        while seed = 1 is deterministic.

        For each seed the same input set is run twice in one scratch
        directory and the two resulting output.xyz structures are compared
        with a default MoleculeMatcher.
        """
        matcher = MoleculeMatcher()

        def outputs_fit(seed):
            # Run the packing twice with this seed; report whether both
            # outputs fit each other.
            with tempfile.TemporaryDirectory() as scratch_dir:
                generator = PackmolBoxGen(
                    seed=seed,
                    inputfile="input.in",
                    outputfile="output.xyz",
                )
                input_set = generator.get_input_set(
                    molecules=[
                        {"name": "water", "number": 10, "coords": water},
                        {"name": "ethanol", "number": 20, "coords": ethanol},
                    ],
                )
                input_set.write_input(scratch_dir)
                result_path = os.path.join(scratch_dir, "output.xyz")

                input_set.run(scratch_dir)
                first_run = Molecule.from_file(result_path)
                input_set.run(scratch_dir)
                second_run = Molecule.from_file(result_path)
                return matcher.fit(first_run, second_run)

        # deterministic output
        assert outputs_fit(1)

        # randomly generated structures
        assert not outputs_fit(-1)
Exemplo n.º 6
0
 def test_confab_conformers(self):
     """Generate Confab conformers for the SMILES string ``CCCC``.

     Checks that the adaptor's OpenBabel molecule reports exactly one
     rotor and that at least one conformer is returned. When more than
     one conformer comes back, the first two must have a non-zero RMSD.
     """
     mol = pb.readstring("smi", "CCCC").OBMol
     adaptor = BabelMolAdaptor(mol)
     adaptor.make3d()
     conformers = adaptor.confab_conformers()
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling.
     self.assertEqual(adaptor.openbabel_mol.NumRotors(), 1)
     self.assertGreaterEqual(len(conformers), 1)
     if len(conformers) > 1:
         self.assertNotAlmostEqual(
             MoleculeMatcher().get_rmsd(conformers[0], conformers[1]), 0)
 def test_group_molecules(self):
     """Group the molecules listed in mol_list.txt and compare the grouping.

     The groups are mapped back to file names and their ``str()`` form
     must equal the stripped contents of grouped_mol_list.txt.
     """
     matcher = MoleculeMatcher(tolerance=0.001)
     with open(os.path.join(test_dir, "mol_list.txt")) as listing:
         names = [entry.strip() for entry in listing]
     molecules = [read_mol(os.path.join(test_dir, name)) for name in names]

     named_groups = []
     for group in matcher.group_molecules(molecules):
         # translate each grouped molecule back to the file it came from
         named_groups.append(
             [names[molecules.index(member)] for member in group])

     with open(os.path.join(test_dir, "grouped_mol_list.txt")) as reference:
         expected = reference.read().strip()
     self.assertEqual(str(named_groups), expected)
Exemplo n.º 8
0
 def test_group_molecules(self):
     """Group molecules read through BabelMolAdaptor and compare the grouping.

     File names come from mol_list.txt; the ``str()`` form of the grouped
     file names must equal the stripped contents of grouped_mol_list.txt.
     """
     matcher = MoleculeMatcher(tolerance=0.001)
     with open(os.path.join(test_dir, "mol_list.txt")) as listing:
         names = [entry.strip() for entry in listing]

     molecules = []
     for name in names:
         adaptor = BabelMolAdaptor.from_file(os.path.join(test_dir, name))
         molecules.append(adaptor.pymatgen_mol)

     named_groups = [
         [names[molecules.index(member)] for member in group]
         for group in matcher.group_molecules(molecules)
     ]

     with open(os.path.join(test_dir, "grouped_mol_list.txt")) as reference:
         expected = reference.read().strip()
     self.assertEqual(str(named_groups), expected)
Exemplo n.º 9
0
    def add_if_belongs(self, cand_snl, exact_match=True):
        """Add ``cand_snl`` to this group if it belongs here.

        Args:
            cand_snl: candidate exposing ``snlgroup_key`` and ``snl_id``
                (and ``structure`` when ``exact_match`` is true).
            exact_match: when true, the candidate's structure must also fit
                ``self.canonical_structure`` under a ``MoleculeMatcher``
                (tolerance 0.01, InChI mapper with angle tolerance 5.0).

        Returns:
            True if the candidate's id was appended to ``self.all_snl_ids``
            (``self.updated_at`` is refreshed as well), False otherwise.
        """
        # a different group key means a different structure: nothing to compare
        if cand_snl.snlgroup_key != self.canonical_snl.snlgroup_key:
            return False

        # refuse ids that are already part of the group
        already_known = cand_snl.snl_id in self.all_snl_ids
        if already_known:
            print('WARNING: add_if_belongs() has detected that you are '
                  'trying to add the same SNL id twice!')
            return False

        # the matcher is only built when an exact comparison was requested
        if exact_match and not MoleculeMatcher(
                tolerance=0.01,
                mapper=InchiMolAtomMapper(angle_tolerance=5.0)).fit(
                    cand_snl.structure, self.canonical_structure):
            return False

        # everything checks out, add to the group
        self.all_snl_ids.append(cand_snl.snl_id)
        self.updated_at = datetime.datetime.utcnow()
        return True
Exemplo n.º 10
0
 def test_cdi_23(self):
     """cdi_23_1.xyz and cdi_23_2.xyz must not fit (InChI mapper, tol 0.05)."""
     first, second = (
         Molecule.from_file(os.path.join(test_dir, name))
         for name in ("cdi_23_1.xyz", "cdi_23_2.xyz")
     )
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     self.assertFalse(matcher.fit(first, second))
Exemplo n.º 11
0
 def test_strange_inchi(self):
     """k1.sdf and k2.sdf must fit each other (InChI mapper, tol 0.05)."""
     first_path = os.path.join(test_dir, "k1.sdf")
     second_path = os.path.join(test_dir, "k2.sdf")
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     fitted = matcher.fit(Molecule.from_file(first_path),
                          Molecule.from_file(second_path))
     self.assertTrue(fitted)
Exemplo n.º 12
0
 def test_get_rmsd(self):
     """RMSD of t3.xyz vs t4.xyz formats to the text ``0.00488``."""
     matcher = MoleculeMatcher()
     first = Molecule.from_file(os.path.join(test_dir, "t3.xyz"))
     second = Molecule.from_file(os.path.join(test_dir, "t4.xyz"))
     rmsd = matcher.get_rmsd(first, second)
     # width 7, three significant digits - compared as text, not as a float
     self.assertEqual(format(rmsd, "7.3"), "0.00488")
Exemplo n.º 13
0
 def test_thiane(self):
     """thiane1.sdf and thiane2.sdf must not fit (InChI mapper, tol 0.05)."""
     first_path = os.path.join(test_dir, "thiane1.sdf")
     second_path = os.path.join(test_dir, "thiane2.sdf")
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     fitted = matcher.fit(read_mol(first_path), read_mol(second_path))
     self.assertFalse(fitted)
Exemplo n.º 14
0
    def __init__(self, redundancy_parameters, geometry):
        '''
        Makes a RedundancyGuard, filling in default values for any
        parameter that is missing or given as None or 'default'.

        TODO: currently using pymatgen's structure matcher for comparing bulk
            and sheet structures, both pymatgen's structure matcher and
            molecule matcher for comparing wires, and only the molecule matcher
            for clusters. The sheet and wire cases aren't ideal, since the
            structure matcher assumes periodicity in all three dimensions, and
            the molecule matcher assumes no periodicity.

        Args:
            redundancy_parameters: a dictionary of parameters, or None or
                'default' to take every default

            geometry: the Geometry object; its shape attribute decides
                whether a molecule matcher is also created
        '''

        # Fallback values, stored as default_<name> attributes.
        #
        # lattice length tolerance, in fractional coordinates
        self.default_lattice_length_tol = 0.05
        # lattice angle tolerance, in degrees
        self.default_lattice_angle_tol = 2
        # site tolerance, in fraction of average free length per atom
        self.default_site_tol = 0.1
        # whether to transform to primitive cells before comparing
        self.default_use_primitive_cell = True
        # whether to check if structures are equal to supercells of each other
        self.default_attempt_supercell = True
        # RMSD tolerance for comparing clusters
        self.default_rmsd_tol = 0.1
        # the epa difference interval
        self.default_epa_diff = 0.0

        # values that mean "use the default"
        use_default = (None, 'default')

        if redundancy_parameters in use_default:
            self.set_all_to_defaults()
        else:
            # every parameter follows the same rule: take the supplied value
            # unless it is absent or one of the "use the default" markers
            parameter_names = (
                'lattice_length_tol',
                'lattice_angle_tol',
                'site_tol',
                'use_primitive_cell',
                'attempt_supercell',
                'rmsd_tol',
                'epa_diff',
            )
            for name in parameter_names:
                if (name in redundancy_parameters and
                        redundancy_parameters[name] not in use_default):
                    chosen = redundancy_parameters[name]
                else:
                    chosen = getattr(self, 'default_' + name)
                setattr(self, name, chosen)

        # make the StructureMatcher object (arguments are positional)
        scale_volumes = False  # prevent the matcher from scaling the volumes
        allow_subset = False  # prevent subset matching
        self.structure_matcher = StructureMatcher(
            self.lattice_length_tol,
            self.site_tol,
            self.lattice_angle_tol,
            self.use_primitive_cell,
            scale_volumes,
            self.attempt_supercell,
            allow_subset,
            ElementComparator())

        # make the MoleculeMatcher object
        if geometry.shape in ('cluster', 'wire'):
            self.molecule_matcher = MoleculeMatcher(
                self.rmsd_tol, IsomorphismMolAtomMapper())
            ob.obErrorLog.SetOutputLevel(0)  # to suppress openbabel warnings