def fit_with_mapper(self, mapper):
    """
    Run the shared MoleculeMatcher.fit checks with the given atom mapper.

    A CH4 molecule is first compared against a rotated copy of itself,
    then a series of molecule file pairs from the test directory is
    checked with the default tolerance, and finally the t3/t4 pair is
    re-checked with a tolerance of 0.001, where it must no longer fit.

    Args:
        mapper: the atom mapper handed to every MoleculeMatcher built here
    """
    species = ("C", "H", "H", "H", "H")
    coords = [
        [0.000000, 0.000000, 0.000000],
        [0.000000, 0.000000, 1.089000],
        [1.026719, 0.000000, -0.363000],
        [-0.513360, -0.889165, -0.363000],
        [-0.513360, 0.889165, -0.363000],
    ]
    reference = Molecule(list(species), coords)
    rotation = SymmOp.from_origin_axis_angle([0, 0, 0], [0.1, 0.2, 0.3], 60)
    rotated = Molecule(list(species), [rotation.operate(c) for c in coords])
    mm = MoleculeMatcher(mapper=mapper)
    self.assertTrue(mm.fit(reference, rotated))

    # (first file, second file, whether the pair is expected to fit)
    file_pairs = (
        ("benzene1.xyz", "benzene2.xyz", True),
        ("benzene1.xyz", "t2.xyz", False),
        ("c1.xyz", "c2.xyz", True),
        ("t3.xyz", "t4.xyz", True),
        ("j1.xyz", "j2.xyz", True),
        ("ethene1.xyz", "ethene2.xyz", True),
        ("toluene1.xyz", "toluene2.xyz", True),
        ("cyclohexane1.xyz", "cyclohexane2.xyz", True),
        ("oxygen1.xyz", "oxygen2.xyz", True),
    )
    for first_name, second_name, should_fit in file_pairs:
        first = Molecule.from_file(os.path.join(test_dir, first_name))
        second = Molecule.from_file(os.path.join(test_dir, second_name))
        check = self.assertTrue if should_fit else self.assertFalse
        check(mm.fit(first, second))

    # the same t3/t4 pair must be rejected once the tolerance is tightened
    strict_mm = MoleculeMatcher(tolerance=0.001, mapper=mapper)
    first = Molecule.from_file(os.path.join(test_dir, "t3.xyz"))
    second = Molecule.from_file(os.path.join(test_dir, "t4.xyz"))
    self.assertFalse(strict_mm.fit(first, second))
def test_to_and_from_dict(self):
    """
    Check that as_dict/from_dict round-trips a MoleculeMatcher for both
    the InChI-based and the isomorphism-based atom mappers.
    """

    def assert_round_trip(atom_mapper):
        # serialise, rebuild, and compare the two dict representations
        original = MoleculeMatcher(tolerance=0.5, mapper=atom_mapper)
        as_dict = original.as_dict()
        rebuilt = MoleculeMatcher.from_dict(as_dict)
        self.assertEqual(as_dict, rebuilt.as_dict())

    assert_round_trip(InchiMolAtomMapper(angle_tolerance=50.0))
    assert_round_trip(IsomorphismMolAtomMapper())
def test_thiane_ethynyl(self):
    """
    The molecules in thiane_ethynyl1.sdf and thiane_ethynyl2.sdf must not
    fit under the InChI mapper with a tolerance of 0.05.
    """
    first_path = os.path.join(test_dir, "thiane_ethynyl1.sdf")
    second_path = os.path.join(test_dir, "thiane_ethynyl2.sdf")
    matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
    first = Molecule.from_file(first_path)
    second = Molecule.from_file(second_path)
    fitted = matcher.fit(first, second)
    self.assertFalse(fitted)
def test_get_rmsd(self):
    """
    Check the RMSD between the t3.xyz and t4.xyz molecules (read through
    BabelMolAdaptor) against its expected formatted value.
    """
    matcher = MoleculeMatcher()
    first = BabelMolAdaptor.from_file(
        os.path.join(test_dir, "t3.xyz")).pymatgen_mol
    second = BabelMolAdaptor.from_file(
        os.path.join(test_dir, "t4.xyz")).pymatgen_mol
    rmsd = matcher.get_rmsd(first, second)
    # width 7, three significant digits -- same spec as '{0:7.3}'
    self.assertEqual(format(rmsd, "7.3"), "0.00488")
def test_random_seed(self, water, ethanol):
    """
    Confirm that seed = -1 generates random structures
    while seed = 1 is deterministic
    """
    mm = MoleculeMatcher()

    def pack_twice(seed):
        # Build one input set in a scratch directory, run it twice, and
        # return the output molecule read after each run.
        with tempfile.TemporaryDirectory() as scratch_dir:
            generator = PackmolBoxGen(
                seed=seed,
                inputfile="input.in",
                outputfile="output.xyz",
            )
            pw = generator.get_input_set(
                molecules=[
                    {"name": "water", "number": 10, "coords": water},
                    {"name": "ethanol", "number": 20, "coords": ethanol},
                ],
            )
            pw.write_input(scratch_dir)
            output_path = os.path.join(scratch_dir, "output.xyz")
            pw.run(scratch_dir)
            first_run = Molecule.from_file(output_path)
            pw.run(scratch_dir)
            second_run = Molecule.from_file(output_path)
        return first_run, second_run

    # deterministic output
    out1, out2 = pack_twice(1)
    assert mm.fit(out1, out2)

    # randomly generated structures
    out1, out2 = pack_twice(-1)
    assert not mm.fit(out1, out2)
def test_confab_conformers(self):
    """
    Generate conformers for the SMILES string "CCCC" and sanity-check them.

    The molecule must report exactly one rotor, at least one conformer
    must be produced, and when more than one is produced the first two
    must have a non-zero RMSD.
    """
    mol = pb.readstring("smi", "CCCC").OBMol
    adaptor = BabelMolAdaptor(mol)
    adaptor.make3d()
    conformers = adaptor.confab_conformers()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12
    self.assertEqual(adaptor.openbabel_mol.NumRotors(), 1)
    self.assertGreaterEqual(len(conformers), 1)
    if len(conformers) > 1:
        self.assertNotAlmostEqual(
            MoleculeMatcher().get_rmsd(conformers[0], conformers[1]), 0)
def test_group_molecules(self):
    """
    Group the molecules named in mol_list.txt with a tolerance of 0.001
    and compare the resulting filename groups, as a string, with the
    contents of grouped_mol_list.txt.
    """
    matcher = MoleculeMatcher(tolerance=0.001)
    with open(os.path.join(test_dir, "mol_list.txt")) as listing:
        filename_list = [entry.strip() for entry in listing]
    mol_list = [read_mol(os.path.join(test_dir, name))
                for name in filename_list]

    # translate every grouped molecule back to the file it was read from
    filename_groups = []
    for group in matcher.group_molecules(mol_list):
        filename_groups.append(
            [filename_list[mol_list.index(mol)] for mol in group])

    with open(os.path.join(test_dir, "grouped_mol_list.txt")) as expected:
        grouped_text = expected.read().strip()
    self.assertEqual(str(filename_groups), grouped_text)
def test_group_molecules(self):
    """
    Group the molecules named in mol_list.txt (read through
    BabelMolAdaptor) with a tolerance of 0.001 and compare the resulting
    filename groups, as a string, with grouped_mol_list.txt.
    """
    names_path = os.path.join(test_dir, "mol_list.txt")
    expected_path = os.path.join(test_dir, "grouped_mol_list.txt")

    mm = MoleculeMatcher(tolerance=0.001)
    with open(names_path) as names_file:
        filename_list = [row.strip() for row in names_file.readlines()]

    mol_list = []
    for name in filename_list:
        adaptor = BabelMolAdaptor.from_file(os.path.join(test_dir, name))
        mol_list.append(adaptor.pymatgen_mol)

    mol_groups = mm.group_molecules(mol_list)
    # map each grouped molecule back to its source filename
    filename_groups = [
        [filename_list[mol_list.index(member)] for member in cluster]
        for cluster in mol_groups
    ]

    with open(expected_path) as expected_file:
        grouped_text = expected_file.read().strip()
    self.assertEqual(str(filename_groups), grouped_text)
def add_if_belongs(self, cand_snl, exact_match=True):
    """
    Add a candidate SNL to this group if it belongs here.

    Args:
        cand_snl: the candidate; its snlgroup_key, snl_id and structure
            attributes are read
        exact_match: when True, the candidate's structure must also fit
            this group's canonical structure under a MoleculeMatcher

    Returns:
        True if the candidate's id was appended to all_snl_ids (and
        updated_at refreshed), False otherwise.
    """
    # a different group key means a different structure: nothing to compare
    if cand_snl.snlgroup_key != self.canonical_snl.snlgroup_key:
        return False

    # refuse ids that are already part of this group
    candidate_id = cand_snl.snl_id
    if candidate_id in self.all_snl_ids:
        print('WARNING: add_if_belongs() has detected that you are '
              'trying to add the same SNL id twice!')
        return False

    if exact_match:
        matcher = MoleculeMatcher(
            tolerance=0.01,
            mapper=InchiMolAtomMapper(angle_tolerance=5.0))
        fits = matcher.fit(cand_snl.structure, self.canonical_structure)
        if not fits:
            return False

    # every check passed, so record the candidate
    self.all_snl_ids.append(candidate_id)
    self.updated_at = datetime.datetime.utcnow()
    return True
def test_cdi_23(self):
    """
    The molecules in cdi_23_1.xyz and cdi_23_2.xyz must not fit under
    the InChI mapper with a tolerance of 0.05.
    """
    matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
    first, second = (
        Molecule.from_file(os.path.join(test_dir, "cdi_23_1.xyz")),
        Molecule.from_file(os.path.join(test_dir, "cdi_23_2.xyz")),
    )
    self.assertFalse(matcher.fit(first, second))
def test_strange_inchi(self):
    """
    The molecules in k1.sdf and k2.sdf must fit under the InChI mapper
    with a tolerance of 0.05.
    """
    inchi_mapper = InchiMolAtomMapper()
    matcher = MoleculeMatcher(tolerance=0.05, mapper=inchi_mapper)
    first = Molecule.from_file(os.path.join(test_dir, "k1.sdf"))
    second = Molecule.from_file(os.path.join(test_dir, "k2.sdf"))
    outcome = matcher.fit(first, second)
    self.assertTrue(outcome)
def test_get_rmsd(self):
    """
    Check the RMSD between the t3.xyz and t4.xyz molecules against its
    expected formatted value.
    """
    first = Molecule.from_file(os.path.join(test_dir, "t3.xyz"))
    second = Molecule.from_file(os.path.join(test_dir, "t4.xyz"))
    rmsd = MoleculeMatcher().get_rmsd(first, second)
    # width 7, three significant digits -- same spec as '{0:7.3}'
    formatted = format(rmsd, "7.3")
    self.assertEqual(formatted, "0.00488")
def test_thiane(self):
    """
    The molecules in thiane1.sdf and thiane2.sdf must not fit under the
    InChI mapper with a tolerance of 0.05.
    """
    matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
    first = read_mol(os.path.join(test_dir, "thiane1.sdf"))
    second = read_mol(os.path.join(test_dir, "thiane2.sdf"))
    is_match = matcher.fit(first, second)
    self.assertFalse(is_match)
def __init__(self, redundancy_parameters, geometry):
    '''
    Makes a RedundancyGuard, and sets default parameter values if
    necessary.

    TODO: currently using pymatgen's structure matcher for comparing bulk
        and sheet structures, both pymatgen's structure matcher and
        molecule matcher for comparing wires, and only the molecule
        matcher for clusters. The sheet and wire cases aren't ideal, since
        the structure matcher assumes periodicity in all three dimensions,
        and the molecule matcher assumes no periodicity.

    Args:
        redundancy parameters: a dictionary of parameters, or None or
            'default' to use the default value for every parameter

        geometry: the Geometry object
    '''

    # default values, stored as default_<name> attributes
    self.default_lattice_length_tol = 0.05  # in fractional coordinates
    self.default_lattice_angle_tol = 2  # in degrees
    # in fraction of average free length per atom
    self.default_site_tol = 0.1
    # whether to transform to primitive cells before comparing
    self.default_use_primitive_cell = True
    # whether to check if structures are equal to supercells of each other
    self.default_attempt_supercell = True
    self.default_rmsd_tol = 0.1  # RMSD tolerance for comparing clusters
    self.default_epa_diff = 0.0  # the epa difference interval

    if redundancy_parameters in (None, 'default'):
        self.set_all_to_defaults()
    else:
        # A parameter falls back to its default when it is missing or is
        # given as None or 'default'; otherwise the supplied value is used.
        parameter_names = (
            'lattice_length_tol',
            'lattice_angle_tol',
            'site_tol',
            'use_primitive_cell',
            'attempt_supercell',
            'rmsd_tol',
            'epa_diff',
        )
        for name in parameter_names:
            if (name not in redundancy_parameters
                    or redundancy_parameters[name] in (None, 'default')):
                value = getattr(self, 'default_' + name)
            else:
                value = redundancy_parameters[name]
            setattr(self, name, value)

    # make the StructureMatcher object
    scale_volumes = False  # prevent the matcher from scaling the volumes
    allow_subset = False  # prevent subset matching
    self.structure_matcher = StructureMatcher(
        self.lattice_length_tol,
        self.site_tol,
        self.lattice_angle_tol,
        self.use_primitive_cell,
        scale_volumes,
        self.attempt_supercell,
        allow_subset,
        ElementComparator())

    # make the MoleculeMatcher object
    if geometry.shape == 'cluster' or geometry.shape == 'wire':
        atom_mapper = IsomorphismMolAtomMapper()
        self.molecule_matcher = MoleculeMatcher(self.rmsd_tol, atom_mapper)
        ob.obErrorLog.SetOutputLevel(0)  # to suppress openbabel warnings