def toCCDCStructure(self, pointer=None, stage=None, identifier=None):
    '''
    Return a CCDC crystal read from a structure file.

    pointer    -- path of the structure file; when None it is looked up with
                  self.getAttribute({stage: 'finalStructurePointer'})
    stage      -- name of the data stage the pointer belongs to; only
                  'crystalOptimiserData' and 'pcmData' are implemented
    identifier -- if truthy, overrides the identifier of the returned crystal
                  (this is the name that ends up in a written cif)

    Raises Exception for an unimplemented stage and ValueError when the file
    format cannot be deduced from the last three characters of the pointer.
    '''
    # temporary fail as code work in progress
    if stage not in ['crystalOptimiserData', 'pcmData']:
        raise Exception(
            'Implement other options to get structures if needed')

    # generalize this to be
    # self.getAttribute(pointer = {stage: lowestStructurePointer}) if pointer is None
    if pointer is None:
        pointer = self.getAttribute({stage: 'finalStructurePointer'})

    # CrystalReader can be told explicitly which file format to look for
    extension = pointer[-3:].lower()
    if extension in ('res', '.16'):  # .16 is fort.16 from DMACRYS
        fileFormat = 'res'
    elif extension == 'cif':
        fileFormat = 'cif'
    else:
        # Previously fileFormat was left unbound here and the call below
        # failed with an UnboundLocalError that hid the real problem.
        raise ValueError(
            'Cannot deduce structure file format from %r '
            '(expected a res, fort.16 or cif file)' % (pointer,))

    # Imported only once the arguments are known to be usable.
    from ccdc.io import CrystalReader

    ccdcCrystal = CrystalReader(pointer, format=fileFormat)[0]
    if identifier:
        ccdcCrystal.identifier = identifier
    return ccdcCrystal
def main():
    """Relabel a res file against a template and write the result.

    Command line: oldResFile templateResFile newFilename [enforcement]

    With three arguments the reordered res text is written as-is.  With a
    fourth argument equal to 'y' the standard list order and lowest
    numbering are enforced as well; any other fourth argument is treated as
    "do not enforce" (previously it silently produced an empty output file).

    Raises Exception(usefulMessage) when the argument count is wrong.
    """
    # names of files (all strings)
    arguments = sys.argv[1:]
    if len(arguments) == 3:
        oldResFile, templateResFile, newFilename = arguments
        enforce = False
    elif len(arguments) == 4:
        oldResFile, templateResFile, newFilename, enforcement = arguments
        enforce = enforcement == 'y'
    else:
        raise Exception(usefulMessage)

    # use the above subroutines to achieve the relabelling
    # uses ccdc molecule instances
    relabelling = replacementDict(
        CrystalReader(templateResFile)[0],
        CrystalReader(oldResFile)[0])
    if enforce:
        contents = reorderedRes(oldResFile,
                                relabelling,
                                enforceStandardListOrder=True,
                                enforceLowestNumbering=True)
    else:
        contents = reorderedRes(oldResFile, relabelling)

    # The output is opened only after the text has been built, so a failure
    # above no longer leaves a truncated file behind.
    with open(newFilename, 'w') as outf:
        outf.write(contents)
def cacl_density(doi):
    """Return the calculated density of the first CSD hit for a DOI.

    The DOI is echoed to stdout, searched for with a TextNumericSearch and
    the first hit's crystal is fetched from the CSD.  When the search has no
    hits the density falls back to 1.  The value is returned as a string
    formatted with three decimals.
    """
    print(doi)

    search = TextNumericSearch()
    search.add_doi(doi)
    ref_codes = [hit.identifier for hit in search.search()]

    csd_crystal_reader = CrystalReader('CSD')
    if not ref_codes:
        density = 1
    else:
        first_crystal = csd_crystal_reader.crystal(ref_codes[0])
        density = first_crystal.calculated_density
    return "%.3f" % density
def calc_powder_from_pdb(self, pdbfile):
    """Simulate a powder pattern for the first crystal in ``pdbfile``.

    Prints the space group symbol, cell angles, cell lengths and cell volume
    of the crystal, then writes the simulated pattern as an .xye file next
    to the input (same path with the extension swapped, prefixed by './').

    :param pdbfile: path of the structure file to read
    """
    import os

    crystal = CrystalReader(pdbfile)[0]
    print(crystal.spacegroup_symbol)
    print('angles %s %s %s' % (crystal.cell_angles[0],
                               crystal.cell_angles[1],
                               crystal.cell_angles[2]))
    print('length %s %s %s' % (crystal.cell_lengths[0],
                               crystal.cell_lengths[1],
                               crystal.cell_lengths[2]))
    print('volume %s' % (crystal.cell_volume,))

    pattern = PowderPattern.from_crystal(crystal)
    # Swap only the final extension.  str.replace('.pdb', '.xye') also
    # rewrote '.pdb' inside directory names and, for an input without a
    # '.pdb' suffix, left the name unchanged so the input was overwritten.
    xye_path = './' + os.path.splitext(pdbfile)[0] + '.xye'
    pattern.write_xye_file(xye_path)
def test_ccdcToASE(self):
    """Converting FPAMCA16.res to ASE keeps every atom of the cell."""
    from ioAndInterfaces import ccdcCrystalToASE
    from ccdc.io import CrystalReader

    # this is a Z'=4 30 atom molecule
    expectedAtomCount = 4 * 30

    sourceCrystal = CrystalReader('FPAMCA16.res')[0]
    convertedCrystal = ccdcCrystalToASE(sourceCrystal)
    self.assertEqual(len(convertedCrystal), expectedAtomCount)
def main():
    """Print the relabelled torsion/angle string for a res file.

    Command line: oldString resFile

    Raises Exception(usefulMessage) when the argument count is wrong.
    """
    try:
        # oldString, oldResFile, templateResFile = sys.argv[1:]
        oldString, resFile = sys.argv[1:]
    except ValueError:
        # Only a wrong number of arguments is a usage error; anything else
        # should surface unchanged instead of being swallowed.
        raise Exception(usefulMessage)

    # NOTE(review): oldString is read from the command line but the label
    # string below is hard-coded - confirm whether that is still intended.
    # print simpleRelabelling("+O1_C1_C2_C7 +H1_O1_C1_C2 +C1_C2_C7_N1 +C7_N1_C8_C9 +H6_N1_C8_C9 +F1_C14_C12_C11 +H6_N1_C8 +H1_O1_C1",
    print(simpleRelabelling(
        "O1_C1_C2_C7 H1_O1_C1_C2 C1_C2_C7_N1 C7_N1_C8_C9 H6_N1_C8_C9 F1_C14_C12_C11 H6_N1_C8 H1_O1_C1",
        CrystalReader(resFile)[0]))
def _get_crystal(obj):
    """
    Convert an object writable by MoleculeWriter to a crystal.

    The object is written to a temporary mol2 file which is read back with
    CrystalReader; the temporary directory is removed before returning.

    :param `ccdc.molecule.Molecule` obj: molecule or protein
    :return: `ccdc.crystal.Crystal`
    """
    import shutil

    tmp = tempfile.mkdtemp()
    try:
        f = os.path.join(tmp, "obj.mol2")
        with MoleculeWriter(f) as w:
            w.write(obj)
        return CrystalReader(f)[0]
    finally:
        # mkdtemp never cleans up after itself; previously every call left
        # a directory behind.  Errors are ignored so that a file still held
        # open by the reader cannot turn the clean-up into a failure.
        shutil.rmtree(tmp, ignore_errors=True)
def test_SetCSPStructures(self):
    """End-to-end checks of SetCSPStructures against the example data files.

    Covers construction from a CrystalPredictor log, CrystalOptimiser and
    free energy data, subsetting/sorting by Attribute, pandas export,
    structure matching, packing coefficient, CIF writing, CSV construction,
    generic attribute get/set and PCM data.
    """
    from cspTracking import SetCSPStructures
    tempSet = SetCSPStructures.initFromCrystalPredictorLog('exampleCP.log')
    self.assertEqual(len(tempSet.structures), 27414)
    self.assertEqual(
        tempSet.structures[0].crystalPredictorData['spaceGroup'], 'P-1')
    self.assertEqual(tempSet.structures[5].idNumber, 6)
    tempSet.structures[0].createCrystalOptimiserData(
        '1-1/exampleCOSummary.out')
    self.assertEqual(
        tempSet.structures[0].crystalOptimiserData['rhoFinal'], 1.4749)

    #remove the data and add again with different method
    tempSet.structures[0].crystalOptimiserData = {}
    self.assertEqual(
        len(tempSet.structures[0].crystalOptimiserData.keys()), 0)
    # tempSet.addCrystalOptimiserInfoFromGlobString('1-1/*ummary.out')
    tempSet.addInfoFromGlobString('1-1/*ummary.out')
    self.assertEqual(
        tempSet.structures[0].crystalOptimiserData['rhoFinal'], 1.4749)
    self.assertTrue(tempSet.structures[0].crystalOptimiserComplete)

    # get free energy data from Jonas's log files
    tempSet.addFreeEnergyData(lambda x: 'free_energy.log')
    self.assertEqual(
        tempSet.structures[-1].getAttribute({'freeEnergyData': 'ZPE'}),
        2.7239)

    #get a subset of data, and assert that it has a positive density
    from cspTracking import Attribute
    self.assertEqual(
        len(tempSet.subsetStructures(
            [Attribute(pointer='crystalOptimiserComplete', value=True)])),
        len([x for x in tempSet.structures if x.crystalOptimiserComplete]))
    self.assertEqual(
        len(tempSet.subsetStructures(
            [Attribute(pointer={'crystalOptimiserData': 'rhoFinal'},
                       minValue=0.)])),
        1)
    self.assertEqual(
        len(tempSet.subsetStructures(
            [Attribute(pointer={'crystalOptimiserData': 'rhoFinal'},
                       maxValue=0.)])),
        0)
    self.assertEqual(
        len(tempSet.subsetStructures(
            [Attribute(pointer={'crystalOptimiserData': 'rhoFinal'},
                       minValue=0.1, maxValue=0.4)])),
        0)
    # tempSet.addCrystalOptimiserInfoFromGlobString('2-1/*ummary.out')
    tempSet.addInfoFromGlobString('2-1/*ummary.out')
    self.assertEqual(
        len(tempSet.subsetStructures(
            [Attribute(pointer={'crystalOptimiserData': 'rhoFinal'},
                       minValue=1.1, maxValue=2.4)])),
        2)
    #lowest one for rho has idNumber 2
    self.assertEqual(
        [x.getAttribute('idNumber') for x in tempSet.subsetStructures(
            [Attribute(pointer={'crystalOptimiserData': 'rhoFinal'},
                       minValue=1.1, maxValue=2.4)],
            sortAttribute=Attribute(
                pointer={'crystalOptimiserData': 'rhoFinal'}))][0],
        2)

    #turn data into a pandas DataFrame
    dataTable = tempSet.toPandasDataFrame()
    self.assertEqual(dataTable['SG'][0], 'P-1')

    #test matching
    from ccdc.io import CrystalReader
    from cspTracking import CSPStructure
    matchingSet = SetCSPStructures(
        [CSPStructure(1,
                      initCrystalOptimiserData='1-1/exampleCOSummary.out')])
    #match to itself just as an example
    self.assertTrue(
        matchingSet.structures[0].crystalOptimiserData[
            'finalStructureFileExists'])
    matchDict = matchingSet.matchToStructureList(
        [CrystalReader('1-1/lowest.res')[0]],
        stage='crystalOptimiserData')
    np.testing.assert_array_equal(matchDict['matchesMatrix'],
                                  np.array([[15]]))
    self.assertEqual(matchDict['candidateList'][0].idNumber, 1)
    #do same match but add RMSD data
    matchDict = matchingSet.matchToStructureList(
        [CrystalReader('1-1/lowest.res')[0]],
        returnRMSD=True,
        stage='crystalOptimiserData')
    np.testing.assert_array_equal(matchDict['rmsdMatrix'],
                                  np.array([[0.]]))

    #test packing coefficient (what method does CCDC use??)
    self.assertAlmostEqual(
        matchingSet.structures[0].packingCoefficient(
            stage='crystalOptimiserData'),
        0.69370122631)

    #get minimum energy of list
    self.assertEqual(
        matchingSet.minValueInSet(
            Attribute(pointer={'crystalOptimiserData': 'eTotalFinal'})),
        -133.44284)

    #test writing a CIF - tear down data at the end
    nameOfCIF = 'tempCIFWrite.cif'
    self.assertFalse(os.path.isfile(nameOfCIF))
    matchingSet.calculation = 'crystalOptimiser'  # this should be known before matching really
    matchingSet.makeCIF(nameOfCIF)
    self.assertTrue(os.path.isfile(nameOfCIF))
    os.remove(nameOfCIF)

    #initiate via CSV then match
    csvStructures = SetCSPStructures.initFromCSV(
        'example.csv',
        idNumberIndex=0,
        finalEnergyIndex=1,
        finalDensityIndex=3,
        finalStructureDir=os.path.dirname(os.path.realpath(__file__)),
        assertZPrime=1.234)
    self.assertEqual(csvStructures.structures[0].idNumber, 978)
    self.assertEqual(
        csvStructures.structures[0].getAttribute(
            {'crystalOptimiserData': 'zPrime'}),
        1.234)
    self.assertAlmostEqual(
        csvStructures.structures[0].getAttribute(
            {'crystalOptimiserData': 'eTotalFinal'}),
        -139.35473)
    self.assertAlmostEqual(
        csvStructures.structures[0].getAttribute(
            {'crystalOptimiserData': 'rhoFinal'}),
        1.26410)
    print('check match')

    #use general get and set attributes
    newStructureSet = SetCSPStructures(
        [CSPStructure(i) for i in range(1, 5)],
        calculation='crystalOptimiser')
    newStructureSet.structures[2].setAttribute(
        Attribute(pointer={'crystalOptimiserData': 'rhoFinal'}, value=23.4))
    self.assertEqual(
        newStructureSet.structures[2].getAttribute(
            {'crystalOptimiserData': 'rhoFinal'}),
        23.4)
    newStructureSet.setGlobalAttributes(
        [Attribute(pointer={'crystalOptimiserData': 'rhoFinal'}, value=1.0)])
    self.assertEqual(
        newStructureSet.structures[2].getAttribute(
            {'crystalOptimiserData': 'rhoFinal'}),
        1.)
    self.assertEqual(newStructureSet.calculation, 'crystalOptimiser')
    # print newStructureSet.structures[0].__dict__
    print(newStructureSet.statusString())

    #read some PCM data
    newStructureSet.addInfoFromGlobString('1-1/PCM/1-1', calculation='pcm')
    self.assertAlmostEqual(
        newStructureSet.structures[0].pcmData['eInterFinal'], -149.0522)
    self.assertAlmostEqual(
        newStructureSet.structures[0].pcmData['eIntraHartrees'],
        -1043.20561970)
    self.assertTrue(
        newStructureSet.structures[0].pcmData['finalStructureFileExists'])

def tearDown(self):
    """Remove the temporary CIF if a failed test left it on disk.

    Without this a failure between makeCIF and os.remove leaves the file
    behind and the assertFalse(isfile) check fails on the next run.
    """
    leftover = os.path.isfile('tempCIFWrite.cif')
    print('tearing down %s' % (leftover,))
    if leftover:
        os.remove('tempCIFWrite.cif')
def rmsdtime(k, m):
    """Time crystal loading and pairwise packing comparison over several runs.

    Each run selects files with ``file_select(k)``, times reading them with
    CrystalReader ("processing") and times comparing every pair, including
    each crystal with itself, with ``similarity_engine.compare``
    ("comparison").  The per-run timings are plotted to
    ``outputdir + "/RMSD_time_test.png"``.

    :param k: passed to ``file_select``; reported as the number of molecules
    :param m: the loop runs while ``rpt <= m``, i.e. ``m + 1`` runs
    :return: ``[mean processing time, mean comparison time, mean total time]``
    """
    rpt = 0
    xax = []
    process_time_list = []
    compare_time_list = []
    total_time_list = []

    # NOTE(review): `rpt <= m` gives m + 1 runs - confirm that is intended.
    while rpt <= m:
        file_extract = file_select(k)
        filename_list = [
            str(path.split("/")[-1][4:-4]) for path in file_extract
        ]
        print("Files to be compared:")
        print(filename_list)

        a = time.perf_counter()
        crystals = [CrystalReader(c)[0] for c in file_extract]
        #packed_crystals = [c.packing() for c in crystals]
        b = time.perf_counter()
        process_time = b - a
        process_time_list.append(process_time)
        # The messages used to be f-strings containing a literal '%s'.
        print(f'Processing time (s) for {k} molecules:')
        print(process_time)
        print(process_time_list)

        c = time.perf_counter()
        for i, j in itertools.combinations_with_replacement(
                range(len(crystals)), 2):
            similarity_engine.compare(crystals[i], crystals[j])
        d = time.perf_counter()
        compare_time = d - c
        print(f'Comparison time (s) for {k} molecules:')
        # Previously printed (d - c) + (a - b), i.e. the comparison time
        # minus the processing time.
        print(compare_time)
        compare_time_list.append(compare_time)
        print([compare_time_list])
        total_time_list.append(process_time + compare_time)
        print([total_time_list])

        rpt += 1
        xax.append(rpt)

    plt.plot(xax, process_time_list, label='Processing Time')
    plt.plot(xax, compare_time_list, label='Comparison Time')
    # The "Total Time" curve used to plot compare_time_list a second time.
    plt.plot(xax, total_time_list, label='Total Time')
    plt.xlabel('Run number')
    plt.ylabel('Computation time')
    plt.title('RMSD Comparison')
    plt.legend()
    plt.savefig(outputdir + "/RMSD_time_test.png")
    plt.cla()

    mean_process = sum(process_time_list) / len(process_time_list)
    mean_compare = sum(compare_time_list) / len(compare_time_list)
    mean_total = sum(total_time_list) / len(total_time_list)
    print(f'Average rmsd processing time (s) for {k} molecules:')
    print(mean_process)
    print(f'Average rmsd pairwise comparison time for {k} molecules')
    print(mean_compare)
    print(f'Total rmsd time for {k} molecules')
    print(mean_total)
    return [mean_process, mean_compare, mean_total]
def powdertime(k, m):
    """Time powder-pattern generation and pairwise comparison over several runs.

    Each run selects files with ``file_select(k)``, times reading them and
    building a ``CD.PowderPattern`` for each ("processing") and times the
    similarity of every pair of patterns, including each pattern with
    itself ("comparison").  The per-run timings are plotted to
    ``outputdir + "/powder_time_test.png"``.

    :param k: passed to ``file_select``; reported as the number of molecules
    :param m: the loop runs while ``rpt <= m``, i.e. ``m + 1`` runs
    :return: ``[mean processing time, mean comparison time, mean total time]``
    """
    rpt = 0
    xax = []
    process_time_list = []
    compare_time_list = []
    total_time_list = []

    # NOTE(review): `rpt <= m` gives m + 1 runs - confirm that is intended.
    while rpt <= m:
        file_extract = file_select(k)
        #filename_split = [i.split("/") for i in file_extract]
        #filename_list = [str(i[len(i)-1][4:-4]) for i in filename_split]

        a = time.perf_counter()
        crystals = [CrystalReader(c)[0] for c in file_extract]
        #packed_crystals = [c.packing() for c in crystals]
        powders = [CD.PowderPattern.from_crystal(c) for c in crystals]
        b = time.perf_counter()
        process_time = b - a
        process_time_list.append(process_time)
        # The molecule count used to be hard-coded as 5.
        print(f'Processing time (s) for {k} molecules')
        print(process_time)

        c = time.perf_counter()
        for i, j in itertools.combinations_with_replacement(
                range(len(crystals)), 2):
            powders[i].similarity(powders[j])
        d = time.perf_counter()
        compare_time = d - c
        compare_time_list.append(compare_time)
        total_time_list.append(process_time + compare_time)
        # The label used to be printed without the value it announces.
        print(f'Comparison time (s) for {k} molecules:')
        print(compare_time)

        rpt += 1
        xax.append(rpt)

    plt.plot(xax, process_time_list, label='Processing Time')
    plt.plot(xax, compare_time_list, label='Comparison Time')
    plt.plot(xax, total_time_list, label='Total Time')
    plt.xlabel('Run number')
    plt.ylabel('Computation time')
    plt.title('Powder Comparison')
    plt.legend()
    plt.savefig(outputdir + "/powder_time_test.png")
    plt.cla()

    mean_process = sum(process_time_list) / len(process_time_list)
    mean_compare = sum(compare_time_list) / len(compare_time_list)
    mean_total = sum(total_time_list) / len(total_time_list)
    # These were f-strings with a literal '%s' and k passed as a second
    # print argument, so the placeholder was never filled in.
    print(f'Average powder processing time (s) for {k} molecules:')
    print(mean_process)
    print(f'Average powder pairwise comparison time for {k} molecules')
    print(mean_compare)
    print(f'Total powder time for {k} molecules')
    print(mean_total)
    return [mean_process, mean_compare, mean_total]
try: return compareResult.nmatched_molecules except: return None # Set up comparison object ps = PackingSimilarity() allowMolecularDifferences = True clusterSize = 15 ps.settings.allow_molecular_differences = allowMolecularDifferences ps.settings.packing_shell_size = clusterSize # define crystals try: crystal1 = CrystalReader(sys.argv[1], format='res')[0] except: crystal1 = CrystalReader(sys.argv[1], format='cif')[0] try: crystal2 = CrystalReader(sys.argv[2], format='res')[0] except: crystal2 = CrystalReader(sys.argv[2], format='cif')[0] matchingData = returnNmatched_molecules(ps, crystal1, crystal2, returnRMSD=True) #print "Matching {1} molecules (out of {2}) with RMSD = {3} Angstroms".format(clusterSize,clusterSize,clusterSize) print "Allowing molecular differences? ", allowMolecularDifferences print "Matching %s molecules (out of %s) with RMSD = %s Angstroms" % (
link_list = [] print(st + e) for file_path in files[st:e]: file_path_split = file_path.split("/") file_name = file_path_split[len(file_path_split) - 1] file_list.append(file_name) doc = cif.read(file_path) block = doc[0] for b in doc: if (b.find_loop('_atom_site_') != None): block = b print("Processed file --> " + file_name) crystal_reader = CrystalReader(file_path) crystal = crystal_reader[0] crystal.assign_bonds() packed_molecules = crystal.packing(box_dimensions=((0, 0, 0), (1, 1, 1)), inclusion='CentroidIncluded') packed_molecules.normalise_labels() adta_molecules = [] cent = [] cent_points = [] for comp in packed_molecules.components: if (len(comp.atoms) > 1): adta_molecules.append(comp) cent.append(MD.atom_centroid(*list(a for a in comp.atoms))) for c in cent:
from ase.build import niggli_reduce
from ase import Atoms
from ase.io import write as aseWrite
from ioAndInterfaces import ccdcCrystalToASE
from ccdc.io import CrystalReader

# Example input and output files used to try the reduction out.
inputRes = 'testingScripts/new.res'
outputRes = 'temp070917.res'

# Read the res file, convert it to an ASE cell with periodic boundaries,
# Niggli-reduce that cell in place and write it back out.
myCell = CrystalReader(inputRes)[0]
myASECell = ccdcCrystalToASE(myCell)
myASECell.set_pbc(True)
niggli_reduce(myASECell)
aseWrite(outputRes, myASECell)

# Known limitations, reported to whoever runs the script.
print('issue with this is that crystal optimiser wants labels etc, and probably fussy with res file format')
# could attach labels or something if can be bothered
print('another issue is that the cell may not have the same convention, so would have to reset rather than just changing angles etc')
for pair in MolecularDescriptors.MaximumCommonSubstructure().search(mol1, mol2)[0]]) def main(): try: # oldString, oldResFile, templateResFile = sys.argv[1:] oldString, resFile = sys.argv[1:] except: raise Exception(usefulMessage) # print simpleRelabelling("+O1_C1_C2_C7 +H1_O1_C1_C2 +C1_C2_C7_N1 +C7_N1_C8_C9 +H6_N1_C8_C9 +F1_C14_C12_C11 +H6_N1_C8 +H1_O1_C1", print simpleRelabelling("O1_C1_C2_C7 H1_O1_C1_C2 C1_C2_C7_N1 C7_N1_C8_C9 H6_N1_C8_C9 F1_C14_C12_C11 H6_N1_C8 H1_O1_C1", CrystalReader(resFile)[0]) if __name__ == '__main__': main() exit() print relabelString("+O1_C1_C2_C7 +H1_O1_C1_C2 +C1_C2_C7_N1 +C7_N1_C8_C9 +H6_N1_C8_C9 +F1_C14_C12_C11 +H6_N1_C8 +H1_O1_C1", replacementDict(CrystalReader(templateResFile)[0], CrystalReader(oldResFile)[0])) exit() exit() # use the above subroutines to achieve the relabelling # uses ccdc molecule instances with open(newFilename, 'w') as outf: outf.write(reorderedRes(oldResFile, replacementDict(CrystalReader(templateResFile)[0], CrystalReader(oldResFile)[0])) )
def linkcomp(start, end, pcomp=True, rcomp=True, lktest=False):
    """Compare every pair of structures in ``files[start:end]`` and write CSVs.

    For each pair (i < j) the result is stored in cell ``[j][i]`` of a square
    table whose rows and columns are labelled with the trimmed file names;
    the diagonal and the upper triangle stay 0.

    :param start: first index into ``files``
    :param end: end index (exclusive) into ``files``
    :param pcomp: write powder pattern similarity to
        ``outputdir + "/powder_comparison.csv"``
    :param rcomp: write the packing similarity rmsd to
        ``outputdir + "/rmsd_comparison.csv"``
    :param lktest: write the difference of the per-file maxima of
        ``link_dic`` to ``outputdir + "/link_comparison.csv"``

    Values are rounded to 3 decimals; a comparison that returns None is
    stored as NaN.
    """
    file_extract = files[start:end]
    filename_list = [str(path.split("/")[-1][4:-4]) for path in file_extract]
    size = len(file_extract)

    if lktest:
        lkarray = np.zeros((size, size))
    if pcomp:
        powdarray = np.zeros((size, size))
    if rcomp:
        rmsdarray = np.zeros((size, size))

    # Read every structure (and build its powder pattern) once, instead of
    # once per pair inside the inner loop.
    crystals = [CrystalReader(path)[0] for path in file_extract]
    if pcomp:
        powders = [CD.PowderPattern.from_crystal(c) for c in crystals]

    # Index pairs directly; the old list.index() lookups were O(n) each and
    # returned the wrong position for duplicated paths.
    for i in range(size):
        for j in range(i + 1, size):
            if lktest:
                # These names were used but never defined (NameError).
                # Assumes link_dic is keyed by the bare file name, i.e. the
                # last path component - TODO confirm against where link_dic
                # is filled.
                first_file_name = file_extract[i].split("/")[-1]
                second_file_name = file_extract[j].split("/")[-1]
                try:
                    lkarray[j][i] = round(
                        abs(
                            max(link_dic[first_file_name]) -
                            max(link_dic[second_file_name])), 3)
                except TypeError:
                    lkarray[j][i] = np.nan
            if pcomp:
                powd = powders[i].similarity(powders[j])
                powdarray[j][i] = np.nan if powd is None else round(powd, 3)
            if rcomp:
                comp = similarity_engine.compare(crystals[i], crystals[j])
                rmsdarray[j][i] = (np.nan if comp is None else round(
                    comp.rmsd, 3))

    if lktest:
        lkframe = pd.DataFrame(lkarray,
                               index=filename_list,
                               columns=filename_list)
        lkframe.to_csv(outputdir + "/link_comparison.csv",
                       index=True,
                       header=True,
                       sep=',')
    if pcomp:
        powdframe = pd.DataFrame(powdarray,
                                 index=filename_list,
                                 columns=filename_list)
        powdframe.to_csv(outputdir + "/powder_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')
    if rcomp:
        rmsdframe = pd.DataFrame(rmsdarray,
                                 index=filename_list,
                                 columns=filename_list)
        rmsdframe.to_csv(outputdir + "/rmsd_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')
def labelIncludingNumberMols(atom1Label, stoichiometry, molIndex):
    """Return the atom label shifted to molecule number ``molIndex``.

    The label is split into element and number; the number is increased by
    ``molIndex`` times the count of that element in ``stoichiometry``.

    atom1Label    -- label such as 'C12' (element followed by digits)
    stoichiometry -- mapping of element symbol to its count (int or digit
                     string)
    molIndex      -- zero-based index of the molecule
    """
    element1, number1 = re.split(r'(\d+)', atom1Label)[:2]
    return element1 + str(
        int(number1) + int(stoichiometry[element1]) * molIndex)


blurb = ("This file contains the Z-matrix definition for each of the "
         "molecules in the asymmetric unit of the crystal to be minimised "
         "by CrystalOptimizer. It is absolutely necessary to contain an "
         "'introductory' line for the Z-matrix of each molecule followed "
         "imidiately (no empty line) by the Z-matrix definition. The "
         "introductory line should be 'Z-matrix for molecule' followed by "
         "the enumeration of teh molecule. The Z-matrix definition should "
         "be done using the atom labels in exactly the same way as they "
         "appear in the .res file.")

# argv[1]: crystal structure file, argv[2]: Z-matrix of the first molecule
crystal = CrystalReader(sys.argv[1])[0]
# element -> count, taken from the formula of the first component
stoichiometry = dict(
    re.split(r'(\d+)', x)[:2]
    for x in crystal.molecule.components[0].formula.split())
# Read through a context manager so the file handle is closed again.
with open(sys.argv[2], 'r') as zMatrixFile:
    zMatrixString = zMatrixFile.read()

print(blurb)  #zMatrixString
for i in range(len(crystal.molecule.components)):
    print("\nZ-matrix for molecule %s" % (i + 1))
    # the first line of the input Z-matrix is skipped
    for l in zMatrixString.split('\n')[1:]:
        # print l
        print(" ".join(
            [labelIncludingNumberMols(x, stoichiometry, i)
             for x in l.split()]))
def molcomp(start,
            end,
            pcomp=True,
            rmsd_threshold=2,
            rounding=6,
            rcomp=True,
            molcomp=True):
    """Build symmetric pairwise comparison tables for ``files[start:end]``.

    Every pair of structures, including each structure with itself, is
    compared and the result is stored in both ``[i][j]`` and ``[j][i]`` of
    square tables labelled with the trimmed file names.

    :param start: first index into ``files``
    :param end: end index (exclusive) into ``files``
    :param pcomp: write powder pattern similarity to
        ``outputdir + "/powder_comparison.csv"``
    :param rmsd_threshold: pairs matching fewer molecules than this get the
        sentinel 88 in the rmsd table
    :param rounding: number of decimals kept for similarities and rmsds
    :param rcomp: write the packing similarity rmsd to
        ``outputdir + "/rmsd_comparison.csv"``
    :param molcomp: write the number of matched molecules to
        ``outputdir + "/rmsd_molecules_matched.csv"``

    A comparison without a result is stored as the sentinel 99.
    """
    # file_extract was used without ever being defined while start and end
    # were ignored; slice the same way linkcomp does.
    file_extract = files[start:end]
    filename_list = [str(path.split("/")[-1][4:-4]) for path in file_extract]
    size = len(file_extract)

    if molcomp:
        molarray = np.zeros((size, size))
    if pcomp:
        powdarray = np.zeros((size, size))
    if rcomp:
        rmsdarray = np.zeros((size, size))

    crystals = [CrystalReader(c)[0] for c in file_extract]
    if pcomp:
        powders = [CD.PowderPattern.from_crystal(c) for c in crystals]

    for i, j in itertools.combinations_with_replacement(range(size), 2):
        # Cells are assigned, not accumulated: with `+=` on both [j][i] and
        # [i][j] every diagonal cell (i == j) received its value twice.
        if molcomp or rcomp:
            comp = similarity_engine.compare(crystals[i], crystals[j])
        if molcomp:
            # comp may be None (AttributeError, which the old
            # `except TypeError` did not catch) or may carry no count.
            matched = getattr(comp, 'nmatched_molecules', None)
            if matched is None:
                matched = 99
            molarray[j][i] = molarray[i][j] = matched
        if pcomp:
            powd = powders[i].similarity(powders[j])
            # `powfarray` in the None branch was a typo (NameError).
            value = 99 if powd is None else round(powd, rounding)
            powdarray[j][i] = powdarray[i][j] = value
        if rcomp:
            if comp is None:
                value = 99
            elif comp.nmatched_molecules < rmsd_threshold:
                value = 88
            else:
                value = round(comp.rmsd, rounding)
            rmsdarray[j][i] = rmsdarray[i][j] = value

    if molcomp:
        molframe = pd.DataFrame(molarray,
                                index=filename_list,
                                columns=filename_list)
        # NOTE(review): the original called molframe.replace(99, 'nan') and
        # discarded the result, so the 99 sentinel has always been written.
        # The dead call is dropped and the output left unchanged - decide
        # whether the CSV should really contain 'nan' instead.
        molframe.to_csv(outputdir + "/rmsd_molecules_matched.csv",
                        index=True,
                        header=True,
                        sep=',')
    if pcomp:
        powdframe = pd.DataFrame(powdarray,
                                 index=filename_list,
                                 columns=filename_list)
        powdframe.to_csv(outputdir + "/powder_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')
    if rcomp:
        rmsdframe = pd.DataFrame(rmsdarray,
                                 index=filename_list,
                                 columns=filename_list)
        rmsdframe.to_csv(outputdir + "/rmsd_comparison.csv",
                         index=True,
                         header=True,
                         sep=',')