def calcAdditionalMetrics(self, metric, normType, newMetric):
    """Calculate an additional metric for every atom in ``self.atomList``.

    Parameters:
        metric: name of the existing metric the new one is derived from
            (the original notes give 'loss', 'gain', 'mean' as examples);
            forwarded to the per-atom calculation.
        normType: normalisation type, only used when ``newMetric`` is
            'average' (forwarded to ``atom.calcAvMetric``).
        newMetric: one of 'Calpha', 'netChange', 'linreg', 'subtract1'
            or 'average'.

    Returns:
        None. Results are stored by the per-atom methods that are called.
        An unrecognised ``newMetric`` prints the valid options and returns
        without touching any atom.
    """
    options = ['Calpha', 'netChange', 'linreg', 'subtract1', 'average']

    # Validate once, up front: the request is rejected before the progress
    # bar starts, and also when the atom list happens to be empty.
    if newMetric not in options:
        print('new metric type not recognised.. choose from: {}'.format(options))
        return

    # Calpha weights are computed once for the whole atom list (see the
    # CalphaWeight class for details) and shared by every atom below.
    CAweights = None
    if newMetric == 'Calpha':
        print('Calculating Calpha weights at each dataset...')
        CAweights = CalphaWeight(self.atomList)
        CAweights.calculateWeights(metric)

    numAtoms = self.getNumAtoms()
    for counter, atom in enumerate(self.atomList, 1):
        progress(counter, numAtoms, suffix='')  # unessential loading bar add-in
        if newMetric == 'Calpha':
            atom.CalphaWeightedDensChange(CAweights, metric)
        elif newMetric == 'linreg':
            atom.calcLinReg(self.numLigRegDatasets, 'Standard', metric)
        elif newMetric == 'netChange':
            atom.calcNetChangeMetric('Standard')
        elif newMetric == 'subtract1':
            atom.calcFirstDatasetSubtractedMetric('Standard', metric)
        else:
            # only 'average' remains after the validation above
            atom.calcAvMetric(normType, metric)
def processAtomList(self):
    """Convert ``self.unprocessedAtomList`` into ``processedAtom`` objects.

    Calpha weights are calculated once over the whole unprocessed list.
    Each input atom is then cloned into a new ``processedAtom``, which has
    its Calpha-weighted density change, additional metrics and 'Standard'
    linear regression calculated. The resulting list is stored on
    ``self.processedAtomList``; nothing is returned.
    """
    rawAtoms = self.unprocessedAtomList

    # calculate the Calpha weights for each dataset
    # (see CalphaWeight class for details)
    print('Calculating Calpha weights at each dataset...')
    weights = CalphaWeight(rawAtoms)
    weights.calculateWeights()

    # build one processedAtom per input atom
    print('Creating new list of atom objects within class processedAtom...')
    total = len(rawAtoms)
    converted = []
    for position, sourceAtom in enumerate(rawAtoms, 1):
        progress(position, total, suffix='')  # unessential loading bar add-in
        freshAtom = processedAtom()
        freshAtom.cloneInfo(sourceAtom)
        freshAtom.CalphaWeightedDensChange(weights)
        freshAtom.calculateAdditionalMetrics()
        freshAtom.calculateLinReg(self.numDatasets, 'Standard')
        converted.append(freshAtom)

    self.processedAtomList = converted
def retrieve_objectlist(fileName):
    """Load a list of pickled atom objects from ``fileName``.

    The file name is expected to have the form
    ``str(len(PDBlist)) + '_' + str(pdbName) + '_data.pkl'``: the leading
    '_'-separated token of the base name gives the number of objects that
    were pickled into the file one after another.

    Parameters:
        fileName: path to the .pkl file ('/'-separated).

    Returns:
        The unpickled objects as a list, sorted by their ``atomnum``.

    Raises:
        ValueError: if the leading token of the file name is not an integer.
    """
    print('Retrieving dataset from .pkl file...')
    checkFileFormat(fileName)

    # to determine number of atoms saved to file from file name
    atomCountToken = (fileName.split('/')[-1]).split('_')[0]
    print('\nNumber of atoms in file: ' + str(atomCountToken))

    # Convert once so that both the read loop and the progress bar get an
    # integer total, like every other progress() caller.
    num_atoms = int(atomCountToken)

    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # use this on .pkl files written by this pipeline.
    PDBretrieved = []
    with open(str(fileName), 'rb') as pklFile:
        for i in range(num_atoms):
            PDBretrieved.append(pickle.load(pklFile))
            progress(i + 1, num_atoms, suffix='')  # unessential loading bar add-in

    # return the list of atom objects, ordered by atom number
    PDBretrieved.sort(key=lambda x: x.atomnum)
    print('\n---> success!')
    return PDBretrieved
def bdamage_calculate(PDBarray):
    """Assign a Bdamage-style metric to every atom as the ``bdam`` attribute.

    Atoms are binned by packing density using the key
    ``round(numsurroundatoms/10)``. For each atom, ``bdam`` is its own
    Bfactor divided by the mean Bfactor of all atoms in the same bin
    (the atom itself included).

    Parameters:
        PDBarray: list of atom objects providing ``numsurroundatoms`` and
            ``Bfactor``. The list is sorted in place by ``numsurroundatoms``.

    Returns:
        None. Each atom gains a ``bdam`` attribute.
    """
    print('\n•••••••••••••••••••••••••••••••••••••••••••••••••••••••')
    print('Calculating bdam style metric for atoms in structure...\n')

    # first order by number of surrounding atoms (kept as an in-place side
    # effect callers may rely on; it also fixes the order within each bin)
    PDBarray.sort(key=lambda x: x.numsurroundatoms)
    num_atoms = len(PDBarray)

    def packingBin(atom):
        # Same binning expression as before. NOTE(review): with integer
        # counts under Python 2 the '/' is floor division - confirm before
        # running this under true division.
        return round(atom.numsurroundatoms/10)

    # One pass to collect the Bfactors of each packing density bin. This
    # replaces the per-atom scan over a shrinking index list and yields the
    # same members in the same order for every bin.
    binBfactors = {}
    for atom in PDBarray:
        binBfactors.setdefault(packingBin(atom), []).append(float(atom.Bfactor))
    binMeans = {key: np.mean(vals) for key, vals in binBfactors.items()}

    for counter, atom in enumerate(PDBarray, 1):
        progress(counter, num_atoms, suffix='')  # unessential loading bar add-in
        atom.bdam = float(atom.Bfactor)/binMeans[packingBin(atom)]

    print('\n---> success!')
def findBchange(initialPDB, multiDoseList, Bmetric):
    """Store the Bfactor/Bdamage change relative to the initial dataset.

    For every atom of ``multiDoseList`` that has a counterpart in
    ``initialPDB`` (same residuenum, atomtype, basetype and chaintype), the
    difference between its later-dataset values and the initial value is
    stored as ``atom.densMetric[Bmetric + 'Change']``.

    Parameters:
        initialPDB: list of atom objects of the initial dataset.
        multiDoseList: list of atom objects holding the later datasets.
        Bmetric: 'Bfactor' or 'Bdamage'.

    Both lists are sorted in place by ``atomnum``. Atoms without a
    counterpart in ``initialPDB`` are left unchanged. The script is
    terminated through ``sys.exit()`` if ``Bmetric`` is not recognised.
    """
    # check that valid metric specified
    if Bmetric not in ('Bfactor', 'Bdamage'):
        print('Unrecognised metric (choose between Bfactor and Bdamage)')
        print('---> terminating script...')
        sys.exit()

    print('------------------------------------------------------------')
    print('Determining {} change between initial and later datasets'.format(str(Bmetric)))
    num_atoms = len(multiDoseList)

    # ensure atom list ordered by number of atom in structure (atomnum)
    multiDoseList.sort(key=lambda x: x.atomnum)
    initialPDB.sort(key=lambda x: x.atomnum)

    Inds = ('residuenum', 'atomtype', 'basetype', 'chaintype')
    BmetricChange = Bmetric + 'Change'

    # indices of initial atoms that have not been matched yet
    initpdbindices = list(range(len(initialPDB)))

    # guard the empty case instead of failing on multiDoseList[0]
    if multiDoseList:
        numDatasets = len(multiDoseList[0].densMetric[Bmetric]['Standard']['values'])
    else:
        numDatasets = 0

    for counter, atom in enumerate(multiDoseList, 1):
        progress(counter, num_atoms, suffix='')  # unessential loading bar add-in
        atomIdentifier = [getattr(atom, attr) for attr in Inds]

        for k, atomindex in enumerate(initpdbindices):
            otheratom = initialPDB[atomindex]
            if atomIdentifier != [getattr(otheratom, att) for att in Inds]:
                continue

            # determine the Bmetric change between all later datasets and
            # the initial dataset
            laterVals = np.array(
                [float(v) for v in atom.densMetric[Bmetric]['Standard']['values']])
            initialVal = np.array(
                [float(otheratom.densMetric[Bmetric]['Standard']['values'])]*numDatasets)
            atom.densMetric[BmetricChange] = list(laterVals - initialVal)

            # Drop the matched initial atom from later searches. This is
            # done only on a match: popping after an unsuccessful search
            # would discard an unrelated atom (or fail on an empty list).
            initpdbindices.pop(k)
            break

    print('\n---> success...')
def findBchange(initialPDB, multiDoseList, Bmetric, relative=True):
    """Return the Bfactor/Bdamage change between first and later datasets.

    Parameters:
        initialPDB: list of atom objects of the first dataset; the initial
            value of each atom is read from its ``Bmetric`` attribute.
        multiDoseList: list of atom objects whose
            ``densMetric[Bmetric]['Standard']['values']`` hold the later
            dataset values.
        Bmetric: 'Bfactor' or 'Bdamage'.
        relative: if True (default) the change is divided by the initial
            value, otherwise the plain difference is used.

    Returns:
        dict mapping ``atom.getAtomID()`` to the list of changes. Atoms
        missing from ``initialPDB`` are reported and get NaN changes.

    Both lists are sorted in place by ``atomnum``. The script is terminated
    through ``sys.exit()`` if ``Bmetric`` is not recognised.
    """
    # check that valid metric specified
    if Bmetric not in ('Bfactor', 'Bdamage'):
        print('Unrecognised metric (choose between Bfactor and Bdamage)')
        print('---> terminating script...')
        sys.exit()

    print('------------------------------------------------------------')
    print('Finding {} change between first and later datasets'.format(Bmetric))
    num_atoms = len(multiDoseList)

    # ensure atom list ordered by number of atom in structure (atomnum)
    multiDoseList.sort(key=lambda x: x.atomnum)
    initialPDB.sort(key=lambda x: x.atomnum)

    BmetDic = {}
    initBfacDic = {a.getAtomID(): getattr(a, Bmetric) for a in initialPDB}

    for c, atom in enumerate(multiDoseList):
        progress(c+1, num_atoms, suffix='')  # unessential loading bar add-in

        atmID = atom.getAtomID()
        try:
            initB = initBfacDic[atmID]
        except KeyError:
            print('Error!! Atom "{}" not present in dataset 1'.format(atmID))
            initB = np.nan

        # A list comprehension (not map) so the array is built from real
        # values whether or not map returns a list.
        laterBs = np.array(
            [float(v) for v in atom.densMetric[Bmetric]['Standard']['values']])

        change = laterBs - initB
        if relative:
            # NOTE(review): an initial value of 0 gives inf/nan here.
            change = change/initB
        BmetDic[atmID] = list(change)

    print('\n---> success...')
    return BmetDic
def findBchange(initialPDB, multiDoseList, Bmetric):
    """Return the Bfactor/Bdamage change between initial and later datasets.

    Parameters:
        initialPDB: list of atom objects of the initial dataset; the
            initial value of each atom is read from its ``Bmetric``
            attribute.
        multiDoseList: list of atom objects whose
            ``densMetric[Bmetric]['Standard']['values']`` hold the later
            dataset values.
        Bmetric: 'Bfactor' or 'Bdamage'.

    Returns:
        dict mapping ``atom.getAtomID()`` to the list of differences
        (later value minus initial value). Atoms without a counterpart in
        ``initialPDB`` get no entry.

    Both lists are sorted in place by ``atomnum``. The script is terminated
    through ``sys.exit()`` if ``Bmetric`` is not recognised.
    """
    # check that valid metric specified
    if Bmetric not in ('Bfactor', 'Bdamage'):
        print('Unrecognised metric (choose between Bfactor and Bdamage)')
        print('---> terminating script...')
        sys.exit()

    print('------------------------------------------------------------')
    print('Determining {} change between initial and later datasets'.format(str(Bmetric)))
    num_atoms = len(multiDoseList)

    # ensure atom list ordered by number of atom in structure (atomnum)
    multiDoseList.sort(key=lambda x: x.atomnum)
    initialPDB.sort(key=lambda x: x.atomnum)

    # indices of initial atoms that have not been matched yet
    initpdbindices = list(range(len(initialPDB)))

    # guard the empty case instead of failing on multiDoseList[0]
    if multiDoseList:
        numDatasets = len(multiDoseList[0].densMetric[Bmetric]['Standard']['values'])
    else:
        numDatasets = 0

    BmetDic = {}
    for c, atom in enumerate(multiDoseList):
        atmID = atom.getAtomID()
        progress(c+1, num_atoms, suffix='')  # unessential loading bar add-in

        for k, atomindex in enumerate(initpdbindices):
            otheratom = initialPDB[atomindex]
            if atmID != otheratom.getAtomID():
                continue

            # determine the Bmetric change between all later datasets and
            # the initial dataset
            laterVals = np.array(
                [float(v) for v in atom.densMetric[Bmetric]['Standard']['values']])
            initialVal = np.array([getattr(otheratom, Bmetric)]*numDatasets)
            BmetDic[atmID] = list(laterVals - initialVal)

            # Drop the matched initial atom from later searches. This is
            # done only on a match: popping after an unsuccessful search
            # would discard an unrelated atom, and with an empty candidate
            # list the loop index would not even be defined.
            initpdbindices.pop(k)
            break

    print('\n---> success...')
    return BmetDic
def retrieve_objectlist(fileName='untitled.pkl', loadBar=False, logFile=''):
    """Load a list of pickled atom objects from ``fileName``.

    The file name is expected to have the form
    ``str(len(PDBlist)) + '_' + str(pdbName) + '_data.pkl'``: the leading
    '_'-separated token of the base name gives the number of objects that
    were pickled into the file one after another.

    Parameters:
        fileName: path to the .pkl file ('/'-separated).
        loadBar: a progress bar is shown only when this is exactly True.
        logFile: object with a ``writeToLog(str=...)`` method that receives
            the status messages; with the default '' they are printed.

    Returns:
        The unpickled objects as a list, sorted by their ``atomnum``.

    Raises:
        ValueError: if the leading token of the file name is not an integer.
    """
    def report(ln):
        # status lines go to the log file when one was supplied
        if logFile != '':
            logFile.writeToLog(str=ln)
        else:
            print(ln)

    report('Retrieving dataset from .pkl file...')
    checkFileFormat(fileName)

    # to determine number of atoms saved to file from file name
    atomCountToken = (fileName.split('/')[-1]).split('_')[0]
    report('Number of atoms in file: ' + str(atomCountToken))

    # Convert once so that both the read loop and the progress bar get an
    # integer total, like every other progress() caller.
    num_atoms = int(atomCountToken)

    # NOTE: pickle.load can execute arbitrary code while unpickling; only
    # use this on .pkl files written by this pipeline.
    PDBretrieved = []
    with open(str(fileName), 'rb') as pklFile:
        for i in range(num_atoms):
            PDBretrieved.append(pickle.load(pklFile))
            if loadBar is True:
                progress(i + 1, num_atoms, suffix='')  # unessential loading bar add-in

    # return the list of atom objects, ordered by atom number
    PDBretrieved.sort(key=lambda x: x.atomnum)
    return PDBretrieved
def numsurroundatms_extract(initialPDBarray, laterPDBarray):
    """Copy neighbour counts from the initial structure to a later one.

    Every atom of ``laterPDBarray`` that has a counterpart in
    ``initialPDBarray`` (same atomtype, basetype, residuenum and chaintype)
    receives that counterpart's ``numsurroundatoms`` and
    ``numsurroundprotons``.

    Parameters:
        initialPDBarray: list of atom objects of the initial dataset,
            already carrying the two neighbour attributes.
        laterPDBarray: list of atom objects of a later dataset of the same
            damage series.

    Both lists are sorted in place by ``atomnum``. Later atoms without a
    counterpart are left unchanged. Nothing is returned.
    """
    print('\n•••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••')
    print('Extracting number of surrounding atoms from initial PDB file...\n')
    num_atoms = len(laterPDBarray)

    # ensure atom list ordered by number of atom in structure (atomnum)
    laterPDBarray.sort(key=lambda x: x.atomnum)
    initialPDBarray.sort(key=lambda x: x.atomnum)

    # Indices of initial atoms not matched yet. Matched atoms are removed
    # so that they cannot be visited again, which speeds up later searches.
    initfile_indices = list(range(len(initialPDBarray)))

    for counter, atom in enumerate(laterPDBarray, 1):
        progress(counter, num_atoms, suffix='')  # unessential loading bar add-in

        for k, atomindex in enumerate(initfile_indices):
            otheratom = initialPDBarray[atomindex]
            if (atom.atomtype == otheratom.atomtype and
                    atom.basetype == otheratom.basetype and
                    atom.residuenum == otheratom.residuenum and
                    atom.chaintype == otheratom.chaintype):
                atom.numsurroundatoms = otheratom.numsurroundatoms
                atom.numsurroundprotons = otheratom.numsurroundprotons

                # Remove only on a match: popping after an unsuccessful
                # search would discard an unrelated initial atom (or fail
                # on an empty list).
                initfile_indices.pop(k)
                break

    print('\n---> success!')
def numsurroundatoms_calculate(initialPDBfile, PDBarray, threshold):
    """Count the neighbouring atoms and protons around every atom.

    The initial pdb file is expanded through ``pdbCUR_symgen``,
    ``translate26cells`` and ``restrict14A`` into an extended pdb file.
    For every atom of ``PDBarray`` the distances to all atoms of that
    extended file are computed and two attributes are set:

    * ``numsurroundatoms``: number of extended-file atoms whose distance is
      not larger than ``threshold``;
    * ``numsurroundprotons``: sum of proton numbers over the nearest
      ``numsurroundatoms + 1`` extended-file atoms.

    Parameters:
        initialPDBfile: path of the initial pdb file; derived file names
            are built from it by replacing its last four characters.
        PDBarray: list of atom objects with ``X_coord``, ``Y_coord`` and
            ``Z_coord``.
        threshold: distance cut-off, in the units of the pdb coordinates.

    Proton numbers are looked up in 'VDVradiusfile.txt' (first column
    proton number, second column element symbol). The script is terminated
    through ``sys.exit()`` if no CRYST1 record is found or if not every
    extended-file atom could be assigned a proton number.
    """
    print('••••••••••••••••••••••••••••••••••••••••••••••••••••')
    print('Calculating contact number for atoms in structure...')

    inputpdbfile1 = initialPDBfile

    # determine the space group for the input pdb file
    # (the last CRYST1 record wins, as before)
    space_group = None
    with open(str(inputpdbfile1), 'r') as pdbin:
        for line in pdbin:
            if 'CRYST1' in line[0:6]:
                space_group = line[55:66]
    if space_group is None:
        # fail with a clear message rather than an unbound-variable error
        print('No CRYST1 record found in {}'.format(inputpdbfile1))
        print('---> terminating script...')
        sys.exit()

    # (a) determine the symmetrically related atoms to the original
    # structure, (b) translate to determine the location of all atoms
    # within the adjacent 26 unit cells, and (c) restrict to atoms only
    # within 14 Angstroms of the original structure
    outputpdbfile1 = initialPDBfile[:-4]+'_pdbCURsymgenOUT.pdb'
    pdbCUR_symgen(inputpdbfile1, outputpdbfile1, space_group)

    outputpdbfile2 = initialPDBfile[:-4]+'_translate26cells.pdb'
    translate26cells(outputpdbfile1, outputpdbfile2)

    extended_pdbfile = initialPDBfile[:-4]+'_restrict14A.pdb'
    restrict14A(PDBarray, outputpdbfile2, extended_pdbfile)

    # collect xyz coords and element symbols of all atoms in the extended
    # 14A pdb file
    allcoords = []
    allcoords_atmtypes = []
    with open(extended_pdbfile, 'r') as pdbin:
        for line in pdbin:
            if 'ATOM' in line[0:5] or 'HETATM' in line[0:6]:
                allcoords.append([float(line[30:38]),
                                  float(line[38:46]),
                                  float(line[46:54])])
                allcoords_atmtypes.append(str(line[76:78]).strip())

    # convert the element symbols into proton numbers
    print('Locating proton number for each atom close to structure...')

    # Read the lookup file once instead of once per atom. The first line
    # listing a symbol wins, matching the former scan-until-first-match.
    protonsBySymbol = {}
    with open('VDVradiusfile.txt', 'r') as atomdetailfile:
        for line in atomdetailfile:
            fields = line.split()
            if len(fields) > 1:
                protonsBySymbol.setdefault(fields[1], fields[0])

    allcoords_protons = np.array(
        [int(protonsBySymbol[element]) for element in allcoords_atmtypes
         if element in protonsBySymbol])

    # check that all atoms have been assigned proton numbers in last step
    if len(allcoords_atmtypes) != len(allcoords_protons):
        print('Not all atoms within 14A of structure successfully assigned proton numbers')
        print('---> terminating script...')
        sys.exit()
    else:
        print('---> success!')
    del allcoords_atmtypes

    # build the coordinate array once rather than once per atom
    allcoords = np.array(allcoords)

    num_atoms = len(PDBarray)
    for counter, atom in enumerate(PDBarray, 1):
        progress(counter, num_atoms, suffix='')  # unessential progress bar

        atmxyz = np.array([[atom.X_coord, atom.Y_coord, atom.Z_coord]])

        # efficient distance calculation, flattened to one distance per
        # extended-file atom and ordered from nearest to furthest
        dist = spa.distance.cdist(allcoords, atmxyz).ravel()
        sort_order = dist.argsort()
        sorted_dist = dist[sort_order]

        # Number of distances <= threshold, i.e. the position of the first
        # distance beyond it. Unlike scanning for that first distance, this
        # also works when every atom lies within the threshold.
        num_contacts = int(np.searchsorted(sorted_dist, threshold, side='right'))

        # NOTE(review): the '+1' also includes the first atom beyond the
        # threshold in the proton sum; kept as in the original - confirm
        # that this is intended.
        num_protons = sum(allcoords_protons[sort_order][:num_contacts+1])

        atom.numsurroundatoms = num_contacts
        atom.numsurroundprotons = num_protons

    print('\n---> success!')