Пример #1
0
	def calcAdditionalMetrics(self,metric,normType,newMetric):
		"""Derive an additional metric for every atom in self.atomList.

		metric    -- name of the existing metric to work from; it is handed
		             unchanged to the per-atom methods (and to the Calpha
		             weight calculation)
		normType  -- normalisation label; only used when newMetric is 'average'
		newMetric -- which derived metric to calculate, one of
		             'Calpha', 'netChange', 'linreg', 'subtract1', 'average'

		Returns None. If newMetric is not one of the options above a message
		is printed and the method returns before anything is calculated.
		"""
		options = ['Calpha','netChange','linreg','subtract1','average']

		# validate once, before any work is done: checking inside the atom loop
		# would let an empty atom list hide a bad value, and would draw the
		# progress bar before reporting the problem
		if newMetric not in options:
			print('new metric type not recognised.. choose from: {}'.format(options))
			return

		# the Calpha weights are shared by all atoms, so they are calculated
		# once for each dataset up front (see CalphaWeight class for details)
		if newMetric == 'Calpha':
			print('Calculating Calpha weights at each dataset...')
			CAweights = CalphaWeight(self.atomList)
			CAweights.calculateWeights(metric)

		# loop over all atoms in list and calculate the requested metric for each
		numAtoms = self.getNumAtoms()
		for counter, atom in enumerate(self.atomList, 1):
			progress(counter, numAtoms, suffix='') # unessential loading bar add-in
			if newMetric == 'Calpha':
				atom.CalphaWeightedDensChange(CAweights,metric)
			elif newMetric == 'linreg':
				atom.calcLinReg(self.numLigRegDatasets,'Standard',metric)
			elif newMetric == 'netChange':
				atom.calcNetChangeMetric('Standard')
			elif newMetric == 'subtract1':
				atom.calcFirstDatasetSubtractedMetric('Standard',metric)
			else:
				# only 'average' is left after the validation above
				atom.calcAvMetric(normType,metric)
Пример #2
0
	def processAtomList(self):
		"""Build self.processedAtomList from self.unprocessedAtomList.

		Every atom of the unprocessed (multiPDB) list is copied into a new
		processedAtom object, which then has its Calpha-weighted density
		change, additional metrics and 'Standard' linear regression
		calculated. The finished list is stored on self.processedAtomList;
		nothing is returned.
		"""
		# the Calpha weights are common to all atoms, so they are calculated
		# once for each dataset (see CalphaWeight class for details)
		print('Calculating Calpha weights at each dataset...')
		weights = CalphaWeight(self.unprocessedAtomList)
		weights.calculateWeights()

		# convert each atom in turn (new atom info is defined by processedAtom class)
		print('Creating new list of atom objects within class processedAtom...')
		total = len(self.unprocessedAtomList)
		converted = []
		for position, sourceAtom in enumerate(self.unprocessedAtomList, 1):
			progress(position, total, suffix='') #unessential loading bar add-in

			freshAtom = processedAtom()
			freshAtom.cloneInfo(sourceAtom)
			freshAtom.CalphaWeightedDensChange(weights)
			freshAtom.calculateAdditionalMetrics()
			freshAtom.calculateLinReg(self.numDatasets,'Standard')
			# freshAtom.calculateLinReg(self.numDatasets,'Calpha normalised')

			converted.append(freshAtom)

		# only publish the list once every atom has been converted
		self.processedAtomList = converted
Пример #3
0
def retrieve_objectlist(fileName):
    """Load the pickled atom objects stored in `fileName`, sorted by atomnum.

    The number of objects to read is not stored in the file; it is taken
    from the file name, which must be of the form
    str(len(PDBlist))+'_'+str(pdbName)+'_data.pkl' (the text before the
    first underscore of the base name is the count).

    Returns the list of unpickled objects sorted on their `atomnum`
    attribute. Raises ValueError if the leading part of the file name is
    not an integer.
    """
    print('Retrieving dataset from .pkl file...')
    checkFileFormat(fileName)

    # to determine number of atoms saved to file from file name:
    count_text = (fileName.split('/')[-1]).split('_')[0]
    print('\nNumber of atoms in file: ' + str(count_text))

    # convert once, so that both the read loop and the loading bar work
    # with the same integer (the bar receives ints from its other callers)
    num_atoms = int(count_text)

    # NOTE: pickle.load can execute arbitrary code held in the file, so
    # this must only be pointed at .pkl files from a trusted source
    PDBretrieved = []
    with open(str(fileName), 'rb') as pklFile:
        for i in range(num_atoms):
            PDBretrieved.append(pickle.load(pklFile))

            # unessential loading bar add-in
            progress(i+1, num_atoms, suffix='')

    # return the list of atom objects
    PDBretrieved.sort(key=lambda x: x.atomnum)
    print('\n---> success!')

    return PDBretrieved
Пример #4
0
def bdamage_calculate(PDBarray):
    """Set a Bdamage style `bdam` attribute on every atom in PDBarray.

    Atoms are binned by packing density, the bin being
    round(numsurroundatoms/10). Each atom's bdam is its own Bfactor divided
    by the mean Bfactor of all atoms that fall in the same bin.

    PDBarray is sorted in place by `numsurroundatoms` as a side effect.
    Returns None.
    """
    print('\n•••••••••••••••••••••••••••••••••••••••••••••••••••••••')
    print('Calculating bdam style metric for atoms in structure...\n')

    # first order by number of surrounding atoms
    PDBarray.sort(key=lambda x: x.numsurroundatoms)
    num_atoms = len(PDBarray)

    def packing_bin(atm):
        # NOTE(review): for integer counts this gives different bins under
        # Python 2 (integer division) and Python 3 (true division followed
        # by rounding) -- expression kept exactly as it was; confirm which
        # one is wanted
        return round(atm.numsurroundatoms/10)

    # collect the Bfactors of each packing density bin in a single pass;
    # the values stay in sorted-atom order within each bin
    bin_bfactors = {}
    for atm in PDBarray:
        bin_bfactors.setdefault(packing_bin(atm), []).append(float(atm.Bfactor))

    # one mean per bin, instead of re-scanning the atom list for every atom
    bin_means = {}
    for bin_key, bfactors in bin_bfactors.items():
        bin_means[bin_key] = np.mean(bfactors)

    for counter, atom in enumerate(PDBarray, 1):

        # unessential loading bar add-in
        progress(counter, num_atoms, suffix='')

        atom.bdam = float(atom.Bfactor)/bin_means[packing_bin(atom)]
    print('\n---> success!')
Пример #5
0
def findBchange(initialPDB,multiDoseList,Bmetric):
	"""Store the change in Bmetric relative to the initial dataset on each atom.

	initialPDB    -- list of atom objects for the initial dataset
	multiDoseList -- list of atom objects carrying values for the later datasets
	Bmetric       -- 'Bfactor' or 'Bdamage'

	Atoms are paired on (residuenum, atomtype, basetype, chaintype). For each
	pair, the initial atom's densMetric[Bmetric]['Standard']['values'] (a
	single value) is subtracted from every entry of the later atom's
	densMetric[Bmetric]['Standard']['values'], and the resulting list is
	stored as atom.densMetric[Bmetric+'Change']. Later atoms with no partner
	in initialPDB are left untouched.

	Both input lists are sorted in place by atomnum. Returns None. Prints a
	message and calls sys.exit() if Bmetric is not a recognised metric.
	"""
	# check that valid metric specified
	if Bmetric not in ('Bfactor','Bdamage'):
		print('Unrecognised metric (choose between Bfactor and Bdamage)')
		print('---> terminating script...')
		sys.exit()

	print('------------------------------------------------------------')
	print('Determining {} change between initial and later datasets'.format(str(Bmetric)))
	num_atoms = len(multiDoseList)

	# ensure atom list ordered by number of atom in structure (atomnum)
	multiDoseList.sort(key=lambda x: x.atomnum)
	initialPDB.sort(key=lambda x: x.atomnum)

	Inds = ('residuenum','atomtype','basetype','chaintype')
	BmetricChange = Bmetric+'Change'

	# indices of the initial atoms that have not been paired up yet
	initpdbindices = list(range(len(initialPDB)))
	for counter, atom in enumerate(multiDoseList, 1):

		# unessential loading bar add-in
		progress(counter, num_atoms, suffix='')

		atomIdentifier = [getattr(atom, attr) for attr in Inds]
		for k, atomindex in enumerate(initpdbindices):
			otheratom = initialPDB[atomindex]
			if atomIdentifier != [getattr(otheratom, attr) for attr in Inds]:
				continue

			# determine the Bmetric change between all later datasets and initial dataset
			laterVals = np.array([float(val) for val in atom.densMetric[Bmetric]['Standard']['values']])
			initialVal = float(otheratom.densMetric[Bmetric]['Standard']['values'])
			atom.densMetric[BmetricChange] = list(laterVals - initialVal)

			# an initial atom is only retired once it has actually been
			# matched, so an unmatched later atom cannot discard the
			# partner of some other atom
			initpdbindices.pop(k)
			break
	print('\n---> success...')
Пример #6
0
def findBchange(initialPDB, multiDoseList, Bmetric, relative=True):
    """Return the change in Bmetric between the first and later datasets.

    initialPDB    -- list of atom objects for the first dataset; the value
                     used for each atom is its `Bmetric` attribute
    multiDoseList -- list of atom objects whose
                     densMetric[Bmetric]['Standard']['values'] hold the
                     values for the later datasets
    Bmetric       -- 'Bfactor' or 'Bdamage'
    relative      -- if True (default) the change is divided by the initial
                     value, otherwise the plain difference is reported

    Returns a dict mapping atom.getAtomID() to a list with one change per
    later dataset. An atom that is missing from initialPDB is reported and
    gets NaN for every entry. Both input lists are sorted in place by
    atomnum. Prints a message and calls sys.exit() if Bmetric is not a
    recognised metric.
    """
    # check that valid metric specified
    if Bmetric not in ('Bfactor', 'Bdamage'):
        print('Unrecognised metric (choose between Bfactor and Bdamage)')
        print('---> terminating script...')
        sys.exit()

    print('------------------------------------------------------------')
    print('Finding {} change between first and later datasets'.format(Bmetric))
    num_atoms = len(multiDoseList)

    # ensure atom list ordered by number of atom in structure (atomnum)
    multiDoseList.sort(key=lambda x: x.atomnum)
    initialPDB.sort(key=lambda x: x.atomnum)

    BmetDic = {}
    initBfacDic = {a.getAtomID(): getattr(a, Bmetric) for a in initialPDB}

    for c, atom in enumerate(multiDoseList):

        # unessential loading bar add-in
        progress(c+1, num_atoms, suffix='')

        atmID = atom.getAtomID()
        try:
            initB = initBfacDic[atmID]
        except KeyError:
            print('Error!! Atom "{}" not present in dataset 1'.format(atmID))
            # NaN propagates through the arithmetic below, so the atom still
            # gets an entry of the right length
            initB = np.nan

        # a list (not map) is handed to np.array: on Python 3 map() is a lazy
        # iterator, which numpy would wrap as a single object rather than
        # turn into a float array
        laterBs = np.array(
            [float(val) for val in atom.densMetric[Bmetric]['Standard']['values']])

        change = laterBs - initB
        if relative:
            change = change/initB
        BmetDic[atmID] = list(change)

    print('\n---> success...')
    return BmetDic
Пример #7
0
def findBchange(initialPDB,multiDoseList,Bmetric):
	"""Return the change in Bmetric between the initial and later datasets.

	initialPDB    -- list of atom objects for the initial dataset; the value
	                 used for each atom is its `Bmetric` attribute
	multiDoseList -- list of atom objects whose
	                 densMetric[Bmetric]['Standard']['values'] hold the
	                 values for the later datasets
	Bmetric       -- 'Bfactor' or 'Bdamage'

	Atoms are paired on getAtomID(). Returns a dict mapping that ID to the
	list (later value - initial value), one entry per later dataset. Later
	atoms with no partner in initialPDB get no entry.

	Both input lists are sorted in place by atomnum. Prints a message and
	calls sys.exit() if Bmetric is not a recognised metric.
	"""
	# check that valid metric specified
	if Bmetric not in ('Bfactor','Bdamage'):
		print('Unrecognised metric (choose between Bfactor and Bdamage)')
		print('---> terminating script...')
		sys.exit()

	print('------------------------------------------------------------')
	print('Determining {} change between initial and later datasets'.format(str(Bmetric)))
	num_atoms = len(multiDoseList)

	# ensure atom list ordered by number of atom in structure (atomnum)
	multiDoseList.sort(key=lambda x: x.atomnum)
	initialPDB.sort(key=lambda x: x.atomnum)

	# indices of the initial atoms that have not been paired up yet
	initpdbindices = list(range(len(initialPDB)))
	BmetDic = {}
	for c,atom in enumerate(multiDoseList):
		atmID = atom.getAtomID()

		# unessential loading bar add-in
		progress(c+1, num_atoms, suffix='')

		for k,atomindex in enumerate(initpdbindices):
			otheratom = initialPDB[atomindex]
			if atmID != otheratom.getAtomID():
				continue

			# determine the Bmetric change between all later datasets and initial dataset
			laterVals = np.array([float(val) for val in atom.densMetric[Bmetric]['Standard']['values']])
			BmetDic[atmID] = list(laterVals - getattr(otheratom,Bmetric))

			# an initial atom is only retired once it has actually been
			# matched, so an unmatched later atom cannot discard the
			# partner of some other atom
			initpdbindices.pop(k)
			break
	print('\n---> success...')
	return BmetDic
Пример #8
0
def retrieve_objectlist(fileName = 'untitled.pkl',
                        loadBar  = False,
                        logFile  = ''):
    """Load the pickled atom objects stored in `fileName`, sorted by atomnum.

    fileName -- path of the .pkl file; its base name must be of the form
                str(len(PDBlist))+'_'+str(pdbName)+'_data.pkl', because the
                number of objects to read is taken from the text before the
                first underscore
    loadBar  -- pass True to draw the loading bar while reading
    logFile  -- object with a writeToLog(str=...) method that receives the
                status messages; with the default '' they are printed

    Returns the list of unpickled objects sorted on their `atomnum`
    attribute. Raises ValueError if the leading part of the file name is
    not an integer.
    """
    def report(ln):
        # status messages go to the log when one was supplied, else to stdout
        if logFile != '':
            logFile.writeToLog(str = ln)
        else:
            print(ln)

    report('Retrieving dataset from .pkl file...')

    checkFileFormat(fileName)

    # to determine number of atoms saved to file from file name:
    count_text = (fileName.split('/')[-1]).split('_')[0]
    report('Number of atoms in file: ' + str(count_text))

    # convert once, so that both the read loop and the loading bar work
    # with the same integer (the bar receives ints from its other callers)
    num_atoms = int(count_text)

    # NOTE: pickle.load can execute arbitrary code held in the file, so
    # this must only be pointed at .pkl files from a trusted source
    PDBretrieved = []
    with open(str(fileName), 'rb') as pklFile:
        for i in range(num_atoms):
            PDBretrieved.append(pickle.load(pklFile))

            # unessential loading bar add-in
            if loadBar is True:
                progress(i+1, num_atoms, suffix='')

    # return the list of atom objects
    PDBretrieved.sort(key = lambda x: x.atomnum)

    return PDBretrieved
Пример #9
0
def numsurroundatms_extract(initialPDBarray,laterPDBarray):
    """Copy neighbour counts from the initial structure onto a later one.

    For every atom in laterPDBarray the atom with the same atomtype,
    basetype, residuenum and chaintype is looked up in initialPDBarray, and
    its `numsurroundatoms` and `numsurroundprotons` are copied across.
    Later atoms with no partner in the initial structure are left untouched.

    Both lists are sorted in place by atomnum. Returns None.
    """
    print('\n•••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••••')
    print('Extracting number of surrounding atoms from initial PDB file...\n')
    num_atoms = len(laterPDBarray)

    # ensure atom list ordered by number of atom in structure (atomnum)
    laterPDBarray.sort(key=lambda x: x.atomnum)
    initialPDBarray.sort(key=lambda x: x.atomnum)

    # indices of the initial atoms that have not been paired up yet; matched
    # atoms are dropped so that later searches get shorter
    initfile_indices = list(range(len(initialPDBarray)))
    for counter, atom in enumerate(laterPDBarray, 1):

        # unessential loading bar add-in
        progress(counter, num_atoms, suffix='')

        for k, atomindex in enumerate(initfile_indices):
            otheratom = initialPDBarray[atomindex]
            if (atom.atomtype == otheratom.atomtype and
               atom.basetype == otheratom.basetype and
               atom.residuenum == otheratom.residuenum and
               atom.chaintype == otheratom.chaintype):
                atom.numsurroundatoms = otheratom.numsurroundatoms
                atom.numsurroundprotons = otheratom.numsurroundprotons

                # an initial atom is only retired once it has actually been
                # matched, so an unmatched later atom cannot discard the
                # partner of some other atom
                initfile_indices.pop(k)
                break
    print('\n---> success!')
Пример #10
0
def numsurroundatoms_calculate(initialPDBfile,PDBarray,threshold):
    """Set contact and proton counts on every atom in PDBarray.

    initialPDBfile -- path of the pdb file for the structure; the last four
                      characters (the extension) are replaced to name the
                      intermediate files
    PDBarray       -- list of atom objects with X_coord, Y_coord, Z_coord
    threshold      -- distance cutoff, in the units of the pdb coordinates

    The structure is first extended with its symmetry mates and the atoms of
    the neighbouring unit cells (pdbCUR_symgen, translate26cells,
    restrict14A), which writes the files '<name>_pdbCURsymgenOUT.pdb',
    '<name>_translate26cells.pdb' and '<name>_restrict14A.pdb'. Proton
    numbers are read from 'VDVradiusfile.txt' in the working directory
    (first column proton number, second column element symbol).

    For each atom, `numsurroundatoms` is set to the number of atoms in the
    extended file lying within `threshold` of it, and `numsurroundprotons`
    to a sum of proton numbers over the nearest atoms (see the note in the
    loop). Returns None. Prints a message and calls sys.exit() if the pdb
    file has no CRYST1 record or if an element cannot be given a proton
    number.
    """
    print('••••••••••••••••••••••••••••••••••••••••••••••••••••')
    print('Calculating contact number for atoms in structure...')

    # determine the correct extended pdb file, with atoms present up to 1 unit cell
    # away from the original structure
    inputpdbfile1 = initialPDBfile

    # determine the space group for the input pdb file (taken from the
    # CRYST1 record; if there are several, the last one is used)
    space_group = None
    with open(str(inputpdbfile1),'r') as pdbin:
        for line in pdbin:
            if 'CRYST1' in line[0:6]:
                space_group = line[55:66]
    if space_group is None:
        # fail with a clear message rather than on an unbound variable below
        print('No CRYST1 record found in {}'.format(inputpdbfile1))
        print('---> terminating script...')
        sys.exit()

    # (a) determine the symmetrically related atoms to the original
    # structure, (b) translate to determine the location of all atoms within
    # the adjacent 26 unit cells to the original structure, and (c) restrict
    # to atoms only within 14 Angstroms of the original structure.
    outputpdbfile1 = initialPDBfile[:-4]+'_pdbCURsymgenOUT.pdb'
    pdbCUR_symgen(inputpdbfile1,outputpdbfile1,space_group)

    outputpdbfile2 = initialPDBfile[:-4]+'_translate26cells.pdb'
    translate26cells(outputpdbfile1,outputpdbfile2)

    extended_pdbfile = initialPDBfile[:-4]+'_restrict14A.pdb'
    restrict14A(PDBarray,outputpdbfile2,extended_pdbfile)

    # read through extended 14A pdb file and collect all xyz coords of atoms
    # into a list allcoords. allcoords_atmtypes contains the element symbol
    # (columns 77-78) associated with each atom found
    allcoords = []
    allcoords_atmtypes = []
    with open(extended_pdbfile,'r') as pdbin:
        for line in pdbin:
            if ('ATOM' in line[0:5] or 'HETATM' in line[0:6]):
                allcoords.append([float(line[30:38]),float(line[38:46]),float(line[46:54])])
                allcoords_atmtypes.append(str(line[76:78]).strip())

    # convert here the atom names in allcoords_atmtypes into proton numbers.
    # The table is read once into a dict; setdefault keeps the first entry
    # for an element, as a top-to-bottom search of the file would
    print('Locating proton number for each atom close to structure...')
    proton_lookup = {}
    with open('VDVradiusfile.txt','r') as atomdetailfile:
        for line in atomdetailfile:
            fields = line.split()
            if len(fields) >= 2:
                proton_lookup.setdefault(fields[1], fields[0])
    allcoords_protons = np.array([int(proton_lookup[element])
                                  for element in allcoords_atmtypes
                                  if element in proton_lookup])

    # check that all atoms have been assigned proton numbers in last step
    # (unknown elements are skipped above, which would shift the array
    # out of step with allcoords)
    if len(allcoords_atmtypes) != len(allcoords_protons):
        print('Not all atoms within 14A of structure successfully assigned proton numbers')
        print('---> terminating script...')
        sys.exit()
    else:
        print('---> success!')
    del allcoords_atmtypes

    # the coordinate array is the same for every atom, so build it once
    allcoords = np.array(allcoords)

    num_atoms = len(PDBarray)
    for counter, atom in enumerate(PDBarray, 1):

        # unessential progress bar added here
        progress(counter, num_atoms, suffix='')

        # distances from this atom to every atom of the extended structure,
        # ordered from nearest to furthest
        atmxyz = np.array([[atom.X_coord,atom.Y_coord,atom.Z_coord]])
        dist = spa.distance.cdist(allcoords,atmxyz).ravel()
        sort_order = dist.argsort()
        sorted_dist = dist[sort_order]

        # number of contacts = number of distances <= threshold, i.e. the
        # position of the first distance beyond it (or all of them if none is)
        num_contacts = int(np.searchsorted(sorted_dist, threshold, side='right'))

        # NOTE(review): the slice runs to num_contacts+1, so it also takes in
        # the first atom beyond the threshold -- kept as it was; confirm
        # whether this is intended
        num_protons = sum(allcoords_protons[sort_order][:num_contacts+1])

        atom.numsurroundatoms = num_contacts
        atom.numsurroundprotons = num_protons

    print('\n---> success!')